chg: [DB Migration] UI: Extract + highlight leaks and trackers match, Data Retention save object first/last date, Refactor Tools

2025-01-18 08:26:15 +00:00 · 2022-12-19 16:38:20 +01:00 · 2022-12-19 16:38:20 +01:00 · bf71c9ba99
commit bf71c9ba99
parent f9715408be
26 changed files with 883 additions and 873 deletions
--- a/bin/DB_KVROCKS_MIGRATION.py
+++ b/bin/DB_KVROCKS_MIGRATION.py
@ -375,6 +375,7 @@ def items_migration():
    #         item = Items.Item(item_id)
    #         item.set_father(father_id)

+    # DUPLICATES
    for tag in ['infoleak:automatic-detection="credential"']:  # Creditcards, Mail, Keys ???????????????????????????????
        print(f'Duplicate migration: {tag}')
        tag_first = get_tag_first_seen(tag)
@ -389,6 +390,10 @@ def items_migration():
                            print(algo, duplicates_dict[id_2][algo], id_2)
                            item.add_duplicate(algo, duplicates_dict[id_2][algo], id_2)

+    # ITEM FIRST/LAST DATE
+    Items._manual_set_items_date_first_last()
+
+

 # TODO: test cookies migration
 # TODO: migrate auto crawlers
@ -840,14 +845,14 @@ if __name__ == '__main__':
    #core_migration()
    #user_migration()
    #tags_migration()
-    # items_migration()
+    items_migration()
    #crawler_migration()
    # domain_migration()                      # TO TEST ###########################
    #decodeds_migration()
    # screenshots_migration()
-    #subtypes_obj_migration()
+    subtypes_obj_migration()
    # ail_2_ail_migration()
-    trackers_migration()
+    # trackers_migration()
    # investigations_migration()
    # statistics_migration()

--- a/bin/Tools.py
+++ b/bin/Tools.py
@ -1,753 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-"""
-Tools Module
-============================
-
-Search tools outpout
-
-"""
-
-from Helper import Process
-from pubsublogger import publisher
-
-import os
-import re
-import sys
-import time
-import redis
-import signal
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-import Item
-
-
-class TimeoutException(Exception):
-    pass
-
-def timeout_handler(signum, frame):
-    raise TimeoutException
-
-signal.signal(signal.SIGALRM, timeout_handler)
-
-
-def search_tools(item_id, item_content):
-
-    tools_in_item = False
-
-    for tools_name in tools_dict:
-        tool_dict = tools_dict[tools_name]
-
-        regex_match = False
-        for regex_nb in list(range(tool_dict['nb_regex'])):
-            regex_index = regex_nb + 1
-            regex = tool_dict['regex{}'.format(regex_index)]
-
-            signal.alarm(tool_dict['max_execution_time'])
-            try:
-                tools_found = re.findall(regex, item_content)
-            except TimeoutException:
-                tools_found = []
-                p.incr_module_timeout_statistic() # add encoder type
-                print ("{0} processing timeout".format(item_id))
-                continue
-            else:
-                signal.alarm(0)
-
-
-            if not tools_found:
-                regex_match = False
-                break
-            else:
-                regex_match = True
-                if 'tag{}'.format(regex_index) in tool_dict:
-                    print('{} found: {}'.format(item_id, tool_dict['tag{}'.format(regex_index)]))
-                    msg = '{};{}'.format(tool_dict['tag{}'.format(regex_index)], item_id)
-                    p.populate_set_out(msg, 'Tags')
-
-        if regex_match:
-            print('{} found: {}'.format(item_id, tool_dict['name']))
-            # Tag Item
-            msg = '{};{}'.format(tool_dict['tag'], item_id)
-            p.populate_set_out(msg, 'Tags')
-
-
-    if tools_in_item:
-        # send to duplicate module
-        p.populate_set_out(item_id, 'Duplicate')
-
-
-default_max_execution_time = 30
-
-tools_dict = {
-    'sqlmap': {
-        'name': 'sqlmap',
-        'regex1': r'Usage of sqlmap for attacking targets without|all tested parameters do not appear to be injectable|sqlmap identified the following injection point|Title:[^\n]*((error|time|boolean)-based|stacked queries|UNION query)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sqlmap-tool"', # tag if all regex match
-    },
-    'wig': {
-        'name': 'wig',
-        'regex1': r'(?s)wig - WebApp Information Gatherer.+?_{10,}',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="wig-tool"', # tag if all regex match
-    },
-    'dmytry': {
-        'name': 'dmitry',
-        'regex1': r'(?s)Gathered (TCP Port|Inet-whois|Netcraft|Subdomain|E-Mail) information for.+?-{10,}',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dmitry-tool"', # tag if all regex match
-    },
-    'inurlbr': {
-        'name': 'inurlbr',
-        'regex1': r'Usage of INURLBR for attacking targets without prior mutual consent is illegal',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="inurlbr-tool"', # tag if all regex match
-    },
-    'wafw00f': {
-        'name': 'wafw00f',
-        'regex1': r'(?s)WAFW00F - Web Application Firewall Detection Tool.+?Checking',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="wafw00f-tool"', # tag if all regex match
-    },
-    'sslyze': {
-        'name': 'sslyze',
-        'regex1': r'(?s)PluginSessionRenegotiation.+?SCAN RESULTS FOR',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sslyze-tool"', # tag if all regex match
-    },
-    'nmap': {
-        'name': 'nmap',
-        'regex1': r'(?s)Nmap scan report for.+?Host is',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="nmap-tool"', # tag if all regex match
-    },
-    'dnsenum': {
-        'name': 'dnsenum',
-        'regex1': r'(?s)dnsenum(\.pl)? VERSION:.+?Trying Zone Transfer',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dnsenum-tool"', # tag if all regex match
-    },
-    'knock': {
-        'name': 'knock',
-        'regex1': r'I scannig with my internal wordlist',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="knock-tool"', # tag if all regex match
-    },
-    'nikto': {
-        'name': 'nikto',
-        'regex1': r'(?s)\+ Target IP:.+?\+ Start Time:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="nikto-tool"', # tag if all regex match
-    },
-    'dnscan': {
-        'name': 'dnscan',
-        'regex1': r'(?s)\[\*\] Processing domain.+?\[\+\] Getting nameservers.+?records found',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dnscan-tool"', # tag if all regex match
-    },
-    'dnsrecon': {
-        'name': 'dnsrecon',
-        'regex1': r'Performing General Enumeration of Domain:|Performing TLD Brute force Enumeration against',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dnsrecon-tool"', # tag if all regex match
-    },
-    'striker': {
-        'name': 'striker',
-        'regex1': r'Crawling the target for fuzzable URLs|Honeypot Probabilty:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="striker-tool"', # tag if all regex match
-    },
-    'rhawk': {
-        'name': 'rhawk',
-        'regex1': r'S U B - D O M A I N   F I N D E R',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="rhawk-tool"', # tag if all regex match
-    },
-    'uniscan': {
-        'name': 'uniscan',
-        'regex1': r'\| \[\+\] E-mail Found:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="uniscan-tool"', # tag if all regex match
-    },
-    'masscan': {
-        'name': 'masscan',
-        'regex1': r'(?s)Starting masscan [\d.]+.+?Scanning|bit.ly/14GZzcT',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="masscan-tool"', # tag if all regex match
-    },
-    'msfconsole': {
-        'name': 'msfconsole',
-        'regex1': r'=\[ metasploit v[\d.]+.+?msf >',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="msfconsole-tool"', # tag if all regex match
-    },
-    'amap': {
-        'name': 'amap',
-        'regex1': r'\bamap v[\d.]+ \(www.thc.org/thc-amap\)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="amap-tool"', # tag if all regex match
-    },
-    'automater': {
-        'name': 'automater',
-        'regex1': r'(?s)\[\*\] Checking.+?_+ Results found for:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="automater-tool"', # tag if all regex match
-    },
-    'braa': {
-        'name': 'braa',
-        'regex1': r'\bbraa public@[\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="braa-tool"', # tag if all regex match
-    },
-    'ciscotorch': {
-        'name': 'ciscotorch',
-        'regex1': r'Becase we need it',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="ciscotorch-tool"', # tag if all regex match
-    },
-    'theharvester': {
-        'name': 'theharvester',
-        'regex1': r'Starting harvesting process for domain:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="theharvester-tool"', # tag if all regex match
-    },
-    'sslstrip': {
-        'name': 'sslstrip',
-        'regex1': r'sslstrip [\d.]+ by Moxie Marlinspike running',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sslstrip-tool"', # tag if all regex match
-    },
-    'sslcaudit': {
-        'name': 'sslcaudit',
-        'regex1': r'# filebag location:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sslcaudit-tool"', # tag if all regex match
-    },
-    'smbmap': {
-        'name': 'smbmap',
-        'regex1': r'\[\+\] Finding open SMB ports\.\.\.',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="smbmap-tool"', # tag if all regex match
-    },
-    'reconng': {
-        'name': 'reconng',
-        'regex1': r'\[\*\] Status: unfixed|\[recon-ng\]\[default\]',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="reconng-tool"', # tag if all regex match
-    },
-    'p0f': {
-        'name': 'p0f',
-        'regex1': r'\bp0f [^ ]+ by Michal Zalewski',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="p0f-tool"', # tag if all regex match
-    },
-    'hping3': {
-        'name': 'hping3',
-        'regex1': r'\bHPING [^ ]+ \([^)]+\): [^ ]+ mode set',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="hping3-tool"', # tag if all regex match
-    },
-    'enum4linux': {
-        'name': 'enum4linux',
-        'regex1': r'Starting enum4linux v[\d.]+|\|    Target Information    \|',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="enum4linux-tool"', # tag if all regex match
-    },
-    'dnstracer': {
-        'name': 'dnstracer',
-        'regex1': r'(?s)Tracing to.+?DNS HEADER \(send\)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dnstracer-tool"', # tag if all regex match
-    },
-    'dnmap': {
-        'name': 'dnmap',
-        'regex1': r'dnmap_(client|server)|Nmap output files stored in \'nmap_output\' directory',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dnmap-tool"', # tag if all regex match
-    },
-    'arpscan': {
-        'name': 'arpscan',
-        'regex1': r'Starting arp-scan [^ ]+ with \d+ hosts',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="arpscan-tool"', # tag if all regex match
-    },
-    'cdpsnarf': {
-        'name': 'cdpsnarf',
-        'regex1': r'(?s)CDPSnarf v[^ ]+.+?Waiting for a CDP packet\.\.\.',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="cdpsnarf-tool"', # tag if all regex match
-    },
-    'dnsmap': {
-        'name': 'dnsmap',
-        'regex1': r'DNS Network Mapper by pagvac',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dnsmap-tool"', # tag if all regex match
-    },
-    'dotdotpwn': {
-        'name': 'dotdotpwn',
-        'regex1': r'DotDotPwn v[^ ]+|dotdotpwn@sectester.net|\[\+\] Creating Traversal patterns',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dotdotpwn-tool"', # tag if all regex match
-    },
-    'searchsploit': {
-        'name': 'searchsploit',
-        'regex1': r'(exploits|shellcodes)/|searchsploit_rc|Exploit Title',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="searchsploit-tool"', # tag if all regex match
-    },
-    'fierce': {
-        'name': 'fierce',
-        'regex1': r'(?s)Trying zone transfer first.+Checking for wildcard DNS',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="fierce-tool"', # tag if all regex match
-    },
-    'firewalk': {
-        'name': 'firewalk',
-        'regex1': r'Firewalk state initialization completed successfully|Ramping phase source port',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="firewalk-tool"', # tag if all regex match
-    },
-    'fragroute': {
-        'name': 'fragroute',
-        'regex1': r'\bfragroute: tcp_seg -> ip_frag',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="fragroute-tool"', # tag if all regex match
-    },
-    'fragrouter': {
-        'name': 'fragrouter',
-        'regex1': r'fragrouter: frag-\d+:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="fragrouter-tool"', # tag if all regex match
-    },
-    'goofile': {
-        'name': 'goofile',
-        'regex1': r'code.google.com/p/goofile\b',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="goofile-tool"', # tag if all regex match
-    },
-    'intrace': {
-        'name': 'intrace',
-        'regex1': r'\bInTrace [\d.]+ \-\-',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="intrace-tool"', # tag if all regex match
-    },
-    'ismtp': {
-        'name': 'ismtp',
-        'regex1': r'Testing SMTP server \[user enumeration\]',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="ismtp-tool"', # tag if all regex match
-    },
-    'lbd': {
-        'name': 'lbd',
-        'regex1': r'Checking for (DNS|HTTP)-Loadbalancing',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="lbd-tool"', # tag if all regex match
-    },
-    'miranda': {
-        'name': 'miranda',
-        'regex1': r'Entering discovery mode for \'upnp:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="miranda-tool"', # tag if all regex match
-    },
-    'ncat': {
-        'name': 'ncat',
-        'regex1': r'nmap.org/ncat',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="ncat-tool"', # tag if all regex match
-    },
-    'ohrwurm': {
-        'name': 'ohrwurm',
-        'regex1': r'\bohrwurm-[\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="ohrwurm-tool"', # tag if all regex match
-    },
-    'oscanner': {
-        'name': 'oscanner',
-        'regex1': r'Loading services/sids from service file',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="oscanner-tool"', # tag if all regex match
-    },
-    'sfuzz': {
-        'name': 'sfuzz',
-        'regex1': r'AREALLYBADSTRING|sfuzz/sfuzz',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sfuzz-tool"', # tag if all regex match
-    },
-    'sidguess': {
-        'name': 'sidguess',
-        'regex1': r'SIDGuesser v[\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sidguess-tool"', # tag if all regex match
-    },
-    'sqlninja': {
-        'name': 'sqlninja',
-        'regex1': r'Sqlninja rel\. [\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sqlninja-tool"', # tag if all regex match
-    },
-    'sqlsus': {
-        'name': 'sqlsus',
-        'regex1': r'sqlsus version [\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="sqlsus-tool"', # tag if all regex match
-    },
-    'dnsdict6': {
-        'name': 'dnsdict6',
-        'regex1': r'Starting DNS enumeration work on',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dnsdict6-tool"', # tag if all regex match
-    },
-    'unixprivesccheck': {
-        'name': 'unixprivesccheck',
-        'regex1': r'Recording Interface IP addresses',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="unixprivesccheck-tool"', # tag if all regex match
-    },
-    'yersinia': {
-        'name': 'yersinia',
-        'regex1': r'yersinia@yersinia.net',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="yersinia-tool"', # tag if all regex match
-    },
-    'armitage': {
-        'name': 'armitage',
-        'regex1': r'\[\*\] Starting msfrpcd for you',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="armitage-tool"', # tag if all regex match
-    },
-    'backdoorfactory': {
-        'name': 'backdoorfactory',
-        'regex1': r'\[\*\] In the backdoor module',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="backdoorfactory-tool"', # tag if all regex match
-    },
-    'beef': {
-        'name': 'beef',
-        'regex1': r'Please wait as BeEF services are started',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="beef-tool"', # tag if all regex match
-    },
-    'cat': {
-        'name': 'cat',
-        'regex1': r'Cisco Auditing Tool.+?g0ne',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="cat-tool"', # tag if all regex match
-    },
-    'cge': {
-        'name': 'cge',
-        'regex1': r'Vulnerability successful exploited with \[',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="cge-tool"', # tag if all regex match
-    },
-    'john': {
-        'name': 'john',
-        'regex1': r'John the Ripper password cracker, ver:|Loaded \d+ password hash \(',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="john-tool"', # tag if all regex match
-    },
-    'keimpx': {
-        'name': 'keimpx',
-        'regex1': r'\bkeimpx [\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="keimpx-tool"', # tag if all regex match
-    },
-    'maskprocessor': {
-        'name': 'maskprocessor',
-        'regex1': r'mp by atom, High-Performance word generator',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="maskprocessor-tool"', # tag if all regex match
-    },
-    'ncrack': {
-        'name': 'ncrack',
-        'regex1': r'Starting Ncrack[^\n]+http://ncrack.org',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="ncrack-tool"', # tag if all regex match
-    },
-    'patator': {
-        'name': 'patator',
-        'regex1': r'http://code.google.com/p/patator/|Starting Patator v',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="patator-tool"', # tag if all regex match
-    },
-    'phrasendrescher': {
-        'name': 'phrasendrescher',
-        'regex1': r'phrasen\|drescher [\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="phrasendrescher-tool"', # tag if all regex match
-    },
-    'polenum': {
-        'name': 'polenum',
-        'regex1': r'\[\+\] Password Complexity Flags:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="polenum-tool"', # tag if all regex match
-    },
-    'rainbowcrack': {
-        'name': 'rainbowcrack',
-        'regex1': r'Official Website: http://project-rainbowcrack.com/',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="rainbowcrack-tool"', # tag if all regex match
-    },
-    'rcracki_mt': {
-        'name': 'rcracki_mt',
-        'regex1': r'Found \d+ rainbowtable files\.\.\.',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="rcracki_mt-tool"', # tag if all regex match
-    },
-    'tcpdump': {
-        'name': 'tcpdump',
-        'regex1': r'tcpdump: listening on.+capture size \d+|\d+ packets received by filter',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="tcpdump-tool"', # tag if all regex match
-    },
-    'hydra': {
-        'name': 'hydra',
-        'regex1': r'Hydra \(http://www.thc.org/thc-hydra\)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="hydra-tool"', # tag if all regex match
-    },
-    'netcat': {
-        'name': 'netcat',
-        'regex1': r'Listening on \[[\d.]+\] \(family',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="netcat-tool"', # tag if all regex match
-    },
-    'nslookup': {
-        'name': 'nslookup',
-        'regex1': r'Non-authoritative answer:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="nslookup-tool"', # tag if all regex match
-    },
-    'dig': {
-        'name': 'dig',
-        'regex1': r'; <<>> DiG [\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dig-tool"', # tag if all regex match
-    },
-    'whois': {
-        'name': 'whois',
-        'regex1': r'(?i)Registrar WHOIS Server:|Registrar URL: http://|DNSSEC: unsigned|information on Whois status codes|REGISTERED, DELEGATED|[Rr]egistrar:|%[^\n]+(WHOIS|2016/679)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="whois-tool"', # tag if all regex match
-    },
-    'nessus': {
-        'name': 'nessus',
-        'regex1': r'nessus_(report_(get|list|exploits)|scan_(new|status))|nessuscli|nessusd|nessus-service',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="nessus-tool"', # tag if all regex match
-    },
-    'openvas': {
-        'name': 'openvas',
-        'regex1': r'/openvas/',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="openvas-tool"', # tag if all regex match
-    },
-    'golismero': {
-        'name': 'golismero',
-        'regex1': r'GoLismero[\n]+The Web Knife',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="golismero-tool"', # tag if all regex match
-    },
-    'wpscan': {
-        'name': 'wpscan',
-        'regex1': r'WordPress Security Scanner by the WPScan Team|\[\+\] Interesting header:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="wpscan-tool"', # tag if all regex match
-    },
-    'skipfish': {
-        'name': 'skipfish',
-        'regex1': r'\[\+\] Sorting and annotating crawl nodes:|skipfish version [\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="skipfish-tool"', # tag if all regex match
-    },
-    'arachni': {
-        'name': 'arachni',
-        'regex1': r'With the support of the community and the Arachni Team|\[\*\] Waiting for plugins to settle\.\.\.',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="arachni-tool"', # tag if all regex match
-    },
-    'dirb': {
-        'name': 'dirb',
-        'regex1': r'==> DIRECTORY:|\bDIRB v[\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dirb-tool"', # tag if all regex match
-    },
-    'joomscan': {
-        'name': 'joomscan',
-        'regex1': r'OWASP Joomla! Vulnerability Scanner v[\d.]+',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="joomscan-tool"', # tag if all regex match
-    },
-    'jbossautopwn': {
-        'name': 'jbossautopwn',
-        'regex1': r'\[x\] Now creating BSH script\.\.\.|\[x\] Now deploying \.war file:',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="jbossautopwn-tool"', # tag if all regex match
-    },
-    'grabber': {
-        'name': 'grabber',
-        'regex1': r'runSpiderScan @',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="grabber-tool"', # tag if all regex match
-    },
-    'fimap': {
-        'name': 'fimap',
-        'regex1': r'Automatic LFI/RFI scanner and exploiter',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="fimap-tool"', # tag if all regex match
-    },
-    'dsxs': {
-        'name': 'dsxs',
-        'regex1': r'Damn Small XSS Scanner \(DSXS\)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dsxs-tool"', # tag if all regex match
-    },
-    'dsss': {
-        'name': 'dsss',
-        'regex1': r'Damn Small SQLi Scanner \(DSSS\)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dsss-tool"', # tag if all regex match
-    },
-    'dsjs': {
-        'name': 'dsjs',
-        'regex1': r'Damn Small JS Scanner \(DSJS\)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dsjs-tool"', # tag if all regex match
-    },
-    'dsfs': {
-        'name': 'dsfs',
-        'regex1': r'Damn Small FI Scanner \(DSFS\)',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="dsfs-tool"', # tag if all regex match
-    },
-    'identywaf': {
-        'name': 'identywaf',
-        'regex1': r'\[o\] initializing handlers\.\.\.',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="identywaf-tool"', # tag if all regex match
-    },
-    'whatwaf': {
-        'name': 'whatwaf',
-        'regex1': r'<sCRIPT>ALeRt.+?WhatWaf\?',
-        'nb_regex': 1,
-        'max_execution_time': default_max_execution_time,
-        'tag': 'infoleak:automatic-detection="whatwaf-tool"', # tag if all regex match
-    }
-}
-
-if __name__ == "__main__":
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    config_section = 'Tools'
-    # # TODO: add duplicate
-
-    # Setup the I/O queues
-    p = Process(config_section)
-
-    # Sent to the logging a description of the module
-    publisher.info("Run Tools module ")
-
-    # Endless loop getting messages from the input queue
-    while True:
-        # Get one message from the input queue
-        item_id = p.get_from_set()
-        if item_id is None:
-            publisher.debug("{} queue is empty, waiting".format(config_section))
-            time.sleep(1)
-            continue
-
-        # Do something with the message from the queue
-        item_content = Item.get_item_content(item_id)
-        search_tools(item_id, item_content)
--- a/bin/lib/Tracker.py
+++ b/bin/lib/Tracker.py
@ -295,10 +295,10 @@ def get_item_all_trackers_uuid(obj_id):
    #obj_type = 'item'
    return r_serv_tracker.smembers(f'obj:trackers:item:{obj_id}')

-def is_obj_tracked(obj_type, subtype, id):
+def is_obj_tracked(obj_type, subtype, obj_id):  # param renamed to obj_id, the name the key below already uses; subtype is accepted but not part of the key
    return r_serv_tracker.exists(f'obj:trackers:{obj_type}:{obj_id}')

-def get_obj_all_trackers(obj_type, subtype, id):
+def get_obj_all_trackers(obj_type, subtype, obj_id):  # param renamed to obj_id, the name the set key below already uses; subtype is accepted but not part of the key
    return r_serv_tracker.smembers(f'obj:trackers:{obj_type}:{obj_id}')

 # # TODO: ADD all Objects + Subtypes
@ -664,6 +664,10 @@ def get_all_tracked_yara_files(filter_disabled=False):
        pass
    return yara_files

+def get_yara_rule_by_uuid(tracker_uuid):
+    """Compile and return the YARA rule file attached to a tracker.
+
+    The value returned by get_tracker_by_uuid() is used as a path relative
+    to the YARA rules directory.
+    """
+    rule_relpath = get_tracker_by_uuid(tracker_uuid)
+    rule_file = os.path.join(get_yara_rules_dir(), rule_relpath)
+    return yara.compile(filepath=rule_file)
+
 def reload_yara_rules():
    yara_files = get_all_tracked_yara_files()
    # {uuid: filename}
--- a/bin/lib/data_retention_engine.py
+++ b/bin/lib/data_retention_engine.py
@ -11,46 +11,85 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib import ConfigLoader

 config_loader = ConfigLoader.ConfigLoader()
-r_serv_db = config_loader.get_db_conn("Kvrocks_Objects")
+r_obj = config_loader.get_db_conn("Kvrocks_Objects")
+r_cache = config_loader.get_redis_conn("Redis_Cache")
 config_loader = None

-def get_first_object_date(object_type, subtype, field=''):
-    first_date = r_serv_db.zscore('objs:first_date', f'{object_type}:{subtype}:{field}')
-    if not first_date:
-        first_date = 99999999
-    return int(first_date)
+# TODO HOW TO HANDLE SCREENSHOTS ????
+# SCREENSHOT ID -> MEMBER OF ITEMS -> DATES
+# META SCREENSHOT -> NB DOMAINS + FIRST/LAST SEEN ???

-def get_last_object_date(object_type, subtype, field=''):
-    last_date = r_serv_db.zscore('objs:last_date', f'{object_type}:{subtype}:{field}')
-    if not last_date:
-        last_date = 0
-    return int(last_date)
-
-def _set_first_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:first_date', {f'{object_type}:{subtype}:{field}': date})
-
-def _set_last_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:last_date', {f'{object_type}:{subtype}:{field}': float(date)})
-
-def update_first_object_date(object_type, subtype, date, field=''):
-    first_date = get_first_object_date(object_type, subtype, field=field)
-    if int(date) < first_date:
-        _set_first_object_date(object_type, subtype, date, field=field)
-        return date
-    else:
-        return first_date
-
-def update_last_object_date(object_type, subtype, date, field=''):
-    last_date = get_last_object_date(object_type, subtype, field=field)
-    if int(date) > last_date:
-        _set_last_object_date(object_type, subtype, date, field=field)
-        return date
-    else:
-        return last_date
-
-def update_object_date(object_type, subtype, date, field=''):
-    update_first_object_date(object_type, subtype, date, field=field)
-    update_last_object_date(object_type, subtype, date, field=field)
+# TAG /!\ DIFF TAG CREDENTIAL ITEM != DOMAIN:CREDENTIAL
+#   -> IN OBJECT TYPE ?????
+# OR SPECIAL FIRST SEEN / LAST SEEN IN TAG LIB
+
+
+# DOMAIN -> subtype = domain type
+
+# TAG -> type = "TAG"
+# TAG -> subtype = "OBJ:"tag
+
+def load_obj_date_first_last():
+    """Copy every first/last date stored in Kvrocks into the Redis cache."""
+    # (hash name, cache-only setter), processed in order: first dates, then last dates
+    loaders = (
+        ('date:first', _set_obj_date_first),
+        ('date:last', _set_obj_date_last),
+    )
+    for hash_name, cache_setter in loaders:
+        stored = r_obj.hgetall(hash_name)
+        for field, value in stored.items():
+            # field layout: "<obj_type>:<subtype>" (subtype may itself contain ':')
+            obj_type, subtype = field.split(':', 1)
+            cache_setter(value, obj_type, subtype=subtype)
+
+
+# MAKE IT WORK WITH TAGS
+def get_obj_date_first(obj_type, subtype='', r_int=False):
+    """Return the first date recorded for an object type/subtype.
+
+    The Redis cache is read first; Kvrocks is only queried when the cache
+    holds no value.
+
+    :param obj_type: object type, first half of the hash field
+    :param subtype: object subtype, second half of the hash field
+    :param r_int: when True return an int (0 if no date is stored),
+                  otherwise return the stored value unchanged (may be None)
+    """
+    field = f'{obj_type}:{subtype}'
+    first = r_cache.hget('date:first', field) or r_obj.hget('date:first', field)
+    if not r_int:
+        return first
+    # NOTE(review): with 0 as the "no date" value, a `date < first` test can
+    # never succeed on an empty store -- callers must handle the missing case.
+    return int(first) if first else 0
+
+def get_obj_date_last(obj_type, subtype='', r_int=False):
+    """Return the last date recorded for an object type/subtype.
+
+    The Redis cache is read first; Kvrocks is only queried when the cache
+    holds no value.
+
+    :param obj_type: object type, first half of the hash field
+    :param subtype: object subtype, second half of the hash field
+    :param r_int: when True return an int (99999999 if no date is stored),
+                  otherwise return the stored value unchanged (may be None)
+    """
+    field = f'{obj_type}:{subtype}'
+    last = r_cache.hget('date:last', field) or r_obj.hget('date:last', field)
+    if not r_int:
+        return last
+    # NOTE(review): with 99999999 as the "no date" value, a `date > last` test
+    # can never succeed on an empty store -- callers must handle the missing case.
+    return int(last) if last else 99999999
+
+# FIRST
+def _set_obj_date_first(date, obj_type, subtype=''):
+    """Write the first date of an object type/subtype to the Redis cache only."""
+    field = f'{obj_type}:{subtype}'
+    r_cache.hset('date:first', field, date)
+
+def set_obj_date_first(date, obj_type, subtype=''):
+    """Write the first date to the Redis cache, then persist it in Kvrocks."""
+    # cache first, same order as the persistent write below
+    _set_obj_date_first(date, obj_type, subtype=subtype)
+    field = f'{obj_type}:{subtype}'
+    r_obj.hset('date:first', field, date)
+
+# LAST
+def _set_obj_date_last(date, obj_type, subtype=''):
+    """Write the last date of an object type/subtype to the Redis cache only."""
+    field = f'{obj_type}:{subtype}'
+    r_cache.hset('date:last', field, date)
+
+def set_obj_date_last(date, obj_type, subtype=''):
+    """Write the last date to the Redis cache, then persist it in Kvrocks."""
+    # cache first, same order as the persistent write below
+    _set_obj_date_last(date, obj_type, subtype=subtype)
+    field = f'{obj_type}:{subtype}'
+    r_obj.hset('date:last', field, date)
+
+def update_obj_date(date, obj_type, subtype=''):
+    """Widen the recorded first/last date range of an object type/subtype.
+
+    The first date is replaced when none is stored yet or when ``date`` is
+    older; the last date is replaced when none is stored yet or when ``date``
+    is more recent.
+
+    :param date: date to register, converted with int() before comparison
+    :param obj_type: object type
+    :param subtype: object subtype (empty string when the type has none)
+    """
+    date = int(date)
+    # Read the raw stored values instead of the r_int=True variants: their
+    # "no date" defaults (0 for first, 99999999 for last) would make both
+    # comparisons below always false, so nothing would ever be recorded.
+    first = get_obj_date_first(obj_type, subtype=subtype)
+    last = get_obj_date_last(obj_type, subtype=subtype)
+    if not first or date < int(first):
+        set_obj_date_first(date, obj_type, subtype=subtype)
+    if not last or date > int(last):
+        set_obj_date_last(date, obj_type, subtype=subtype)


-###############################################################
--- a/bin/lib/module_extractor.py
+++ b/bin/lib/module_extractor.py
@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+import time
+
+import yara
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+import lib.objects.ail_objects
+from lib.objects.Items import Item
+from lib import correlations_engine
+from lib import regex_helper
+from lib.ConfigLoader import ConfigLoader
+
+from lib import Tracker
+
+from modules.CreditCards import CreditCards
+from modules.Iban import Iban
+from modules.Mail import Mail
+from modules.Onion import Onion
+from modules.Tools import Tools
+
+# One instance of each detection module; their extract() methods are called
+# by extract() below to locate the matches inside an item content.
+creditCards = CreditCards()
+ibans = Iban()
+mails = Mail()
+onions = Onion()
+tools = Tools()
+
+config_loader = ConfigLoader()
+r_cache = config_loader.get_redis_conn("Redis_Cache")
+config_loader = None
+
+# Redis key used for the regex helper calls and the temporary YARA match set
+r_key = regex_helper.generate_redis_cache_key('extractor')
+
+# Map: item tag -> module instance able to extract the matching values
+MODULES = {
+    'infoleak:automatic-detection="credit-card"': creditCards,
+    'infoleak:automatic-detection="iban"': ibans,
+    'infoleak:automatic-detection="mail"': mails,
+    'infoleak:automatic-detection="onion"': onions,
+    # APIkey ???
+    # Credentials
+    # Zerobins
+    # CERTIFICATE + KEYS ???
+    # SQL Injection / Libinjection ???
+
+}
+# Every tool tag is handled by the same Tools instance.
+# NOTE(review): the tag is rebuilt from the tool name; if get_tools() yields
+# the TOOLS dict keys, the 'dmytry' entry of modules/Tools.py produces
+# "dmytry-tool" while its declared tag is "dmitry-tool" -- verify.
+for tool_name in tools.get_tools():
+    MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
+
+def get_correl_match(extract_type, obj_id, content, filter_subtypes=['']):
+    """Locate in ``content`` the values of the objects correlated with an item.
+
+    :param extract_type: correlation type to look up (e.g. 'cve', 'username')
+    :param obj_id: item id
+    :param content: item content to search
+    :param filter_subtypes: currently unused (subtype filtering is disabled)
+    :return: whatever regex_helper.regex_finditer returns for the alternation
+             of all correlated values, or [] when there is nothing to search
+    """
+    import re  # local import: this module has no file-level `import re`
+
+    correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
+    to_extract = []
+    for c in correl:
+        _subtype, value = c.split(':', 1)
+        # if subtype in filter_subtypes:
+        if value:  # an empty alternative would match at every position
+            # correlated values are literal strings, not patterns: escape
+            # them so characters like '.', '+' or '(' cannot alter or break
+            # the generated regex
+            to_extract.append(re.escape(value))
+    if to_extract:
+        return regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
+    return []
+
+def _get_yara_match(data):
+    """YARA match callback: cache each matched string as "start:end:value".
+
+    The entries are added to a Redis set whose TTL is reset to 300 seconds
+    after every insertion.
+    """
+    cache_key = f'extractor:yara:match:{r_key}'
+    for offset, _, raw in data.get('strings'):
+        text = raw.decode()
+        stop = offset + len(text)
+        r_cache.sadd(cache_key, f'{offset}:{stop}:{text}')
+        r_cache.expire(cache_key, 300)
+    return yara.CALLBACK_CONTINUE
+
+# TODO RETRO HUNTS
+def get_tracker_match(obj_id, content):
+    """Return the matches of every regex/yara tracker attached to an item.
+
+    Matches from all trackers are accumulated: returning from inside the
+    loop would only ever evaluate the first regex or yara tracker.
+
+    :param obj_id: item id
+    :param content: item content to search
+    :return: list of (start, end, value) tuples, [] when nothing matched
+    """
+    extracted = []
+    trackers = Tracker.get_obj_all_trackers('item', '', obj_id)
+    for tracker_uuid in trackers:
+        tracker_type = Tracker.get_tracker_type(tracker_uuid)
+        tracker = Tracker.get_tracker_by_uuid(tracker_uuid)
+        if tracker_type == 'regex':
+            extracted.extend(regex_helper.regex_finditer(r_key, tracker, obj_id, content))
+        elif tracker_type == 'yara':
+            rule = Tracker.get_yara_rule_by_uuid(tracker_uuid)
+            # _get_yara_match stores each match in a temporary Redis set
+            rule.match(data=content, callback=_get_yara_match,
+                       which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
+            yara_key = f'extractor:yara:match:{r_key}'
+            yara_match = r_cache.smembers(yara_key)
+            # clear the set so the next yara tracker starts from scratch
+            r_cache.delete(yara_key)
+            for match in yara_match:
+                # value may contain ':' -> split on the first two only
+                start, end, value = match.split(':', 2)
+                extracted.append((int(start), int(end), value))
+
+        # elif tracker_type == 'term': # TODO
+        #
+        # elif tracker_type == '':
+    return extracted
+
+
+def extract(obj_id, content=None):
+    """Collect every tracker, module and correlation match found in an item.
+
+    :param obj_id: item id
+    :param content: item content; loaded from the item when empty or None
+    :return: list of matches sorted on their first element (start offset)
+    """
+    from operator import itemgetter
+
+    item = Item(obj_id)
+    if not content:
+        content = item.get_content()
+
+    # 1. trackers attached to the item
+    extracted = []
+    extracted.extend(get_tracker_match(obj_id, content))
+
+    # 2. detection modules, selected by the item tags
+    # print(item.get_tags())
+    for tag in item.get_tags():
+        module = MODULES.get(tag)
+        if not module:
+            continue
+        # print(tag)
+        found = module.extract(obj_id, content, tag)
+        if found:
+            extracted.extend(found)
+
+    # 3. correlated objects  # Decoded, PGP->extract bloc
+    for obj_t in ('cve', 'cryptocurrency', 'username'):
+        found = get_correl_match(obj_t, obj_id, content)
+        if found:
+            extracted.extend(found)
+
+    extracted.sort(key=itemgetter(0))
+    print(extracted)
+    return extracted
+
+
+# Manual test entry point: run the extractor on one hard-coded item id and
+# print the elapsed time in seconds.
+if __name__ == '__main__':
+    t0 = time.time()
+    # Sample item ids; each assignment overrides the previous one, so only
+    # the last uncommented id is actually processed.
+    obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
+    obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
+    obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
+    # obj_id = 'tests/2021/01/01/credit_cards.gz'
+    # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
+    obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
+    obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
+
+    extract(obj_id)
+
+    # get_obj_correl('cve', obj_id, content)
+    # r = get_tracker_match(obj_id, content)
+    # print(r)
+
+    print(time.time() - t0)
+
--- a/bin/lib/objects/CryptoCurrencies.py
+++ b/bin/lib/objects/CryptoCurrencies.py
@ -17,7 +17,7 @@ from lib.ConfigLoader import ConfigLoader
 from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id

 config_loader = ConfigLoader()
-
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 config_loader = None

 digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
--- a/bin/lib/objects/Domains.py
+++ b/bin/lib/objects/Domains.py
@ -21,7 +21,7 @@ from lib.objects.abstract_object import AbstractObject

 from lib.ail_core import paginate_iterator
 from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_domain, get_item_har
-from lib import data_retention_engine
+from lib.data_retention_engine import update_obj_date

 from packages import Date

@ -426,7 +426,7 @@ class Domain(AbstractObject):
        except (ValueError, TypeError):
            status = True

-        data_retention_engine.update_object_date('domain', self.domain_type, date)
+        update_obj_date(date, 'domain', self.domain_type)
        # UP
        if status:
            r_crawler.srem(f'full_{self.domain_type}_down', self.id)
--- a/bin/lib/objects/Items.py
+++ b/bin/lib/objects/Items.py
@ -22,6 +22,7 @@ from lib.ail_core import get_ail_uuid
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib import item_basic
+from lib.data_retention_engine import update_obj_date


 from flask import url_for
@ -245,7 +246,7 @@ class Item(AbstractObject):
            return None

    def get_url(self):
-        return r_object.hset(f'meta:item::{self.id}', 'url')
+        return r_object.hget(f'meta:item::{self.id}', 'url')

    def set_crawled(self, url, parent_id):
        r_object.hset(f'meta:item::{self.id}', 'url', url)
@ -375,6 +376,24 @@ def get_items_by_source(source):
            l_items.append(item_id)
    return l_items

+def _manual_set_items_date_first_last():
+    """Register the lowest and highest numeric directory found in the item sources.
+
+    Every directory located directly under a source folder is converted with
+    int(); the smallest and largest values are passed to update_obj_date().
+    NOTE(review): these directories are presumably years (YYYY) -- confirm
+    this is the date format update_obj_date() expects for items.
+    """
+    dates = []
+    for source in get_items_sources():
+        dir_source = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER, source)
+        dates.extend(
+            int(dir_name) for dir_name in os.listdir(dir_source)
+            if os.path.isdir(os.path.join(dir_source, dir_name))
+        )
+    # 9999 and 0 are the "nothing found" seeds of the min/max search
+    first = min([9999, *dates])
+    last = max([0, *dates])
+    if first != 9999:
+        update_obj_date(first, 'item')
+    if last != 0:
+        update_obj_date(last, 'item')
+
 ################################################################################
 ################################################################################
 ################################################################################
--- a/bin/lib/objects/Screenshots.py
+++ b/bin/lib/objects/Screenshots.py
@ -15,6 +15,7 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 from lib.ConfigLoader import ConfigLoader
 from lib.objects.abstract_object import AbstractObject
+# from lib import data_retention_engine

 config_loader = ConfigLoader()
 r_serv_metadata = config_loader.get_db_conn("Kvrocks_Objects")
--- a/bin/lib/objects/abstract_daterange_object.py
+++ b/bin/lib/objects/abstract_daterange_object.py
@ -19,6 +19,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib.item_basic import is_crawled, get_item_domain
+from lib.data_retention_engine import update_obj_date

 from packages import Date

@ -41,10 +42,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
        super().__init__(obj_type, id)

    def exists(self):
-        return r_object.exists(f'{self.type}:meta:{self.id}')
+        return r_object.exists(f'meta:{self.type}:{self.id}')

    def get_first_seen(self, r_int=False):
-        first_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'first_seen')
+        first_seen = r_object.hget(f'meta:{self.type}:{self.id}', 'first_seen')
        if r_int:
            if first_seen:
                return int(first_seen)
@ -54,7 +55,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
            return first_seen

    def get_last_seen(self, r_int=False):
-        last_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'last_seen')
+        last_seen = r_object.hget(f'meta:{self.type}:{self.id}', 'last_seen')
        if r_int:
            if last_seen:
                return int(last_seen)
@ -64,7 +65,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
            return last_seen

    def get_nb_seen(self):
-        return r_object.hget(f'{self.type}:meta:{self.id}', 'nb')
+        return r_object.hget(f'meta:{self.type}:{self.id}', 'nb')

    def get_nb_seen_by_date(self, date):
        nb = r_object.hget(f'{self.type}:date:{date}', self.id)
@ -82,10 +83,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
        return meta_dict

    def set_first_seen(self, first_seen):
-        r_object.hset(f'{self.type}:meta:{self.id}', 'first_seen', first_seen)
+        r_object.hset(f'meta:{self.type}:{self.id}', 'first_seen', first_seen)

    def set_last_seen(self, last_seen):
-        r_object.hset(f'{self.type}:meta:{self.id}', 'last_seen', last_seen)
+        r_object.hset(f'meta:{self.type}:{self.id}', 'last_seen', last_seen)

    def update_daterange(self, date):
        date = int(date)
@ -114,12 +115,13 @@ class AbstractDaterangeObject(AbstractObject, ABC):
            r_object.sadd(f'{self.type}:all', self.id)
        else:
            self.update_daterange(date)
+        update_obj_date(date, self.type)

        # NB Object seen by day
        r_object.hincrby(f'{self.type}:date:{date}', self.id, 1)
        r_object.zincrby(f'{self.type}:date:{date}', 1, self.id) # # # # # # # # # #
        # NB Object seen
-        r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1)
+        r_object.hincrby(f'meta:{self.type}:{self.id}', 'nb', 1)

        # Correlations
        self.add_correlation('item', '', item_id)
--- a/bin/lib/objects/abstract_subtype_object.py
+++ b/bin/lib/objects/abstract_subtype_object.py
@ -19,12 +19,14 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib.item_basic import is_crawled, get_item_domain
+from lib.data_retention_engine import update_obj_date

 from packages import Date

 # LOAD CONFIG
 config_loader = ConfigLoader()
 r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 config_loader = None

 # # TODO: ADD CORRELATION ENGINE
@ -47,7 +49,7 @@ class AbstractSubtypeObject(AbstractObject):
        self.subtype = subtype

    def exists(self):
-        return r_metadata.exists(f'{self.type}_metadata_{self.subtype}:{self.id}')
+        return r_object.exists(f'meta:{self.type}:{self.subtype}:{self.id}')

    # def exists(self):
    #     res = r_metadata.zscore(f'{self.type}_all:{self.subtype}', self.id)
@ -57,7 +59,7 @@ class AbstractSubtypeObject(AbstractObject):
    #         return False

    def get_first_seen(self, r_int=False):
-        first_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen')
+        first_seen = r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'first_seen')
        if r_int:
            if first_seen:
                return int(first_seen)
@ -67,7 +69,7 @@ class AbstractSubtypeObject(AbstractObject):
            return first_seen

    def get_last_seen(self, r_int=False):
-        last_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen')
+        last_seen = r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last_seen')
        if r_int:
            if last_seen:
                return int(last_seen)
@ -94,10 +96,10 @@ class AbstractSubtypeObject(AbstractObject):
        return meta_dict

    def set_first_seen(self, first_seen):
-        r_metadata.hset(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen', first_seen)
+        r_object.hset(f'meta:{self.type}:{self.subtype}:{self.id}', 'first_seen', first_seen)

    def set_last_seen(self, last_seen):
-        r_metadata.hset(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen', last_seen)
+        r_object.hset(f'meta:{self.type}:{self.subtype}:{self.id}', 'last_seen', last_seen)

    def update_daterange(self, date):
        date = int(date)
@ -124,12 +126,13 @@ class AbstractSubtypeObject(AbstractObject):
 # NEW field => first record(last record)
 #                   by subtype ??????

-#               => data Retention + efficicent search
+#               => data Retention + efficient search
 #
 #

    def add(self, date, item_id):
        self.update_daterange(date)
+        update_obj_date(date, self.type, self.subtype)
        # daily
        r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
        # all subtypes
--- a/bin/lib/regex_helper.py
+++ b/bin/lib/regex_helper.py
@ -71,7 +71,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
            if r_set:
                all_items = r_serv_cache.smembers(redis_key)
            else:
-                all_items = r_serv_cache.lrange(redis_key, 0 ,-1)
+                all_items = r_serv_cache.lrange(redis_key, 0, -1)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return all_items
@ -80,29 +80,66 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
        proc.terminate()
        sys.exit(0)

-def _regex_search(redis_key, regex, item_content):
-    first_occ = regex.search(item_content)
-    if first_occ:
-        r_serv_cache.set(redis_key, first_occ)
+def _regex_finditer(r_key, regex, content):
+    """Worker: push every match of ``regex`` in ``content`` to a Redis list.
+
+    Each entry is formatted as "start:end:value"; the list expires after
+    360 seconds.
+    """
+    for found in re.finditer(regex, content):
+        begin, stop = found.span()
+        r_serv_cache.rpush(r_key, f'{begin}:{stop}:{found.group()}')
+    r_serv_cache.expire(r_key, 360)

-def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
-    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
+def regex_finditer(r_key, regex, item_id, content, max_time=30):
+    proc = Proc(target=_regex_finditer, args=(r_key, regex, content))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
-            Statistics.incr_module_timeout_statistic(module_name)
-            err_mess = f"{module_name}: processing timeout: {item_id}"
+            Statistics.incr_module_timeout_statistic(r_key)
+            err_mess = f"{r_key}: processing timeout: {item_id}"
            print(err_mess)
            publisher.info(err_mess)
-            return None
+            return []
        else:
-            first_occ = r_serv_cache.get(redis_key)
-            r_serv_cache.delete(redis_key)
+            res = r_serv_cache.lrange(r_key, 0, -1)
+            r_serv_cache.delete(r_key)
            proc.terminate()
-            return first_occ
+            all_match = []
+            for match in res:
+                start, end, value = match.split(':', 2)
+                all_match.append((int(start), int(end), value))
+            return all_match
    except KeyboardInterrupt:
-        print("Caught KeyboardInterrupt, terminating workers")
+        print("Caught KeyboardInterrupt, terminating regex worker")
+        proc.terminate()
+        sys.exit(0)
+
+def _regex_search(r_key, regex, content):
+    """Worker: set ``r_key`` to 1 (360 seconds TTL) when ``regex`` matches ``content``."""
+    if not re.search(regex, content):
+        return
+    r_serv_cache.set(r_key, 1)
+    r_serv_cache.expire(r_key, 360)
+
+def regex_search(r_key, regex, item_id, content, max_time=30):
+    proc = Proc(target=_regex_search, args=(r_key, regex, content))
+    try:
+        proc.start()
+        proc.join(max_time)
+        if proc.is_alive():
+            proc.terminate()
+            Statistics.incr_module_timeout_statistic(r_key)
+            err_mess = f"{r_key}: processing timeout: {item_id}"
+            print(err_mess)
+            publisher.info(err_mess)
+            return False
+        else:
+            if r_serv_cache.exists(r_key):
+                r_serv_cache.delete(r_key)
+                return True
+            else:
+                r_serv_cache.delete(r_key)
+                return False
+    except KeyboardInterrupt:
+        print("Caught KeyboardInterrupt, terminating regex worker")
        proc.terminate()
        sys.exit(0)
--- a/bin/modules/CreditCards.py
+++ b/bin/modules/CreditCards.py
@ -45,41 +45,57 @@ class CreditCards(AbstractModule):
            ]

        self.regex = re.compile('|'.join(cards))
+        self.re_clean_card = r'[^0-9]'

-        # Waiting time in secondes between to message proccessed
+        # Waiting time in seconds between two processed messages
        self.pending_seconds = 10

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

+    def get_valid_card(self, card):
+        """Return the card number stripped of non-digit characters when
+        lib_refine.is_luhn_valid() accepts it, None otherwise."""
+        digits = re.sub(self.re_clean_card, '', card)
+        return digits if lib_refine.is_luhn_valid(digits) else None
+
+    def extract(self, obj_id, content, tag):
+        """Return the (start, end, value) matches of the card regex whose
+        value is accepted by get_valid_card()."""
+        valid_matches = []
+        for match in self.regex_finditer(self.regex, obj_id, content):
+            _start, _end, number = match
+            if not self.get_valid_card(number):
+                continue
+            valid_matches.append(match)
+        return valid_matches
+
    def compute(self, message, r_result=False):
        item_id, score = message.split()
        item = Item(item_id)
        content = item.get_content()
-        all_cards = re.findall(self.regex, content)
+        all_cards = self.regex_findall(self.regex, item.id, content)

        if len(all_cards) > 0:
            # self.redis_logger.debug(f'All matching {all_cards}')
-            creditcard_set = set([])
-
+            creditcard_set = set()
            for card in all_cards:
-                clean_card = re.sub('[^0-9]', '', card)
-                if lib_refine.is_luhn_valid(clean_card):
-                    self.redis_logger.debug(f'{clean_card} is valid')
-                    creditcard_set.add(clean_card)
+                print(card)
+                valid_card = self.get_valid_card(card)
+                if valid_card:
+                    creditcard_set.add(valid_card)

-            # pprint.pprint(creditcard_set)
+            # print(creditcard_set)
            to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
-            if len(creditcard_set) > 0:
-                self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
+            if creditcard_set:
+                mess = f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.id}'
+                print(mess)
+                self.redis_logger.warning(mess)

-                msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
+                msg = f'infoleak:automatic-detection="credit-card";{item.id}'
                self.send_message_to_queue(msg, 'Tags')

                if r_result:
                    return creditcard_set
            else:
-                self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
+                self.redis_logger.info(f'{to_print}CreditCard related;{item.id}')


 if __name__ == '__main__':
--- a/bin/modules/CveModule.py
+++ b/bin/modules/CveModule.py
@ -26,13 +26,13 @@ from lib.objects import Cves
 from lib.objects.Items import Item


-class Cve(AbstractModule):
+class CveModule(AbstractModule):
    """
-    Cve module for AIL framework
+    CveModule for AIL framework
    """

    def __init__(self):
-        super(Cve, self).__init__()
+        super(CveModule, self).__init__()

        # regex to find CVE
        self.reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')
@ -68,6 +68,6 @@ class Cve(AbstractModule):

 if __name__ == '__main__':

-    module = Cve()
+    module = CveModule()
    # module.run()
    module.compute('crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd 9')
--- a/bin/modules/Global.py
+++ b/bin/modules/Global.py
@ -41,7 +41,8 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 from modules.abstract_module import AbstractModule
 from lib.ConfigLoader import ConfigLoader
-
+from lib.data_retention_engine import update_obj_date
+from lib import item_basic

 class Global(AbstractModule):
    """
@ -85,7 +86,7 @@ class Global(AbstractModule):
        if len(splitted) == 2:
            item, gzip64encoded = splitted

-            # Remove PASTES_FOLDER from item path (crawled item + submited)
+            # Remove PASTES_FOLDER from item path (crawled item + submitted)
            if self.PASTES_FOLDERS in item:
                item = item.replace(self.PASTES_FOLDERS, '', 1)

@ -125,6 +126,8 @@ class Global(AbstractModule):
                        if self.PASTES_FOLDERS in item_id:
                            item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)

+                        update_obj_date(item_basic.get_item_date(item_id), 'item')
+
                        self.send_message_to_queue(item_id)
                        self.processed_item += 1
                        print(item_id)
--- a/bin/modules/Iban.py
+++ b/bin/modules/Iban.py
@ -62,6 +62,17 @@ class Iban(AbstractModule):
            return True
        return False

+    def extract(self, obj_id, content, tag):
+        """Return the (start, end, value) matches of the IBAN regex whose
+        alphanumeric-only value is accepted by is_valid_iban()."""
+        valid_matches = []
+        for match in self.regex_finditer(self.iban_regex, obj_id, content):
+            _start, _end, raw = match
+            # keep letters and digits only before validating
+            candidate = ''.join(filter(str.isalnum, raw))
+            if not self.is_valid_iban(candidate):
+                continue
+            print(candidate)
+            valid_matches.append(match)
+        return valid_matches
+
    def compute(self, message):
        item = Item(message)
        item_id = item.get_id()
--- a/bin/modules/Mail.py
+++ b/bin/modules/Mail.py
@ -57,7 +57,7 @@ class Mail(AbstractModule):
        return self.r_cache.exists(f'mxdomain:{mxdomain}')

    def save_mxdomain_in_cache(self, mxdomain):
-        self.r_cache.setex(f'mxdomain:{mxdomain}', 1, datetime.timedelta(days=1))
+        self.r_cache.setex(f'mxdomain:{mxdomain}', datetime.timedelta(days=1), 1)

    def check_mx_record(self, set_mxdomains):
        """Check if emails MX domains are responding.
@ -118,6 +118,21 @@ class Mail(AbstractModule):
                    print(e)
        return valid_mxdomain

+    def extract(self, obj_id, content, tag):
+        """Return the (start, end, value) mail matches whose domain is
+        returned by check_mx_record()."""
+        # lower-cased mail domain -> matches found for that domain
+        by_domain = {}
+        for match in self.regex_finditer(self.email_regex, obj_id, content):
+            _start, _end, address = match
+            domain = address.rsplit('@', 1)[1].lower()
+            by_domain.setdefault(domain, []).append(match)
+
+        valid_matches = []
+        for mx in self.check_mx_record(by_domain.keys()):
+            valid_matches.extend(by_domain[mx])
+        return valid_matches
+
    # # TODO: sanitize mails
    def compute(self, message):
        item_id, score = message.split()
--- a/bin/modules/Onion.py
+++ b/bin/modules/Onion.py
@ -55,6 +55,17 @@ class Onion(AbstractModule):
        # TEMP var: SAVE I2P Domain (future I2P crawler)
        # self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")

+    def extract(self, obj_id, content, tag):
+        """Return the (start, end, value) matches of the onion regex whose
+        domain is accepted by crawlers.is_valid_onion_domain()."""
+        valid_matches = []
+        for match in self.regex_finditer(self.onion_regex, obj_id, content):
+            _start, _end, url = match
+            domain = crawlers.unpack_url(url)['domain']
+            if crawlers.is_valid_onion_domain(domain):
+                valid_matches.append(match)
+        return valid_matches
+
    def compute(self, message):
        onion_urls = []
        domains = []
--- a/bin/modules/Tools.py
+++ b/bin/modules/Tools.py
@ -0,0 +1,434 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+Tools Module
+============================
+
+Search tools output
+
+"""
+
+import os
+import sys
+import time
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from modules.abstract_module import AbstractModule
+from lib.objects.Items import Item
+
+
+# Signature table of the supported tools.
+# Each key is a tool name; each value is a dict with:
+#   'regex': pattern searched in the item content to recognise the tool output
+#   'tag':   tag sent to the 'Tags' queue when the pattern matches
+# NOTE(review): some patterns contain misspellings ('scannig', 'Becase',
+# 'Probabilty') - presumably they mirror the tools' own output; verify before
+# "correcting" them.
+TOOLS = {
+    'sqlmap': {
+        'regex': r'Usage of sqlmap for attacking targets without|all tested parameters do not appear to be injectable|sqlmap identified the following injection point|Title:[^\n]*((error|time|boolean)-based|stacked queries|UNION query)',
+        'tag': 'infoleak:automatic-detection="sqlmap-tool"',
+    },
+    'wig': {
+        'regex': r'(?s)wig - WebApp Information Gatherer.+?_{10,}',
+        'tag': 'infoleak:automatic-detection="wig-tool"',
+    },
+    # NOTE: the key is spelled 'dmytry' while the tag below uses 'dmitry'
+    'dmytry': {
+        'regex': r'(?s)Gathered (TCP Port|Inet-whois|Netcraft|Subdomain|E-Mail) information for.+?-{10,}',
+        'tag': 'infoleak:automatic-detection="dmitry-tool"',
+    },
+    'inurlbr': {
+        'regex': r'Usage of INURLBR for attacking targets without prior mutual consent is illegal',
+        'tag': 'infoleak:automatic-detection="inurlbr-tool"',
+    },
+    'wafw00f': {
+        'regex': r'(?s)WAFW00F - Web Application Firewall Detection Tool.+?Checking',
+        'tag': 'infoleak:automatic-detection="wafw00f-tool"',
+    },
+    'sslyze': {
+        'regex': r'(?s)PluginSessionRenegotiation.+?SCAN RESULTS FOR',
+        'tag': 'infoleak:automatic-detection="sslyze-tool"',
+    },
+    'nmap': {
+        'regex': r'(?s)Nmap scan report for.+?Host is',
+        'tag': 'infoleak:automatic-detection="nmap-tool"',
+    },
+    'dnsenum': {
+        'regex': r'(?s)dnsenum(\.pl)? VERSION:.+?Trying Zone Transfer',
+        'tag': 'infoleak:automatic-detection="dnsenum-tool"',
+    },
+    'knock': {
+        'regex': r'I scannig with my internal wordlist',
+        'tag': 'infoleak:automatic-detection="knock-tool"',
+    },
+    'nikto': {
+        'regex': r'(?s)\+ Target IP:.+?\+ Start Time:',
+        'tag': 'infoleak:automatic-detection="nikto-tool"',
+    },
+    'dnscan': {
+        'regex': r'(?s)\[\*\] Processing domain.+?\[\+\] Getting nameservers.+?records found',
+        'tag': 'infoleak:automatic-detection="dnscan-tool"',
+    },
+    'dnsrecon': {
+        'regex': r'Performing General Enumeration of Domain:|Performing TLD Brute force Enumeration against',
+        'tag': 'infoleak:automatic-detection="dnsrecon-tool"',
+    },
+    'striker': {
+        'regex': r'Crawling the target for fuzzable URLs|Honeypot Probabilty:',
+        'tag': 'infoleak:automatic-detection="striker-tool"',
+    },
+    'rhawk': {
+        'regex': r'S U B - D O M A I N   F I N D E R',
+        'tag': 'infoleak:automatic-detection="rhawk-tool"',
+    },
+    'uniscan': {
+        'regex': r'\| \[\+\] E-mail Found:',
+        'tag': 'infoleak:automatic-detection="uniscan-tool"',
+    },
+    'masscan': {
+        'regex': r'(?s)Starting masscan [\d.]+.+?Scanning|bit.ly/14GZzcT',
+        'tag': 'infoleak:automatic-detection="masscan-tool"',
+    },
+    'msfconsole': {
+        'regex': r'=\[ metasploit v[\d.]+.+?msf >',
+        'tag': 'infoleak:automatic-detection="msfconsole-tool"',
+    },
+    'amap': {
+        'regex': r'\bamap v[\d.]+ \(www.thc.org/thc-amap\)',
+        'tag': 'infoleak:automatic-detection="amap-tool"',
+    },
+    'automater': {
+        'regex': r'(?s)\[\*\] Checking.+?_+ Results found for:',
+        'tag': 'infoleak:automatic-detection="automater-tool"',
+    },
+    'braa': {
+        'regex': r'\bbraa public@[\d.]+',
+        'tag': 'infoleak:automatic-detection="braa-tool"',
+    },
+    'ciscotorch': {
+        'regex': r'Becase we need it',
+        'tag': 'infoleak:automatic-detection="ciscotorch-tool"',
+    },
+    'theharvester': {
+        'regex': r'Starting harvesting process for domain:',
+        'tag': 'infoleak:automatic-detection="theharvester-tool"',
+    },
+    'sslstrip': {
+        'regex': r'sslstrip [\d.]+ by Moxie Marlinspike running',
+        'tag': 'infoleak:automatic-detection="sslstrip-tool"',
+    },
+    'sslcaudit': {
+        'regex': r'# filebag location:',
+        'tag': 'infoleak:automatic-detection="sslcaudit-tool"',
+    },
+    'smbmap': {
+        'regex': r'\[\+\] Finding open SMB ports\.\.\.',
+        'tag': 'infoleak:automatic-detection="smbmap-tool"',
+    },
+    'reconng': {
+        'regex': r'\[\*\] Status: unfixed|\[recon-ng\]\[default\]',
+        'tag': 'infoleak:automatic-detection="reconng-tool"',
+    },
+    'p0f': {
+        'regex': r'\bp0f [^ ]+ by Michal Zalewski',
+        'tag': 'infoleak:automatic-detection="p0f-tool"',
+    },
+    'hping3': {
+        'regex': r'\bHPING [^ ]+ \([^)]+\): [^ ]+ mode set',
+        'tag': 'infoleak:automatic-detection="hping3-tool"',
+    },
+    'enum4linux': {
+        'regex': r'Starting enum4linux v[\d.]+|\|    Target Information    \|',
+        'tag': 'infoleak:automatic-detection="enum4linux-tool"',
+    },
+    'dnstracer': {
+        'regex': r'(?s)Tracing to.+?DNS HEADER \(send\)',
+        'tag': 'infoleak:automatic-detection="dnstracer-tool"',
+    },
+    'dnmap': {
+        'regex': r'dnmap_(client|server)|Nmap output files stored in \'nmap_output\' directory',
+        'tag': 'infoleak:automatic-detection="dnmap-tool"',
+    },
+    'arpscan': {
+        'regex': r'Starting arp-scan [^ ]+ with \d+ hosts',
+        'tag': 'infoleak:automatic-detection="arpscan-tool"',
+    },
+    'cdpsnarf': {
+        'regex': r'(?s)CDPSnarf v[^ ]+.+?Waiting for a CDP packet\.\.\.',
+        'tag': 'infoleak:automatic-detection="cdpsnarf-tool"',
+    },
+    'dnsmap': {
+        'regex': r'DNS Network Mapper by pagvac',
+        'tag': 'infoleak:automatic-detection="dnsmap-tool"',
+    },
+    'dotdotpwn': {
+        'regex': r'DotDotPwn v[^ ]+|dotdotpwn@sectester.net|\[\+\] Creating Traversal patterns',
+        'tag': 'infoleak:automatic-detection="dotdotpwn-tool"',
+    },
+    'searchsploit': {
+        'regex': r'(exploits|shellcodes)/|searchsploit_rc|Exploit Title',
+        'tag': 'infoleak:automatic-detection="searchsploit-tool"',
+    },
+    'fierce': {
+        'regex': r'(?s)Trying zone transfer first.+Checking for wildcard DNS',
+        'tag': 'infoleak:automatic-detection="fierce-tool"',
+    },
+    'firewalk': {
+        'regex': r'Firewalk state initialization completed successfully|Ramping phase source port',
+        'tag': 'infoleak:automatic-detection="firewalk-tool"',
+    },
+    'fragroute': {
+        'regex': r'\bfragroute: tcp_seg -> ip_frag',
+        'tag': 'infoleak:automatic-detection="fragroute-tool"',
+    },
+    'fragrouter': {
+        'regex': r'fragrouter: frag-\d+:',
+        'tag': 'infoleak:automatic-detection="fragrouter-tool"',
+    },
+    'goofile': {
+        'regex': r'code.google.com/p/goofile\b',
+        'tag': 'infoleak:automatic-detection="goofile-tool"',
+    },
+    'intrace': {
+        'regex': r'\bInTrace [\d.]+ \-\-',
+        'tag': 'infoleak:automatic-detection="intrace-tool"',
+    },
+    'ismtp': {
+        'regex': r'Testing SMTP server \[user enumeration\]',
+        'tag': 'infoleak:automatic-detection="ismtp-tool"',
+    },
+    'lbd': {
+        'regex': r'Checking for (DNS|HTTP)-Loadbalancing',
+        'tag': 'infoleak:automatic-detection="lbd-tool"',
+    },
+    'miranda': {
+        'regex': r'Entering discovery mode for \'upnp:',
+        'tag': 'infoleak:automatic-detection="miranda-tool"',
+    },
+    'ncat': {
+        'regex': r'nmap.org/ncat',
+        'tag': 'infoleak:automatic-detection="ncat-tool"',
+    },
+    'ohrwurm': {
+        'regex': r'\bohrwurm-[\d.]+',
+        'tag': 'infoleak:automatic-detection="ohrwurm-tool"',
+    },
+    'oscanner': {
+        'regex': r'Loading services/sids from service file',
+        'tag': 'infoleak:automatic-detection="oscanner-tool"',
+    },
+    'sfuzz': {
+        'regex': r'AREALLYBADSTRING|sfuzz/sfuzz',
+        'tag': 'infoleak:automatic-detection="sfuzz-tool"',
+    },
+    'sidguess': {
+        'regex': r'SIDGuesser v[\d.]+',
+        'tag': 'infoleak:automatic-detection="sidguess-tool"',
+    },
+    'sqlninja': {
+        'regex': r'Sqlninja rel\. [\d.]+',
+        'tag': 'infoleak:automatic-detection="sqlninja-tool"',
+    },
+    'sqlsus': {
+        'regex': r'sqlsus version [\d.]+',
+        'tag': 'infoleak:automatic-detection="sqlsus-tool"',
+    },
+    'dnsdict6': {
+        'regex': r'Starting DNS enumeration work on',
+        'tag': 'infoleak:automatic-detection="dnsdict6-tool"',
+    },
+    'unixprivesccheck': {
+        'regex': r'Recording Interface IP addresses',
+        'tag': 'infoleak:automatic-detection="unixprivesccheck-tool"',
+    },
+    'yersinia': {
+        'regex': r'yersinia@yersinia.net',
+        'tag': 'infoleak:automatic-detection="yersinia-tool"',
+    },
+    'armitage': {
+        'regex': r'\[\*\] Starting msfrpcd for you',
+        'tag': 'infoleak:automatic-detection="armitage-tool"',
+    },
+    'backdoorfactory': {
+        'regex': r'\[\*\] In the backdoor module',
+        'tag': 'infoleak:automatic-detection="backdoorfactory-tool"',
+    },
+    'beef': {
+        'regex': r'Please wait as BeEF services are started',
+        'tag': 'infoleak:automatic-detection="beef-tool"',
+    },
+    'cat': {
+        'regex': r'Cisco Auditing Tool.+?g0ne',
+        'tag': 'infoleak:automatic-detection="cat-tool"',
+    },
+    'cge': {
+        'regex': r'Vulnerability successful exploited with \[',
+        'tag': 'infoleak:automatic-detection="cge-tool"',
+    },
+    'john': {
+        'regex': r'John the Ripper password cracker, ver:|Loaded \d+ password hash \(',
+        'tag': 'infoleak:automatic-detection="john-tool"',
+    },
+    'keimpx': {
+        'regex': r'\bkeimpx [\d.]+',
+        'tag': 'infoleak:automatic-detection="keimpx-tool"',
+    },
+    'maskprocessor': {
+        'regex': r'mp by atom, High-Performance word generator',
+        'tag': 'infoleak:automatic-detection="maskprocessor-tool"',
+    },
+    'ncrack': {
+        'regex': r'Starting Ncrack[^\n]+http://ncrack.org',
+        'tag': 'infoleak:automatic-detection="ncrack-tool"',
+    },
+    'patator': {
+        'regex': r'http://code.google.com/p/patator/|Starting Patator v',
+        'tag': 'infoleak:automatic-detection="patator-tool"',
+    },
+    'phrasendrescher': {
+        'regex': r'phrasen\|drescher [\d.]+',
+        'tag': 'infoleak:automatic-detection="phrasendrescher-tool"',
+    },
+    'polenum': {
+        'regex': r'\[\+\] Password Complexity Flags:',
+        'tag': 'infoleak:automatic-detection="polenum-tool"',
+    },
+    'rainbowcrack': {
+        'regex': r'Official Website: http://project-rainbowcrack.com/',
+        'tag': 'infoleak:automatic-detection="rainbowcrack-tool"',
+    },
+    'rcracki_mt': {
+        'regex': r'Found \d+ rainbowtable files\.\.\.',
+        'tag': 'infoleak:automatic-detection="rcracki_mt-tool"',
+    },
+    'tcpdump': {
+        'regex': r'tcpdump: listening on.+capture size \d+|\d+ packets received by filter',
+        'tag': 'infoleak:automatic-detection="tcpdump-tool"',
+    },
+    'hydra': {
+        'regex': r'Hydra \(http://www.thc.org/thc-hydra\)',
+        'tag': 'infoleak:automatic-detection="hydra-tool"',
+    },
+    'netcat': {
+        'regex': r'Listening on \[[\d.]+\] \(family',
+        'tag': 'infoleak:automatic-detection="netcat-tool"',
+    },
+    'nslookup': {
+        'regex': r'Non-authoritative answer:',
+        'tag': 'infoleak:automatic-detection="nslookup-tool"',
+    },
+    'dig': {
+        'regex': r'; <<>> DiG [\d.]+',
+        'tag': 'infoleak:automatic-detection="dig-tool"',
+    },
+    'whois': {
+        'regex': r'(?i)Registrar WHOIS Server:|Registrar URL: http://|DNSSEC: unsigned|information on Whois status codes|REGISTERED, DELEGATED|[Rr]egistrar:|%[^\n]+(WHOIS|2016/679)',
+        'tag': 'infoleak:automatic-detection="whois-tool"',
+    },
+    'nessus': {
+        'regex': r'nessus_(report_(get|list|exploits)|scan_(new|status))|nessuscli|nessusd|nessus-service',
+        'tag': 'infoleak:automatic-detection="nessus-tool"',
+    },
+    'openvas': {
+        'regex': r'/openvas/',
+        'tag': 'infoleak:automatic-detection="openvas-tool"',
+    },
+    'golismero': {
+        'regex': r'GoLismero[\n]+The Web Knife',
+        'tag': 'infoleak:automatic-detection="golismero-tool"',
+    },
+    'wpscan': {
+        'regex': r'WordPress Security Scanner by the WPScan Team|\[\+\] Interesting header:',
+        'tag': 'infoleak:automatic-detection="wpscan-tool"',
+    },
+    'skipfish': {
+        'regex': r'\[\+\] Sorting and annotating crawl nodes:|skipfish version [\d.]+',
+        'tag': 'infoleak:automatic-detection="skipfish-tool"',
+    },
+    'arachni': {
+        'regex': r'With the support of the community and the Arachni Team|\[\*\] Waiting for plugins to settle\.\.\.',
+        'tag': 'infoleak:automatic-detection="arachni-tool"',
+    },
+    'dirb': {
+        'regex': r'==> DIRECTORY:|\bDIRB v[\d.]+',
+        'tag': 'infoleak:automatic-detection="dirb-tool"',
+    },
+    'joomscan': {
+        'regex': r'OWASP Joomla! Vulnerability Scanner v[\d.]+',
+        'tag': 'infoleak:automatic-detection="joomscan-tool"',
+    },
+    'jbossautopwn': {
+        'regex': r'\[x\] Now creating BSH script\.\.\.|\[x\] Now deploying \.war file:',
+        'tag': 'infoleak:automatic-detection="jbossautopwn-tool"',
+    },
+    'grabber': {
+        'regex': r'runSpiderScan @',
+        'tag': 'infoleak:automatic-detection="grabber-tool"',
+    },
+    'fimap': {
+        'regex': r'Automatic LFI/RFI scanner and exploiter',
+        'tag': 'infoleak:automatic-detection="fimap-tool"',
+    },
+    'dsxs': {
+        'regex': r'Damn Small XSS Scanner \(DSXS\)',
+        'tag': 'infoleak:automatic-detection="dsxs-tool"',
+    },
+    'dsss': {
+        'regex': r'Damn Small SQLi Scanner \(DSSS\)',
+        'tag': 'infoleak:automatic-detection="dsss-tool"',
+    },
+    'dsjs': {
+        'regex': r'Damn Small JS Scanner \(DSJS\)',
+        'tag': 'infoleak:automatic-detection="dsjs-tool"',
+    },
+    'dsfs': {
+        'regex': r'Damn Small FI Scanner \(DSFS\)',
+        'tag': 'infoleak:automatic-detection="dsfs-tool"',
+    },
+    'identywaf': {
+        'regex': r'\[o\] initializing handlers\.\.\.',
+        'tag': 'infoleak:automatic-detection="identywaf-tool"',
+    },
+    'whatwaf': {
+        'regex': r'<sCRIPT>ALeRt.+?WhatWaf\?',
+        'tag': 'infoleak:automatic-detection="whatwaf-tool"',
+    }
+}
+
+class Tools(AbstractModule):
+    """
+    Tools module for AIL framework
+
+    Searches item content for the output signatures listed in TOOLS and sends
+    the matching tool tag to the 'Tags' queue.
+    """
+
+    def __init__(self):
+        super(Tools, self).__init__()
+
+        # Time limit handed to the regex helpers as max_time
+        self.max_execution_time = 30
+        # Waiting time in seconds between two processed messages
+        self.pending_seconds = 10
+        # Send module state to logs
+        self.redis_logger.info(f"Module {self.module_name} initialized")
+
+    def get_tools(self):
+        """Return the names of the supported tools (the keys of TOOLS)."""
+        return TOOLS.keys()
+
+    def extract(self, obj_id, content, tag):
+        """
+        Return the matches of the tool signature associated with `tag`.
+
+        The signature is selected by comparing `tag` with each entry's 'tag'
+        value. Rebuilding the tool name from the tag text is not reliable:
+        the 'dmytry' entry carries the tag "dmitry-tool", so a name-based
+        lookup raised KeyError for it.
+
+        :param obj_id: id of the object the content belongs to
+        :param content: text to search
+        :param tag: tag value, e.g. 'infoleak:automatic-detection="nmap-tool"'
+        :return: result of regex_finditer() for the signature, or an empty
+                 list when no entry of TOOLS uses this tag
+        """
+        for tool in TOOLS.values():
+            if tool['tag'] == tag:
+                return self.regex_finditer(tool['regex'], obj_id, content)
+        return []
+
+    def compute(self, message):
+        """
+        Search every tool signature in an item and tag the item on each hit.
+
+        :param message: item id used to build the Item
+        """
+        item = Item(message)
+        content = item.get_content()
+
+        for tool_name, tool in TOOLS.items():
+            match = self.regex_search(tool['regex'], item.id, content)
+            if match:
+                print(f'{item.id} found: {tool_name}')
+                # Tag Item
+                msg = f"{tool['tag']};{item.id}"
+                self.send_message_to_queue(msg, 'Tags')
+                # TODO ADD LOGS
+
+
+if __name__ == '__main__':
+    module = Tools()
+    # Run the module loop; the hard-coded debug item that was computed here
+    # instead made the module handle a single fixed item and exit.
+    module.run()
+
--- a/bin/modules/abstract_module.py
+++ b/bin/modules/abstract_module.py
@ -47,7 +47,7 @@ class AbstractModule(ABC):
        self.redis_logger.channel = logger_channel

        #Cache key
-        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
+        self.r_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
        self.max_execution_time = 30

        # Run module endlessly
@ -81,6 +81,12 @@ class AbstractModule(ABC):
        self.process.populate_set_out(message, queue_name)
        # add to new set_module

+    def regex_search(self, regex, obj_id, content):
+        """
+        regex search helper: delegates to regex_helper.regex_search with this
+        module's cache key and max_execution_time as time limit.
+
+        :param regex: pattern to search
+        :param obj_id: id of the object the content belongs to
+        :param content: text to search
+        :return: whatever regex_helper.regex_search returns
+        """
+        time_limit = self.max_execution_time
+        return regex_helper.regex_search(self.r_cache_key, regex, obj_id, content, max_time=time_limit)
+
+    def regex_finditer(self, regex, obj_id, content):
+        """
+        regex finditer helper: delegates to regex_helper.regex_finditer with this
+        module's cache key and max_execution_time as time limit.
+
+        :param regex: pattern to search
+        :param obj_id: id of the object the content belongs to
+        :param content: text to search
+        :return: whatever regex_helper.regex_finditer returns
+        """
+        time_limit = self.max_execution_time
+        return regex_helper.regex_finditer(self.r_cache_key, regex, obj_id, content, max_time=time_limit)
+
    def regex_findall(self, regex, id, content):
        """
        regex findall helper (force timeout)
@ -90,7 +96,7 @@ class AbstractModule(ABC):

        ex: send_to_queue(item_id, 'Global')
        """
-        return regex_helper.regex_findall(self.module_name, self.redis_cache_key, regex, id, content, max_time=self.max_execution_time)
+        return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, id, content, max_time=self.max_execution_time)

    def run(self):
        """
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@ -126,7 +126,7 @@ subscribe = Redis_Global
 subscribe = Redis_Credential
 publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags

-[Cve]
+[CveModule]
 subscribe = Redis_Cve
 publish = Redis_Tags

--- a/files/misp-galaxy
+++ b/files/misp-galaxy
@ -1 +1 @@
-Subproject commit aba1321b34e18122ec1825b54e2fc8176a4bd25c
+Subproject commit de12f46ba6305d457b1e248cfeeec89827ec93c9
--- a/other_installers/docker/docker-compose.yml
+++ b/other_installers/docker/docker-compose.yml
@ -248,7 +248,7 @@ services:
    depends_on:
      - redis-log
    entrypoint:
-      - /opt/AIL/bin/Cve.py
+      - /opt/AIL/bin/CveModule.py
    network_mode: service:flask
    image: ail-framework
    volumes:
--- a/var/www/blueprints/objects_item.py
+++ b/var/www/blueprints/objects_item.py
@ -26,6 +26,8 @@ from lib.objects.Screenshots import Screenshot
 from lib import Tag
 from export import Export

+from lib import module_extractor
+

 # ============ BLUEPRINT ============
 objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item'))
@ -73,10 +75,12 @@ def showItem():  # # TODO: support post
    # # TODO: ADD in Export SECTION
    meta['hive_case'] = Export.get_item_hive_cases(item_id)

+    extracted = module_extractor.extract(item.id, content=meta['content'])
+
    return render_template("show_item.html", bootstrap_label=bootstrap_label,
                            modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
                            is_hive_connected=Export.get_item_hive_cases(item_id),
-                            meta=meta)
+                            meta=meta, extracted=extracted)

    # kvrocks data

--- a/var/www/modules/hashDecoded/Flask_hashDecoded.py
+++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py
@ -16,6 +16,8 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
+from lib.objects import ail_objects
+
 from packages.Date import Date

 # ============ VARIABLES ============
@ -167,22 +169,9 @@ def get_all_types_id(correlation_type):
    else:
        return []

-def is_valid_type_id(correlation_type, type_id):
-    all_type_id = get_all_types_id(correlation_type)
-    if type_id in all_type_id:
-        return True
-    else:
-        return False
-
-def get_key_id_metadata(correlation_type, type_id, key_id):
-    key_id_metadata = {}
-    if r_serv_metadata.exists('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)):
-        key_id_metadata['first_seen'] = r_serv_metadata.hget('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id), 'first_seen')
-        key_id_metadata['first_seen'] = '{}/{}/{}'.format(key_id_metadata['first_seen'][0:4], key_id_metadata['first_seen'][4:6], key_id_metadata['first_seen'][6:8])
-        key_id_metadata['last_seen'] = r_serv_metadata.hget('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id), 'last_seen')
-        key_id_metadata['last_seen'] = '{}/{}/{}'.format(key_id_metadata['last_seen'][0:4], key_id_metadata['last_seen'][4:6], key_id_metadata['last_seen'][6:8])
-        key_id_metadata['nb_seen'] = r_serv_metadata.scard('set_{}_{}:{}'.format(correlation_type, type_id, key_id))
-    return key_id_metadata
+def get_key_id_metadata(obj_type, subtype, obj_id):
+    """
+    Return the metadata of the object identified by (obj_type, subtype, obj_id).
+
+    NOTE(review): _get_meta() is called on the value returned by
+    ail_objects.get_object_meta() - confirm that helper returns an object
+    exposing _get_meta() and not an already-built metadata dict.
+    """
+    return ail_objects.get_object_meta(obj_type, subtype, obj_id)._get_meta()

 def list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id):
    sparklines_value = []
@ -250,7 +239,7 @@ def main_correlation_page(correlation_type, type_id, date_from, date_to, show_de
    if type_id is not None:
        #retrieve char
        type_id = type_id.replace(' ', '')
-        if not is_valid_type_id(correlation_type, type_id):
+        if not ail_objects.is_valid_object_subtype(correlation_type, type_id):
            type_id = None

    date_range = []
@ -897,7 +886,7 @@ def pgpdump_graph_line_json():

 def correlation_graph_line_json(correlation_type, type_id, key_id, date_from, date_to):
    # verify input
-    if key_id is not None and is_valid_type_id(correlation_type, type_id) and r_serv_metadata.exists('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)):
+    if key_id is not None and ail_objects.is_valid_object_subtype(correlation_type, type_id) and ail_objects.exists_obj(correlation_type, type_id, key_id):

        if date_from is None or date_to is None:
            nb_days_seen_in_pastes = 30
--- a/var/www/templates/objects/item/show_item.html
+++ b/var/www/templates/objects/item/show_item.html
@ -10,6 +10,7 @@
 	<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
+    <link href="{{ url_for('static', filename='css/ail-project.css') }}" rel="stylesheet">

  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
  <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
@ -256,6 +257,8 @@

        </div>
      </div>
+
+
    </div>
  </div>
  {% endif %}
@ -346,6 +349,13 @@
  {% endif %}


+  {% if extracted %}
+      {% for row in extracted %}
+          <div><a href="#{{ row[0] }}:{{ row[1] }}">{{ row[2] }}</a></div>
+      {% endfor  %}
+  {% endif %}
+
+
  <!-- nav-pills nav-justified nav-tabs-->

  <div class="card">
@ -367,15 +377,21 @@
        </li>
      </ul>

-
      <div class="tab-content" id="pills-tabContent">
        <div class="tab-pane fade show active" id="pills-content" role="tabpanel" aria-labelledby="pills-content-tab">
-          <p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
+            {% if not extracted %}
+                <p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
+            {% else %}
+                <p class="my-0"> <pre class="border">{{ meta['content'][:extracted[0][0]] }}{% for row in extracted %}<span class="hg-text" id="{{ row[0] }}:{{ row[1] }}">{{ meta['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > extracted|length %}{{ meta['content'][extracted[-1][1]:] }}{% else %}{{ meta['content'][row[1]:extracted[loop.index][0]] }}{% endif %}{% endfor %}</pre></p>
+            {% endif %}
        </div>
        <div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab">
          <p class="my-0"> <pre id="html2text-container" class="border"></pre></p>
        </div>
      </div>
+
+
+
    </div>
  </div>