chg: [DB Migration] UI: Extract + highlight leaks and trackers match, Data Retention save object first/last date, Refactor Tools

This commit is contained in:
Terrtia 2022-12-19 16:38:20 +01:00
parent f9715408be
commit bf71c9ba99
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
26 changed files with 883 additions and 873 deletions

View file

@ -375,6 +375,7 @@ def items_migration():
# item = Items.Item(item_id)
# item.set_father(father_id)
# DUPLICATES
for tag in ['infoleak:automatic-detection="credential"']: # Creditcards, Mail, Keys ???????????????????????????????
print(f'Duplicate migration: {tag}')
tag_first = get_tag_first_seen(tag)
@ -389,6 +390,10 @@ def items_migration():
print(algo, duplicates_dict[id_2][algo], id_2)
item.add_duplicate(algo, duplicates_dict[id_2][algo], id_2)
# ITEM FIRST/LAST DATE
Items._manual_set_items_date_first_last()
# TODO: test cookies migration
# TODO: migrate auto crawlers
@ -840,14 +845,14 @@ if __name__ == '__main__':
#core_migration()
#user_migration()
#tags_migration()
# items_migration()
items_migration()
#crawler_migration()
# domain_migration() # TO TEST ###########################
#decodeds_migration()
# screenshots_migration()
#subtypes_obj_migration()
subtypes_obj_migration()
# ail_2_ail_migration()
trackers_migration()
# trackers_migration()
# investigations_migration()
# statistics_migration()

View file

@ -1,753 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Tools Module
============================
Search tools output
"""
from Helper import Process
from pubsublogger import publisher
import os
import re
import sys
import time
import redis
import signal
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when a regex search runs out of time."""
def timeout_handler(signum, frame):
    """Signal handler that aborts the interrupted code with TimeoutException.

    :param signum: signal number received (unused)
    :param frame: interrupted stack frame (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
# Install timeout_handler for SIGALRM: an alarm armed with signal.alarm()
# that expires will raise TimeoutException in the running code.
signal.signal(signal.SIGALRM, timeout_handler)
def search_tools(item_id, item_content):
    """Tag an item with every tool whose signature regexes all match it.

    For each entry of the module-level ``tools_dict`` the patterns
    ``regex1`` .. ``regex<nb_regex>`` are searched in ``item_content``.
    Every search is bounded by the entry's ``max_execution_time`` through
    ``signal.alarm``. Results are pushed through the module-level
    ``Process`` instance ``p``:

    * ``'<tag<i>>;<item_id>'`` to ``'Tags'`` when regex ``i`` matches and
      the entry defines the optional ``tag<i>`` key;
    * ``'<tag>;<item_id>'`` to ``'Tags'`` when all regexes of the entry match;
    * ``item_id`` to ``'Duplicate'`` once, when at least one tool matched.

    :param item_id: identifier of the analysed item
    :param item_content: text in which the tool signatures are searched
    """
    tools_in_item = False

    for tool_dict in tools_dict.values():
        regex_match = False
        for regex_index in range(1, tool_dict['nb_regex'] + 1):
            regex = tool_dict['regex{}'.format(regex_index)]

            signal.alarm(tool_dict['max_execution_time'])
            try:
                tools_found = re.findall(regex, item_content)
            except TimeoutException:
                p.incr_module_timeout_statistic()  # add encoder type
                print("{0} processing timeout".format(item_id))
                # A search that did not finish cannot count as a match:
                # the tool tag requires *all* regexes to match.
                regex_match = False
                break
            finally:
                # Always disarm the alarm, whatever re.findall raised
                signal.alarm(0)

            if not tools_found:
                regex_match = False
                break

            regex_match = True
            regex_tag = 'tag{}'.format(regex_index)
            if regex_tag in tool_dict:
                print('{} found: {}'.format(item_id, tool_dict[regex_tag]))
                msg = '{};{}'.format(tool_dict[regex_tag], item_id)
                p.populate_set_out(msg, 'Tags')

        if regex_match:
            # Remember the detection so the item is sent to 'Duplicate' below
            tools_in_item = True
            print('{} found: {}'.format(item_id, tool_dict['name']))
            # Tag Item
            msg = '{};{}'.format(tool_dict['tag'], item_id)
            p.populate_set_out(msg, 'Tags')

    if tools_in_item:
        # send to duplicate module
        p.populate_set_out(item_id, 'Duplicate')
# Time budget of a single regex search; search_tools() passes it to signal.alarm().
default_max_execution_time = 30

# (tools_dict key, signature regex) of every supported tool, in detection order.
_TOOL_SIGNATURES = (
    ('sqlmap', r'Usage of sqlmap for attacking targets without|all tested parameters do not appear to be injectable|sqlmap identified the following injection point|Title:[^\n]*((error|time|boolean)-based|stacked queries|UNION query)'),
    ('wig', r'(?s)wig - WebApp Information Gatherer.+?_{10,}'),
    ('dmytry', r'(?s)Gathered (TCP Port|Inet-whois|Netcraft|Subdomain|E-Mail) information for.+?-{10,}'),
    ('inurlbr', r'Usage of INURLBR for attacking targets without prior mutual consent is illegal'),
    ('wafw00f', r'(?s)WAFW00F - Web Application Firewall Detection Tool.+?Checking'),
    ('sslyze', r'(?s)PluginSessionRenegotiation.+?SCAN RESULTS FOR'),
    ('nmap', r'(?s)Nmap scan report for.+?Host is'),
    ('dnsenum', r'(?s)dnsenum(\.pl)? VERSION:.+?Trying Zone Transfer'),
    ('knock', r'I scannig with my internal wordlist'),
    ('nikto', r'(?s)\+ Target IP:.+?\+ Start Time:'),
    ('dnscan', r'(?s)\[\*\] Processing domain.+?\[\+\] Getting nameservers.+?records found'),
    ('dnsrecon', r'Performing General Enumeration of Domain:|Performing TLD Brute force Enumeration against'),
    ('striker', r'Crawling the target for fuzzable URLs|Honeypot Probabilty:'),
    ('rhawk', r'S U B - D O M A I N F I N D E R'),
    ('uniscan', r'\| \[\+\] E-mail Found:'),
    ('masscan', r'(?s)Starting masscan [\d.]+.+?Scanning|bit.ly/14GZzcT'),
    ('msfconsole', r'=\[ metasploit v[\d.]+.+?msf >'),
    ('amap', r'\bamap v[\d.]+ \(www.thc.org/thc-amap\)'),
    ('automater', r'(?s)\[\*\] Checking.+?_+ Results found for:'),
    ('braa', r'\bbraa public@[\d.]+'),
    ('ciscotorch', r'Becase we need it'),
    ('theharvester', r'Starting harvesting process for domain:'),
    ('sslstrip', r'sslstrip [\d.]+ by Moxie Marlinspike running'),
    ('sslcaudit', r'# filebag location:'),
    ('smbmap', r'\[\+\] Finding open SMB ports\.\.\.'),
    ('reconng', r'\[\*\] Status: unfixed|\[recon-ng\]\[default\]'),
    ('p0f', r'\bp0f [^ ]+ by Michal Zalewski'),
    ('hping3', r'\bHPING [^ ]+ \([^)]+\): [^ ]+ mode set'),
    ('enum4linux', r'Starting enum4linux v[\d.]+|\| Target Information \|'),
    ('dnstracer', r'(?s)Tracing to.+?DNS HEADER \(send\)'),
    ('dnmap', r'dnmap_(client|server)|Nmap output files stored in \'nmap_output\' directory'),
    ('arpscan', r'Starting arp-scan [^ ]+ with \d+ hosts'),
    ('cdpsnarf', r'(?s)CDPSnarf v[^ ]+.+?Waiting for a CDP packet\.\.\.'),
    ('dnsmap', r'DNS Network Mapper by pagvac'),
    ('dotdotpwn', r'DotDotPwn v[^ ]+|dotdotpwn@sectester.net|\[\+\] Creating Traversal patterns'),
    ('searchsploit', r'(exploits|shellcodes)/|searchsploit_rc|Exploit Title'),
    ('fierce', r'(?s)Trying zone transfer first.+Checking for wildcard DNS'),
    ('firewalk', r'Firewalk state initialization completed successfully|Ramping phase source port'),
    ('fragroute', r'\bfragroute: tcp_seg -> ip_frag'),
    ('fragrouter', r'fragrouter: frag-\d+:'),
    ('goofile', r'code.google.com/p/goofile\b'),
    ('intrace', r'\bInTrace [\d.]+ \-\-'),
    ('ismtp', r'Testing SMTP server \[user enumeration\]'),
    ('lbd', r'Checking for (DNS|HTTP)-Loadbalancing'),
    ('miranda', r'Entering discovery mode for \'upnp:'),
    ('ncat', r'nmap.org/ncat'),
    ('ohrwurm', r'\bohrwurm-[\d.]+'),
    ('oscanner', r'Loading services/sids from service file'),
    ('sfuzz', r'AREALLYBADSTRING|sfuzz/sfuzz'),
    ('sidguess', r'SIDGuesser v[\d.]+'),
    ('sqlninja', r'Sqlninja rel\. [\d.]+'),
    ('sqlsus', r'sqlsus version [\d.]+'),
    ('dnsdict6', r'Starting DNS enumeration work on'),
    ('unixprivesccheck', r'Recording Interface IP addresses'),
    ('yersinia', r'yersinia@yersinia.net'),
    ('armitage', r'\[\*\] Starting msfrpcd for you'),
    ('backdoorfactory', r'\[\*\] In the backdoor module'),
    ('beef', r'Please wait as BeEF services are started'),
    ('cat', r'Cisco Auditing Tool.+?g0ne'),
    ('cge', r'Vulnerability successful exploited with \['),
    ('john', r'John the Ripper password cracker, ver:|Loaded \d+ password hash \('),
    ('keimpx', r'\bkeimpx [\d.]+'),
    ('maskprocessor', r'mp by atom, High-Performance word generator'),
    ('ncrack', r'Starting Ncrack[^\n]+http://ncrack.org'),
    ('patator', r'http://code.google.com/p/patator/|Starting Patator v'),
    ('phrasendrescher', r'phrasen\|drescher [\d.]+'),
    ('polenum', r'\[\+\] Password Complexity Flags:'),
    ('rainbowcrack', r'Official Website: http://project-rainbowcrack.com/'),
    ('rcracki_mt', r'Found \d+ rainbowtable files\.\.\.'),
    ('tcpdump', r'tcpdump: listening on.+capture size \d+|\d+ packets received by filter'),
    ('hydra', r'Hydra \(http://www.thc.org/thc-hydra\)'),
    ('netcat', r'Listening on \[[\d.]+\] \(family'),
    ('nslookup', r'Non-authoritative answer:'),
    ('dig', r'; <<>> DiG [\d.]+'),
    ('whois', r'(?i)Registrar WHOIS Server:|Registrar URL: http://|DNSSEC: unsigned|information on Whois status codes|REGISTERED, DELEGATED|[Rr]egistrar:|%[^\n]+(WHOIS|2016/679)'),
    ('nessus', r'nessus_(report_(get|list|exploits)|scan_(new|status))|nessuscli|nessusd|nessus-service'),
    ('openvas', r'/openvas/'),
    ('golismero', r'GoLismero[\n]+The Web Knife'),
    ('wpscan', r'WordPress Security Scanner by the WPScan Team|\[\+\] Interesting header:'),
    ('skipfish', r'\[\+\] Sorting and annotating crawl nodes:|skipfish version [\d.]+'),
    ('arachni', r'With the support of the community and the Arachni Team|\[\*\] Waiting for plugins to settle\.\.\.'),
    ('dirb', r'==> DIRECTORY:|\bDIRB v[\d.]+'),
    ('joomscan', r'OWASP Joomla! Vulnerability Scanner v[\d.]+'),
    ('jbossautopwn', r'\[x\] Now creating BSH script\.\.\.|\[x\] Now deploying \.war file:'),
    ('grabber', r'runSpiderScan @'),
    ('fimap', r'Automatic LFI/RFI scanner and exploiter'),
    ('dsxs', r'Damn Small XSS Scanner \(DSXS\)'),
    ('dsss', r'Damn Small SQLi Scanner \(DSSS\)'),
    ('dsjs', r'Damn Small JS Scanner \(DSJS\)'),
    ('dsfs', r'Damn Small FI Scanner \(DSFS\)'),
    ('identywaf', r'\[o\] initializing handlers\.\.\.'),
    ('whatwaf', r'<sCRIPT>ALeRt.+?WhatWaf\?'),
)

# Tool names that differ from their dictionary key (the misspelled key is kept as is).
_TOOL_NAMES = {'dmytry': 'dmitry'}


def _build_tool(key, regex):
    """Return the tools_dict entry of one single-regex tool."""
    name = _TOOL_NAMES.get(key, key)
    return {
        'name': name,
        'regex1': regex,
        'nb_regex': 1,
        'max_execution_time': default_max_execution_time,
        'tag': 'infoleak:automatic-detection="{}-tool"'.format(name),  # tag if all regex match
    }


# key -> {'name', 'regex1', 'nb_regex', 'max_execution_time', 'tag'}
tools_dict = {key: _build_tool(key, regex) for key, regex in _TOOL_SIGNATURES}
if __name__ == "__main__":
    # Logger configuration
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Tools'
    # # TODO: add duplicate

    # I/O queues of the module; `p` is read as a global by search_tools()
    p = Process(config_section)

    # Announce the module start in the logs
    publisher.info("Run Tools module ")

    # Consume item ids forever, polling every second while the queue is empty
    while True:
        item_id = p.get_from_set()
        if item_id is not None:
            item_content = Item.get_item_content(item_id)
            search_tools(item_id, item_content)
        else:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)

View file

@ -295,10 +295,10 @@ def get_item_all_trackers_uuid(obj_id):
#obj_type = 'item'
return r_serv_tracker.smembers(f'obj:trackers:item:{obj_id}')
def is_obj_tracked(obj_type, subtype, obj_id):
    """Check whether the tracker key of an object exists.

    :param obj_type: object type, part of the ``obj:trackers:<type>:<id>`` key
    :param subtype: currently unused (not part of the key)
    :param obj_id: object identifier
    :return: result of ``r_serv_tracker.exists`` on the object's tracker key
    """
    return r_serv_tracker.exists(f'obj:trackers:{obj_type}:{obj_id}')
def get_obj_all_trackers(obj_type, subtype, obj_id):
    """Return the members of the tracker set attached to an object.

    :param obj_type: object type, part of the ``obj:trackers:<type>:<id>`` key
    :param subtype: currently unused (not part of the key)
    :param obj_id: object identifier
    :return: members of the set, as returned by ``r_serv_tracker.smembers``
             (presumably tracker uuids -- see get_item_all_trackers_uuid)
    """
    return r_serv_tracker.smembers(f'obj:trackers:{obj_type}:{obj_id}')
# # TODO: ADD all Objects + Subtypes
@ -664,6 +664,10 @@ def get_all_tracked_yara_files(filter_disabled=False):
pass
return yara_files
def get_yara_rule_by_uuid(tracker_uuid):
    """Compile the YARA rule file attached to a tracker.

    The value returned by ``get_tracker_by_uuid`` is used as a path relative
    to ``get_yara_rules_dir()``.

    :param tracker_uuid: uuid of a YARA tracker
    :return: the object returned by ``yara.compile``
    """
    relative_path = get_tracker_by_uuid(tracker_uuid)
    rules_dir = get_yara_rules_dir()
    return yara.compile(filepath=os.path.join(rules_dir, relative_path))
def reload_yara_rules():
yara_files = get_all_tracked_yara_files()
# {uuid: filename}

View file

@ -11,46 +11,85 @@ sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader
# Database handles, created once at import time.
config_loader = ConfigLoader.ConfigLoader()
# Kvrocks_Objects handle used by the *_object_date helpers
# (sorted sets objs:first_date / objs:last_date).
r_serv_db = config_loader.get_db_conn("Kvrocks_Objects")
# Kvrocks_Objects handle used by the *_obj_date_* helpers (hashes date:first / date:last).
r_obj = config_loader.get_db_conn("Kvrocks_Objects")
# Redis_Cache handle: the date:first / date:last hashes are read from it first.
r_cache = config_loader.get_redis_conn("Redis_Cache")
# The loader is only needed to open the connections; drop the reference.
config_loader = None
def get_first_object_date(object_type, subtype, field=''):
    """Return the first date stored for ``<object_type>:<subtype>:<field>``.

    The value is the member's score in the ``objs:first_date`` sorted set,
    converted to int; ``99999999`` is returned when no score is stored.
    """
    member = f'{object_type}:{subtype}:{field}'
    score = r_serv_db.zscore('objs:first_date', member)
    return int(score) if score else 99999999
# TODO HOW TO HANDLE SCREENSHOTS ????
# SCREENSHOT ID -> MEMBER OF ITEMS -> DATES
# META SCREENSHOT -> NB DOMAINS + FIRST/LAST SEEN ???
def get_last_object_date(object_type, subtype, field=''):
    """Return the last date stored for ``<object_type>:<subtype>:<field>``.

    The value is the member's score in the ``objs:last_date`` sorted set,
    converted to int; ``0`` is returned when no score is stored.
    """
    member = f'{object_type}:{subtype}:{field}'
    score = r_serv_db.zscore('objs:last_date', member)
    return int(score) if score else 0
def _set_first_object_date(object_type, subtype, date, field=''):
    """Store ``date`` as the score of the object in ``objs:first_date``.

    :return: the result of the ``zadd`` call
    """
    member = f'{object_type}:{subtype}:{field}'
    return r_serv_db.zadd('objs:first_date', {member: date})
def _set_last_object_date(object_type, subtype, date, field=''):
    """Store ``float(date)`` as the score of the object in ``objs:last_date``.

    :return: the result of the ``zadd`` call
    """
    member = f'{object_type}:{subtype}:{field}'
    score = float(date)
    return r_serv_db.zadd('objs:last_date', {member: score})
def update_first_object_date(object_type, subtype, date, field=''):
    """Lower the stored first date to ``date`` when ``date`` is earlier.

    :return: ``date`` (as received) when it replaced the stored value,
             otherwise the stored first date as an int
    """
    current = get_first_object_date(object_type, subtype, field=field)
    if int(date) >= current:
        return current
    _set_first_object_date(object_type, subtype, date, field=field)
    return date
def update_last_object_date(object_type, subtype, date, field=''):
    """Raise the stored last date to ``date`` when ``date`` is later.

    :return: ``date`` (as received) when it replaced the stored value,
             otherwise the stored last date as an int
    """
    current = get_last_object_date(object_type, subtype, field=field)
    if int(date) <= current:
        return current
    _set_last_object_date(object_type, subtype, date, field=field)
    return date
def update_object_date(object_type, subtype, date, field=''):
    """Update both the first and the last stored date with ``date``."""
    for updater in (update_first_object_date, update_last_object_date):
        updater(object_type, subtype, date, field=field)
# TAG /!\ DIFF TAG CREDENTIAL ITEM != DOMAIN:CREDENTIAL
# -> IN OBJECT TYPE ?????
# OR SPECIAL FIRST SEEN / LAST SEEN IN TAG LIB
# DOMAIN -> subtype = domain type
# TAG -> type = "TAG"
# TAG -> subtype = "OBJ:"tag
def load_obj_date_first_last():
    """Copy every first/last date stored in ``r_obj`` into ``r_cache``.

    Each field of the ``date:first`` / ``date:last`` hashes is split on its
    first ``:`` into ``obj_type`` and ``subtype`` before being cached.
    """
    loaders = (
        ('date:first', _set_obj_date_first),  # LOAD FIRST DATE
        ('date:last', _set_obj_date_last),    # LOAD LAST DATE
    )
    for hash_name, cache_setter in loaders:
        for row, date in r_obj.hgetall(hash_name).items():
            obj_type, subtype = row.split(':', 1)
            cache_setter(date, obj_type, subtype=subtype)
# MAKE IT WORK WITH TAGS
def get_obj_date_first(obj_type, subtype='', r_int=False):
    """Return the first date recorded for ``<obj_type>:<subtype>``.

    The ``date:first`` hash is read from ``r_cache`` first, then from
    ``r_obj`` when the cache holds no value.

    :param obj_type: object type
    :param subtype: object subtype ('' when the type has none)
    :param r_int: when True, return an int; a missing date then yields
                  ``99999999`` so that any real date compares as earlier
    :return: the stored value (or None) when ``r_int`` is False, else an int
    """
    field = f'{obj_type}:{subtype}'
    first = r_cache.hget('date:first', field) or r_obj.hget('date:first', field)
    if not r_int:
        return first
    # "No first date yet" must compare greater than every real date,
    # otherwise `date < first` can never be true for a new object type.
    return int(first) if first else 99999999
def get_obj_date_last(obj_type, subtype='', r_int=False):
    """Return the last date recorded for ``<obj_type>:<subtype>``.

    The ``date:last`` hash is read from ``r_cache`` first, then from
    ``r_obj`` when the cache holds no value.

    :param obj_type: object type
    :param subtype: object subtype ('' when the type has none)
    :param r_int: when True, return an int; a missing date then yields
                  ``0`` so that any real date compares as later
    :return: the stored value (or None) when ``r_int`` is False, else an int
    """
    field = f'{obj_type}:{subtype}'
    last = r_cache.hget('date:last', field) or r_obj.hget('date:last', field)
    if not r_int:
        return last
    # "No last date yet" must compare lower than every real date,
    # otherwise `date > last` can never be true for a new object type.
    return int(last) if last else 0
# FIRST
def _set_obj_date_first(date, obj_type, subtype=''):
    """Write the first date of ``<obj_type>:<subtype>`` to ``r_cache`` only."""
    field = f'{obj_type}:{subtype}'
    r_cache.hset('date:first', field, date)
def set_obj_date_first(date, obj_type, subtype=''):
    """Write the first date of ``<obj_type>:<subtype>`` to ``r_cache``, then to ``r_obj``."""
    field = f'{obj_type}:{subtype}'
    r_cache.hset('date:first', field, date)
    r_obj.hset('date:first', field, date)
# LAST
def _set_obj_date_last(date, obj_type, subtype=''):
    """Write the last date of ``<obj_type>:<subtype>`` to ``r_cache`` only."""
    field = f'{obj_type}:{subtype}'
    r_cache.hset('date:last', field, date)
def set_obj_date_last(date, obj_type, subtype=''):
    """Write the last date of ``<obj_type>:<subtype>`` to ``r_cache``, then to ``r_obj``."""
    field = f'{obj_type}:{subtype}'
    r_cache.hset('date:last', field, date)
    r_obj.hset('date:last', field, date)
def update_obj_date(date, obj_type, subtype=''):
    """Widen the recorded [first, last] date range of an object type with ``date``.

    A bound that is not recorded yet is always initialised with ``date``;
    an existing bound is only replaced when ``date`` falls outside of it.

    :param date: date of the new observation; converted with ``int``
    :param obj_type: object type
    :param subtype: object subtype ('' when the type has none)
    """
    date = int(date)

    # Read the raw values: a missing bound is handled explicitly rather than
    # through a sentinel, so the very first date is always recorded.
    first = get_obj_date_first(obj_type, subtype=subtype)
    if not first or date < int(first):
        set_obj_date_first(date, obj_type, subtype=subtype)

    last = get_obj_date_last(obj_type, subtype=subtype)
    if not last or date > int(last):
        set_obj_date_last(date, obj_type, subtype=subtype)
###############################################################

148
bin/lib/module_extractor.py Executable file
View file

@ -0,0 +1,148 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import yara
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
import lib.objects.ail_objects
from lib.objects.Items import Item
from lib import correlations_engine
from lib import regex_helper
from lib.ConfigLoader import ConfigLoader
from lib import Tracker
from modules.CreditCards import CreditCards
from modules.Iban import Iban
from modules.Mail import Mail
from modules.Onion import Onion
from modules.Tools import Tools
# One instance of each detection module, created at import time; extract()
# calls their extract(obj_id, content, tag) method.
creditCards = CreditCards()
ibans = Iban()
mails = Mail()
onions = Onion()
tools = Tools()

config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
# The loader is only needed to open the connection; drop the reference.
config_loader = None

# Key suffix handed to regex_helper and used to name the YARA match set in r_cache.
r_key = regex_helper.generate_redis_cache_key('extractor')

# Item tag -> module instance able to re-extract the matching content.
MODULES = {
    'infoleak:automatic-detection="credit-card"': creditCards,
    'infoleak:automatic-detection="iban"': ibans,
    'infoleak:automatic-detection="mail"': mails,
    'infoleak:automatic-detection="onion"': onions,
    # APIkey ???
    # Credentials
    # Zerobins
    # CERTIFICATE + KEYS ???
    # SQL Injection / Libinjection ???
}
# Every tool tag ("<tool_name>-tool") is handled by the single Tools instance.
for tool_name in tools.get_tools():
    MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
def get_correl_match(extract_type, obj_id, content, filter_subtypes=None):
    """Locate the occurrences of an item's correlated objects in its content.

    The correlations of type ``extract_type`` attached to the item are read
    from the correlation engine; each entry is a ``'<subtype>:<value>'``
    string. Every value is searched literally in ``content``.

    :param extract_type: correlation type to look up (e.g. ``'cve'``)
    :param obj_id: id of the item the content belongs to
    :param content: text to search
    :param filter_subtypes: accepted for compatibility, currently unused
        (the subtype filter is disabled)
    :return: the list returned by ``regex_helper.regex_finditer`` -
        ``(start, end, value)`` tuples - or ``[]`` when the item has no
        correlation of this type
    """
    import re  # local import: this module has no file-level ``import re``

    correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
    values = set()
    for c in correl:
        subtype, value = c.split(':', 1)
        # if subtype in filter_subtypes:
        if value:  # an empty alternative would match at every position
            values.add(value)
    if not values:
        return []
    # Values are literal strings, not patterns: escape regex metacharacters.
    # Longest first, so a value that is a prefix of another one cannot shadow it.
    ordered = sorted(values, key=lambda v: (-len(v), v))
    pattern = '|'.join(re.escape(v) for v in ordered)
    return regex_helper.regex_finditer(r_key, pattern, obj_id, content)
def _get_yara_match(data):
    """Callback given to ``rule.match``: cache every matched string.

    Each entry of ``data['strings']`` is unpacked as a 3-tuple; its first
    element is used as the start offset and its last element is decoded as
    the matched text. A ``'<start>:<end>:<value>'`` member is added to the
    ``extractor:yara:match:<r_key>`` set, which expires after 300 seconds.
    """
    cache_key = f'extractor:yara:match:{r_key}'
    for offset, _identifier, matched in data.get('strings'):
        text = matched.decode()
        stop = offset + len(text)
        r_cache.sadd(cache_key, f'{offset}:{stop}:{text}')
        r_cache.expire(cache_key, 300)
    return yara.CALLBACK_CONTINUE
# TODO RETRO HUNTS
def get_tracker_match(obj_id, content):
    """Return the spans of ``content`` matched by the item's trackers.

    Matches of *all* regex and yara trackers attached to the item are
    collected; other tracker types are not handled yet.

    :param obj_id: item id
    :param content: item content to search
    :return: list of ``(start, end, value)`` tuples, ``[]`` when nothing
        matched
    """
    extracted = []
    yara_key = f'extractor:yara:match:{r_key}'
    trackers = Tracker.get_obj_all_trackers('item', '', obj_id)
    for tracker_uuid in trackers:
        tracker_type = Tracker.get_tracker_type(tracker_uuid)
        tracker = Tracker.get_tracker_by_uuid(tracker_uuid)
        if tracker_type == 'regex':
            # Accumulate instead of returning: an item can carry several trackers.
            extracted.extend(regex_helper.regex_finditer(r_key, tracker, obj_id, content))
        elif tracker_type == 'yara':
            rule = Tracker.get_yara_rule_by_uuid(tracker_uuid)
            # _get_yara_match stores the matches in the redis set read just below.
            rule.match(data=content, callback=_get_yara_match,
                       which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
            yara_match = r_cache.smembers(yara_key)
            r_cache.delete(yara_key)
            for match in yara_match:
                start, end, value = match.split(':', 2)
                extracted.append((int(start), int(end), value))
        # elif tracker_type == 'term': # TODO
        #
        # elif tracker_type == '':
    return extracted
def extract(obj_id, content=None):
    """Collect every match to highlight in an item.

    Matches come from three sources: the trackers attached to the item,
    the modules registered in ``MODULES`` for the item's tags, and the
    item's 'cve', 'cryptocurrency' and 'username' correlations.

    :param obj_id: item id
    :param content: item content; loaded from the item when empty or None
    :return: the collected matches, sorted on their first element
    """
    from operator import itemgetter

    item = Item(obj_id)
    if not content:
        content = item.get_content()

    found = []
    found.extend(get_tracker_match(obj_id, content))

    # print(item.get_tags())
    for tag in item.get_tags():
        module = MODULES.get(tag)
        if not module:
            continue
        # print(tag)
        tag_matches = module.extract(obj_id, content, tag)
        if tag_matches:
            found.extend(tag_matches)

    for obj_t in ('cve', 'cryptocurrency', 'username'):  # Decoded, PGP->extract bloc
        correl_matches = get_correl_match(obj_t, obj_id, content)
        if correl_matches:
            found.extend(correl_matches)

    found.sort(key=itemgetter(0))
    print(found)
    return found
# Manual test: run the extractor on a stored item and print the elapsed time.
if __name__ == '__main__':
    t0 = time.time()
    # Sample item ids; only the last assignment is actually used.
    obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
    obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
    obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
    # obj_id = 'tests/2021/01/01/credit_cards.gz'
    # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
    obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
    obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
    extract(obj_id)
    # get_obj_correl('cve', obj_id, content)
    # r = get_tracker_match(obj_id, content)
    # print(r)
    print(time.time() - t0)

View file

@ -17,7 +17,7 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

View file

@ -21,7 +21,7 @@ from lib.objects.abstract_object import AbstractObject
from lib.ail_core import paginate_iterator
from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_domain, get_item_har
from lib import data_retention_engine
from lib.data_retention_engine import update_obj_date
from packages import Date
@ -426,7 +426,7 @@ class Domain(AbstractObject):
except (ValueError, TypeError):
status = True
data_retention_engine.update_object_date('domain', self.domain_type, date)
update_obj_date(date, 'domain', self.domain_type)
# UP
if status:
r_crawler.srem(f'full_{self.domain_type}_down', self.id)

View file

@ -22,6 +22,7 @@ from lib.ail_core import get_ail_uuid
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib import item_basic
from lib.data_retention_engine import update_obj_date
from flask import url_for
@ -245,7 +246,7 @@ class Item(AbstractObject):
return None
def get_url(self):
return r_object.hset(f'meta:item::{self.id}', 'url')
return r_object.hget(f'meta:item::{self.id}', 'url')
def set_crawled(self, url, parent_id):
r_object.hset(f'meta:item::{self.id}', 'url', url)
@ -375,6 +376,24 @@ def get_items_by_source(source):
l_items.append(item_id)
return l_items
def _manual_set_items_date_first_last():
    """Recompute the first/last item date from the items folder layout.

    For every item source, the sub-directories of
    ``$AIL_HOME/ITEMS_FOLDER/<source>`` whose name is an integer are
    scanned; the smallest and largest values found are passed to
    ``update_obj_date`` for the 'item' object type. Nothing is updated
    when no such directory exists.
    """
    # NOTE(review): the values are the integer names of the directories
    # directly under each source folder - confirm this is the date
    # granularity update_obj_date expects.
    first = 9999
    last = 0
    for source in get_items_sources():
        dir_source = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER, source)
        for dir_name in os.listdir(dir_source):
            if not os.path.isdir(os.path.join(dir_source, dir_name)):
                continue
            try:
                date = int(dir_name)
            except ValueError:
                # A stray non-numeric directory must not abort the migration.
                continue
            first = min(first, date)
            last = max(last, date)
    if first != 9999:
        update_obj_date(first, 'item')
    if last != 0:
        update_obj_date(last, 'item')
################################################################################
################################################################################
################################################################################

View file

@ -15,6 +15,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_object import AbstractObject
# from lib import data_retention_engine
config_loader = ConfigLoader()
r_serv_metadata = config_loader.get_db_conn("Kvrocks_Objects")

View file

@ -19,6 +19,7 @@ sys.path.append(os.environ['AIL_BIN'])
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain
from lib.data_retention_engine import update_obj_date
from packages import Date
@ -41,10 +42,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
super().__init__(obj_type, id)
def exists(self):
return r_object.exists(f'{self.type}:meta:{self.id}')
return r_object.exists(f'meta:{self.type}:{self.id}')
def get_first_seen(self, r_int=False):
first_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'first_seen')
first_seen = r_object.hget(f'meta:{self.type}:{self.id}', 'first_seen')
if r_int:
if first_seen:
return int(first_seen)
@ -54,7 +55,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
return first_seen
def get_last_seen(self, r_int=False):
last_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'last_seen')
last_seen = r_object.hget(f'meta:{self.type}:{self.id}', 'last_seen')
if r_int:
if last_seen:
return int(last_seen)
@ -64,7 +65,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
return last_seen
def get_nb_seen(self):
return r_object.hget(f'{self.type}:meta:{self.id}', 'nb')
return r_object.hget(f'meta:{self.type}:{self.id}', 'nb')
def get_nb_seen_by_date(self, date):
nb = r_object.hget(f'{self.type}:date:{date}', self.id)
@ -82,10 +83,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
return meta_dict
def set_first_seen(self, first_seen):
r_object.hset(f'{self.type}:meta:{self.id}', 'first_seen', first_seen)
r_object.hset(f'meta:{self.type}:{self.id}', 'first_seen', first_seen)
def set_last_seen(self, last_seen):
r_object.hset(f'{self.type}:meta:{self.id}', 'last_seen', last_seen)
r_object.hset(f'meta:{self.type}:{self.id}', 'last_seen', last_seen)
def update_daterange(self, date):
date = int(date)
@ -114,12 +115,13 @@ class AbstractDaterangeObject(AbstractObject, ABC):
r_object.sadd(f'{self.type}:all', self.id)
else:
self.update_daterange(date)
update_obj_date(date, self.type)
# NB Object seen by day
r_object.hincrby(f'{self.type}:date:{date}', self.id, 1)
r_object.zincrby(f'{self.type}:date:{date}', 1, self.id) # # # # # # # # # #
# NB Object seen
r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1)
r_object.hincrby(f'meta:{self.type}:{self.id}', 'nb', 1)
# Correlations
self.add_correlation('item', '', item_id)

View file

@ -19,12 +19,14 @@ sys.path.append(os.environ['AIL_BIN'])
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain
from lib.data_retention_engine import update_obj_date
from packages import Date
# LOAD CONFIG
config_loader = ConfigLoader()
r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
# # TODO: ADD CORRELATION ENGINE
@ -47,7 +49,7 @@ class AbstractSubtypeObject(AbstractObject):
self.subtype = subtype
def exists(self):
return r_metadata.exists(f'{self.type}_metadata_{self.subtype}:{self.id}')
return r_object.exists(f'meta:{self.type}:{self.subtype}:{self.id}')
# def exists(self):
# res = r_metadata.zscore(f'{self.type}_all:{self.subtype}', self.id)
@ -57,7 +59,7 @@ class AbstractSubtypeObject(AbstractObject):
# return False
def get_first_seen(self, r_int=False):
first_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen')
first_seen = r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'first_seen')
if r_int:
if first_seen:
return int(first_seen)
@ -67,7 +69,7 @@ class AbstractSubtypeObject(AbstractObject):
return first_seen
def get_last_seen(self, r_int=False):
last_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen')
last_seen = r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last_seen')
if r_int:
if last_seen:
return int(last_seen)
@ -94,10 +96,10 @@ class AbstractSubtypeObject(AbstractObject):
return meta_dict
def set_first_seen(self, first_seen):
r_metadata.hset(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen', first_seen)
r_object.hset(f'meta:{self.type}:{self.subtype}:{self.id}', 'first_seen', first_seen)
def set_last_seen(self, last_seen):
r_metadata.hset(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen', last_seen)
r_object.hset(f'meta:{self.type}:{self.subtype}:{self.id}', 'last_seen', last_seen)
def update_daterange(self, date):
date = int(date)
@ -124,12 +126,13 @@ class AbstractSubtypeObject(AbstractObject):
# NEW field => first record(last record)
# by subtype ??????
# => data Retention + efficicent search
# => data Retention + efficient search
#
#
def add(self, date, item_id):
self.update_daterange(date)
update_obj_date(date, self.type, self.subtype)
# daily
r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
# all subtypes

View file

@ -71,7 +71,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
if r_set:
all_items = r_serv_cache.smembers(redis_key)
else:
all_items = r_serv_cache.lrange(redis_key, 0 ,-1)
all_items = r_serv_cache.lrange(redis_key, 0, -1)
r_serv_cache.delete(redis_key)
proc.terminate()
return all_items
@ -80,29 +80,66 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
proc.terminate()
sys.exit(0)
def _regex_search(redis_key, regex, item_content):
first_occ = regex.search(item_content)
if first_occ:
r_serv_cache.set(redis_key, first_occ)
def _regex_finditer(r_key, regex, content):
    """Worker target: push every match of ``regex`` in ``content`` to redis.

    Each match is appended to the ``r_key`` list as
    ``'<start>:<end>:<value>'``; the key then gets a 360 second expiry.
    """
    for found in re.finditer(regex, content):
        span_start, span_end = found.span()
        r_serv_cache.rpush(r_key, f'{span_start}:{span_end}:{found.group()}')
    r_serv_cache.expire(r_key, 360)
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
def regex_finditer(r_key, regex, item_id, content, max_time=30):
proc = Proc(target=_regex_finditer, args=(r_key, regex, content))
try:
proc.start()
proc.join(max_time)
if proc.is_alive():
proc.terminate()
Statistics.incr_module_timeout_statistic(module_name)
err_mess = f"{module_name}: processing timeout: {item_id}"
Statistics.incr_module_timeout_statistic(r_key)
err_mess = f"{r_key}: processing timeout: {item_id}"
print(err_mess)
publisher.info(err_mess)
return None
return []
else:
first_occ = r_serv_cache.get(redis_key)
r_serv_cache.delete(redis_key)
res = r_serv_cache.lrange(r_key, 0, -1)
r_serv_cache.delete(r_key)
proc.terminate()
return first_occ
all_match = []
for match in res:
start, end, value = match.split(':', 2)
all_match.append((int(start), int(end), value))
return all_match
except KeyboardInterrupt:
print("Caught KeyboardInterrupt, terminating workers")
print("Caught KeyboardInterrupt, terminating regex worker")
proc.terminate()
sys.exit(0)
def _regex_search(r_key, regex, content):
    """Worker target: flag in redis that ``regex`` matches ``content``.

    On a match, ``r_key`` is set to 1 with a 360 second expiry; nothing is
    written otherwise.
    """
    if not re.search(regex, content):
        return
    r_serv_cache.set(r_key, 1)
    r_serv_cache.expire(r_key, 360)
def regex_search(r_key, regex, item_id, content, max_time=30):
    """Tell whether ``regex`` matches ``content``, with a hard timeout.

    The search runs in a separate process that is terminated when it is
    still alive after ``max_time`` seconds; the timeout is counted in the
    statistics and logged.

    :return: True when the worker flagged a match, False on no match or
        on timeout
    """
    worker = Proc(target=_regex_search, args=(r_key, regex, content))
    try:
        worker.start()
        worker.join(max_time)
        if not worker.is_alive():
            # The worker sets r_key only on a match; consume the flag.
            matched = bool(r_serv_cache.exists(r_key))
            r_serv_cache.delete(r_key)
            return matched
        worker.terminate()
        Statistics.incr_module_timeout_statistic(r_key)
        err_mess = f"{r_key}: processing timeout: {item_id}"
        print(err_mess)
        publisher.info(err_mess)
        return False
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating regex worker")
        worker.terminate()
        sys.exit(0)

View file

@ -45,41 +45,57 @@ class CreditCards(AbstractModule):
]
self.regex = re.compile('|'.join(cards))
self.re_clean_card = r'[^0-9]'
# Waiting time in secondes between to message proccessed
# Waiting time in seconds between to message processed
self.pending_seconds = 10
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def get_valid_card(self, card):
    """Return the card number reduced to its digits when it passes the
    Luhn check, None otherwise."""
    digits = re.sub(self.re_clean_card, '', card)
    return digits if lib_refine.is_luhn_valid(digits) else None
def extract(self, obj_id, content, tag):
    """Return the ``(start, end, value)`` matches of the card regex whose
    value is a valid card number."""
    cards = self.regex_finditer(self.regex, obj_id, content)
    return [(start, end, number) for start, end, number in cards
            if self.get_valid_card(number)]
def compute(self, message, r_result=False):
item_id, score = message.split()
item = Item(item_id)
content = item.get_content()
all_cards = re.findall(self.regex, content)
all_cards = self.regex_findall(self.regex, item.id, content)
if len(all_cards) > 0:
# self.redis_logger.debug(f'All matching {all_cards}')
creditcard_set = set([])
creditcard_set = set()
for card in all_cards:
clean_card = re.sub('[^0-9]', '', card)
if lib_refine.is_luhn_valid(clean_card):
self.redis_logger.debug(f'{clean_card} is valid')
creditcard_set.add(clean_card)
print(card)
valid_card = self.get_valid_card(card)
if valid_card:
creditcard_set.add(valid_card)
# pprint.pprint(creditcard_set)
# print(creditcard_set)
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
if len(creditcard_set) > 0:
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
if creditcard_set:
mess = f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.id}'
print(mess)
self.redis_logger.warning(mess)
msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
msg = f'infoleak:automatic-detection="credit-card";{item.id}'
self.send_message_to_queue(msg, 'Tags')
if r_result:
return creditcard_set
else:
self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
self.redis_logger.info(f'{to_print}CreditCard related;{item.id}')
if __name__ == '__main__':

View file

@ -26,13 +26,13 @@ from lib.objects import Cves
from lib.objects.Items import Item
class Cve(AbstractModule):
class CveModule(AbstractModule):
"""
Cve module for AIL framework
CveModule for AIL framework
"""
def __init__(self):
super(Cve, self).__init__()
super(CveModule, self).__init__()
# regex to find CVE
self.reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')
@ -68,6 +68,6 @@ class Cve(AbstractModule):
if __name__ == '__main__':
module = Cve()
module = CveModule()
# module.run()
module.compute('crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd 9')

View file

@ -41,7 +41,8 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib.data_retention_engine import update_obj_date
from lib import item_basic
class Global(AbstractModule):
"""
@ -85,7 +86,7 @@ class Global(AbstractModule):
if len(splitted) == 2:
item, gzip64encoded = splitted
# Remove PASTES_FOLDER from item path (crawled item + submited)
# Remove PASTES_FOLDER from item path (crawled item + submitted)
if self.PASTES_FOLDERS in item:
item = item.replace(self.PASTES_FOLDERS, '', 1)
@ -125,6 +126,8 @@ class Global(AbstractModule):
if self.PASTES_FOLDERS in item_id:
item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)
update_obj_date(item_basic.get_item_date(item_id), 'item')
self.send_message_to_queue(item_id)
self.processed_item += 1
print(item_id)

View file

@ -62,6 +62,17 @@ class Iban(AbstractModule):
return True
return False
def extract(self, obj_id, content, tag):
    """Return the ``(start, end, value)`` matches of the IBAN regex that
    are valid IBANs once reduced to their alphanumeric characters."""
    valid = []
    for match in self.regex_finditer(self.iban_regex, obj_id, content):
        _start, _end, raw = match
        compact = ''.join(filter(str.isalnum, raw))
        if not self.is_valid_iban(compact):
            continue
        print(compact)
        valid.append(match)
    return valid
def compute(self, message):
item = Item(message)
item_id = item.get_id()

View file

@ -57,7 +57,7 @@ class Mail(AbstractModule):
return self.r_cache.exists(f'mxdomain:{mxdomain}')
def save_mxdomain_in_cache(self, mxdomain):
self.r_cache.setex(f'mxdomain:{mxdomain}', 1, datetime.timedelta(days=1))
self.r_cache.setex(f'mxdomain:{mxdomain}', datetime.timedelta(days=1), 1)
def check_mx_record(self, set_mxdomains):
"""Check if emails MX domains are responding.
@ -118,6 +118,21 @@ class Mail(AbstractModule):
print(e)
return valid_mxdomain
def extract(self, obj_id, content, tag):
    """Return the mail matches whose MX domain passes ``check_mx_record``.

    Matches of the email regex are grouped by lowercased domain (the part
    after the last '@'); only the groups of the domains returned by
    ``check_mx_record`` are kept.
    """
    by_domain = {}
    for mail in self.regex_finditer(self.email_regex, obj_id, content):
        _start, _end, address = mail
        mxdomain = address.rsplit('@', 1)[1].lower()
        by_domain.setdefault(mxdomain, []).append(mail)

    extracted = []
    for mx in self.check_mx_record(by_domain.keys()):
        extracted.extend(by_domain[mx])
    return extracted
# # TODO: sanitize mails
def compute(self, message):
item_id, score = message.split()

View file

@ -55,6 +55,17 @@ class Onion(AbstractModule):
# TEMP var: SAVE I2P Domain (future I2P crawler)
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
def extract(self, obj_id, content, tag):
    """Return the ``(start, end, value)`` matches of the onion regex whose
    unpacked domain is a valid onion domain."""
    matches = self.regex_finditer(self.onion_regex, obj_id, content)
    return [(start, end, url) for start, end, url in matches
            if crawlers.is_valid_onion_domain(crawlers.unpack_url(url)['domain'])]
def compute(self, message):
onion_urls = []
domains = []

434
bin/modules/Tools.py Executable file
View file

@ -0,0 +1,434 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Tools Module
============================
Search tools output
"""
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.objects.Items import Item
TOOLS = {
'sqlmap': {
'regex': r'Usage of sqlmap for attacking targets without|all tested parameters do not appear to be injectable|sqlmap identified the following injection point|Title:[^\n]*((error|time|boolean)-based|stacked queries|UNION query)',
'tag': 'infoleak:automatic-detection="sqlmap-tool"',
},
'wig': {
'regex': r'(?s)wig - WebApp Information Gatherer.+?_{10,}',
'tag': 'infoleak:automatic-detection="wig-tool"',
},
'dmytry': {
'regex': r'(?s)Gathered (TCP Port|Inet-whois|Netcraft|Subdomain|E-Mail) information for.+?-{10,}',
'tag': 'infoleak:automatic-detection="dmitry-tool"',
},
'inurlbr': {
'regex': r'Usage of INURLBR for attacking targets without prior mutual consent is illegal',
'tag': 'infoleak:automatic-detection="inurlbr-tool"',
},
'wafw00f': {
'regex': r'(?s)WAFW00F - Web Application Firewall Detection Tool.+?Checking',
'tag': 'infoleak:automatic-detection="wafw00f-tool"',
},
'sslyze': {
'regex': r'(?s)PluginSessionRenegotiation.+?SCAN RESULTS FOR',
'tag': 'infoleak:automatic-detection="sslyze-tool"',
},
'nmap': {
'regex': r'(?s)Nmap scan report for.+?Host is',
'tag': 'infoleak:automatic-detection="nmap-tool"',
},
'dnsenum': {
'regex': r'(?s)dnsenum(\.pl)? VERSION:.+?Trying Zone Transfer',
'tag': 'infoleak:automatic-detection="dnsenum-tool"',
},
'knock': {
'regex': r'I scannig with my internal wordlist',
'tag': 'infoleak:automatic-detection="knock-tool"',
},
'nikto': {
'regex': r'(?s)\+ Target IP:.+?\+ Start Time:',
'tag': 'infoleak:automatic-detection="nikto-tool"',
},
'dnscan': {
'regex': r'(?s)\[\*\] Processing domain.+?\[\+\] Getting nameservers.+?records found',
'tag': 'infoleak:automatic-detection="dnscan-tool"',
},
'dnsrecon': {
'regex': r'Performing General Enumeration of Domain:|Performing TLD Brute force Enumeration against',
'tag': 'infoleak:automatic-detection="dnsrecon-tool"',
},
'striker': {
'regex': r'Crawling the target for fuzzable URLs|Honeypot Probabilty:',
'tag': 'infoleak:automatic-detection="striker-tool"',
},
'rhawk': {
'regex': r'S U B - D O M A I N F I N D E R',
'tag': 'infoleak:automatic-detection="rhawk-tool"',
},
'uniscan': {
'regex': r'\| \[\+\] E-mail Found:',
'tag': 'infoleak:automatic-detection="uniscan-tool"',
},
'masscan': {
'regex': r'(?s)Starting masscan [\d.]+.+?Scanning|bit.ly/14GZzcT',
'tag': 'infoleak:automatic-detection="masscan-tool"',
},
'msfconsole': {
'regex': r'=\[ metasploit v[\d.]+.+?msf >',
'tag': 'infoleak:automatic-detection="msfconsole-tool"',
},
'amap': {
'regex': r'\bamap v[\d.]+ \(www.thc.org/thc-amap\)',
'tag': 'infoleak:automatic-detection="amap-tool"',
},
'automater': {
'regex': r'(?s)\[\*\] Checking.+?_+ Results found for:',
'tag': 'infoleak:automatic-detection="automater-tool"',
},
'braa': {
'regex': r'\bbraa public@[\d.]+',
'tag': 'infoleak:automatic-detection="braa-tool"',
},
'ciscotorch': {
'regex': r'Becase we need it',
'tag': 'infoleak:automatic-detection="ciscotorch-tool"',
},
'theharvester': {
'regex': r'Starting harvesting process for domain:',
'tag': 'infoleak:automatic-detection="theharvester-tool"',
},
'sslstrip': {
'regex': r'sslstrip [\d.]+ by Moxie Marlinspike running',
'tag': 'infoleak:automatic-detection="sslstrip-tool"',
},
'sslcaudit': {
'regex': r'# filebag location:',
'tag': 'infoleak:automatic-detection="sslcaudit-tool"',
},
'smbmap': {
'regex': r'\[\+\] Finding open SMB ports\.\.\.',
'tag': 'infoleak:automatic-detection="smbmap-tool"',
},
'reconng': {
'regex': r'\[\*\] Status: unfixed|\[recon-ng\]\[default\]',
'tag': 'infoleak:automatic-detection="reconng-tool"',
},
'p0f': {
'regex': r'\bp0f [^ ]+ by Michal Zalewski',
'tag': 'infoleak:automatic-detection="p0f-tool"',
},
'hping3': {
'regex': r'\bHPING [^ ]+ \([^)]+\): [^ ]+ mode set',
'tag': 'infoleak:automatic-detection="hping3-tool"',
},
'enum4linux': {
'regex': r'Starting enum4linux v[\d.]+|\| Target Information \|',
'tag': 'infoleak:automatic-detection="enum4linux-tool"',
},
'dnstracer': {
'regex': r'(?s)Tracing to.+?DNS HEADER \(send\)',
'tag': 'infoleak:automatic-detection="dnstracer-tool"',
},
'dnmap': {
'regex': r'dnmap_(client|server)|Nmap output files stored in \'nmap_output\' directory',
'tag': 'infoleak:automatic-detection="dnmap-tool"',
},
'arpscan': {
'regex': r'Starting arp-scan [^ ]+ with \d+ hosts',
'tag': 'infoleak:automatic-detection="arpscan-tool"',
},
'cdpsnarf': {
'regex': r'(?s)CDPSnarf v[^ ]+.+?Waiting for a CDP packet\.\.\.',
'tag': 'infoleak:automatic-detection="cdpsnarf-tool"',
},
'dnsmap': {
'regex': r'DNS Network Mapper by pagvac',
'tag': 'infoleak:automatic-detection="dnsmap-tool"',
},
'dotdotpwn': {
'regex': r'DotDotPwn v[^ ]+|dotdotpwn@sectester.net|\[\+\] Creating Traversal patterns',
'tag': 'infoleak:automatic-detection="dotdotpwn-tool"',
},
'searchsploit': {
'regex': r'(exploits|shellcodes)/|searchsploit_rc|Exploit Title',
'tag': 'infoleak:automatic-detection="searchsploit-tool"',
},
'fierce': {
'regex': r'(?s)Trying zone transfer first.+Checking for wildcard DNS',
'tag': 'infoleak:automatic-detection="fierce-tool"',
},
'firewalk': {
'regex': r'Firewalk state initialization completed successfully|Ramping phase source port',
'tag': 'infoleak:automatic-detection="firewalk-tool"',
},
'fragroute': {
'regex': r'\bfragroute: tcp_seg -> ip_frag',
'tag': 'infoleak:automatic-detection="fragroute-tool"',
},
'fragrouter': {
'regex': r'fragrouter: frag-\d+:',
'tag': 'infoleak:automatic-detection="fragrouter-tool"',
},
'goofile': {
'regex': r'code.google.com/p/goofile\b',
'tag': 'infoleak:automatic-detection="goofile-tool"',
},
'intrace': {
'regex': r'\bInTrace [\d.]+ \-\-',
'tag': 'infoleak:automatic-detection="intrace-tool"',
},
'ismtp': {
'regex': r'Testing SMTP server \[user enumeration\]',
'tag': 'infoleak:automatic-detection="ismtp-tool"',
},
'lbd': {
'regex': r'Checking for (DNS|HTTP)-Loadbalancing',
'tag': 'infoleak:automatic-detection="lbd-tool"',
},
'miranda': {
'regex': r'Entering discovery mode for \'upnp:',
'tag': 'infoleak:automatic-detection="miranda-tool"',
},
'ncat': {
'regex': r'nmap.org/ncat',
'tag': 'infoleak:automatic-detection="ncat-tool"',
},
'ohrwurm': {
'regex': r'\bohrwurm-[\d.]+',
'tag': 'infoleak:automatic-detection="ohrwurm-tool"',
},
'oscanner': {
'regex': r'Loading services/sids from service file',
'tag': 'infoleak:automatic-detection="oscanner-tool"',
},
'sfuzz': {
'regex': r'AREALLYBADSTRING|sfuzz/sfuzz',
'tag': 'infoleak:automatic-detection="sfuzz-tool"',
},
'sidguess': {
'regex': r'SIDGuesser v[\d.]+',
'tag': 'infoleak:automatic-detection="sidguess-tool"',
},
'sqlninja': {
'regex': r'Sqlninja rel\. [\d.]+',
'tag': 'infoleak:automatic-detection="sqlninja-tool"',
},
'sqlsus': {
'regex': r'sqlsus version [\d.]+',
'tag': 'infoleak:automatic-detection="sqlsus-tool"',
},
'dnsdict6': {
'regex': r'Starting DNS enumeration work on',
'tag': 'infoleak:automatic-detection="dnsdict6-tool"',
},
'unixprivesccheck': {
'regex': r'Recording Interface IP addresses',
'tag': 'infoleak:automatic-detection="unixprivesccheck-tool"',
},
'yersinia': {
'regex': r'yersinia@yersinia.net',
'tag': 'infoleak:automatic-detection="yersinia-tool"',
},
'armitage': {
'regex': r'\[\*\] Starting msfrpcd for you',
'tag': 'infoleak:automatic-detection="armitage-tool"',
},
'backdoorfactory': {
'regex': r'\[\*\] In the backdoor module',
'tag': 'infoleak:automatic-detection="backdoorfactory-tool"',
},
'beef': {
'regex': r'Please wait as BeEF services are started',
'tag': 'infoleak:automatic-detection="beef-tool"',
},
'cat': {
'regex': r'Cisco Auditing Tool.+?g0ne',
'tag': 'infoleak:automatic-detection="cat-tool"',
},
'cge': {
'regex': r'Vulnerability successful exploited with \[',
'tag': 'infoleak:automatic-detection="cge-tool"',
},
'john': {
'regex': r'John the Ripper password cracker, ver:|Loaded \d+ password hash \(',
'tag': 'infoleak:automatic-detection="john-tool"',
},
'keimpx': {
'regex': r'\bkeimpx [\d.]+',
'tag': 'infoleak:automatic-detection="keimpx-tool"',
},
'maskprocessor': {
'regex': r'mp by atom, High-Performance word generator',
'tag': 'infoleak:automatic-detection="maskprocessor-tool"',
},
'ncrack': {
'regex': r'Starting Ncrack[^\n]+http://ncrack.org',
'tag': 'infoleak:automatic-detection="ncrack-tool"',
},
'patator': {
'regex': r'http://code.google.com/p/patator/|Starting Patator v',
'tag': 'infoleak:automatic-detection="patator-tool"',
},
'phrasendrescher': {
'regex': r'phrasen\|drescher [\d.]+',
'tag': 'infoleak:automatic-detection="phrasendrescher-tool"',
},
'polenum': {
'regex': r'\[\+\] Password Complexity Flags:',
'tag': 'infoleak:automatic-detection="polenum-tool"',
},
'rainbowcrack': {
'regex': r'Official Website: http://project-rainbowcrack.com/',
'tag': 'infoleak:automatic-detection="rainbowcrack-tool"',
},
'rcracki_mt': {
'regex': r'Found \d+ rainbowtable files\.\.\.',
'tag': 'infoleak:automatic-detection="rcracki_mt-tool"',
},
'tcpdump': {
'regex': r'tcpdump: listening on.+capture size \d+|\d+ packets received by filter',
'tag': 'infoleak:automatic-detection="tcpdump-tool"',
},
'hydra': {
'regex': r'Hydra \(http://www.thc.org/thc-hydra\)',
'tag': 'infoleak:automatic-detection="hydra-tool"',
},
'netcat': {
'regex': r'Listening on \[[\d.]+\] \(family',
'tag': 'infoleak:automatic-detection="netcat-tool"',
},
'nslookup': {
'regex': r'Non-authoritative answer:',
'tag': 'infoleak:automatic-detection="nslookup-tool"',
},
'dig': {
'regex': r'; <<>> DiG [\d.]+',
'tag': 'infoleak:automatic-detection="dig-tool"',
},
'whois': {
'regex': r'(?i)Registrar WHOIS Server:|Registrar URL: http://|DNSSEC: unsigned|information on Whois status codes|REGISTERED, DELEGATED|[Rr]egistrar:|%[^\n]+(WHOIS|2016/679)',
'tag': 'infoleak:automatic-detection="whois-tool"',
},
'nessus': {
'regex': r'nessus_(report_(get|list|exploits)|scan_(new|status))|nessuscli|nessusd|nessus-service',
'tag': 'infoleak:automatic-detection="nessus-tool"',
},
'openvas': {
'regex': r'/openvas/',
'tag': 'infoleak:automatic-detection="openvas-tool"',
},
'golismero': {
'regex': r'GoLismero[\n]+The Web Knife',
'tag': 'infoleak:automatic-detection="golismero-tool"',
},
'wpscan': {
'regex': r'WordPress Security Scanner by the WPScan Team|\[\+\] Interesting header:',
'tag': 'infoleak:automatic-detection="wpscan-tool"',
},
'skipfish': {
'regex': r'\[\+\] Sorting and annotating crawl nodes:|skipfish version [\d.]+',
'tag': 'infoleak:automatic-detection="skipfish-tool"',
},
'arachni': {
'regex': r'With the support of the community and the Arachni Team|\[\*\] Waiting for plugins to settle\.\.\.',
'tag': 'infoleak:automatic-detection="arachni-tool"',
},
'dirb': {
'regex': r'==> DIRECTORY:|\bDIRB v[\d.]+',
'tag': 'infoleak:automatic-detection="dirb-tool"',
},
'joomscan': {
'regex': r'OWASP Joomla! Vulnerability Scanner v[\d.]+',
'tag': 'infoleak:automatic-detection="joomscan-tool"',
},
'jbossautopwn': {
'regex': r'\[x\] Now creating BSH script\.\.\.|\[x\] Now deploying \.war file:',
'tag': 'infoleak:automatic-detection="jbossautopwn-tool"',
},
'grabber': {
'regex': r'runSpiderScan @',
'tag': 'infoleak:automatic-detection="grabber-tool"',
},
'fimap': {
'regex': r'Automatic LFI/RFI scanner and exploiter',
'tag': 'infoleak:automatic-detection="fimap-tool"',
},
'dsxs': {
'regex': r'Damn Small XSS Scanner \(DSXS\)',
'tag': 'infoleak:automatic-detection="dsxs-tool"',
},
'dsss': {
'regex': r'Damn Small SQLi Scanner \(DSSS\)',
'tag': 'infoleak:automatic-detection="dsss-tool"',
},
'dsjs': {
'regex': r'Damn Small JS Scanner \(DSJS\)',
'tag': 'infoleak:automatic-detection="dsjs-tool"',
},
'dsfs': {
'regex': r'Damn Small FI Scanner \(DSFS\)',
'tag': 'infoleak:automatic-detection="dsfs-tool"',
},
'identywaf': {
'regex': r'\[o\] initializing handlers\.\.\.',
'tag': 'infoleak:automatic-detection="identywaf-tool"',
},
'whatwaf': {
'regex': r'<sCRIPT>ALeRt.+?WhatWaf\?',
'tag': 'infoleak:automatic-detection="whatwaf-tool"',
}
}
class Tools(AbstractModule):
    """
    Tools module for AIL framework

    Searches item content for the output of the tools described in TOOLS
    and tags the item with the tag of every tool found.
    """

    def __init__(self):
        super(Tools, self).__init__()

        self.max_execution_time = 30
        # Waiting time in seconds between two processed messages
        self.pending_seconds = 10
        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def get_tools(self):
        """Return the names (keys) of all the tools in TOOLS."""
        return TOOLS.keys()

    def extract(self, obj_id, content, tag):
        """Return the matches of the tool identified by ``tag`` in ``content``.

        The tool is resolved through the 'tag' field of its TOOLS entry,
        not by slicing the tag text, so an entry whose key differs from the
        name embedded in its tag is still found.

        :return: the result of ``regex_finditer`` for the tool regex, or
            ``[]`` when no tool uses this tag
        """
        for tool in TOOLS.values():
            if tool['tag'] == tag:
                return self.regex_finditer(tool['regex'], obj_id, content)
        return []

    def compute(self, message):
        """Tag the item whose id is ``message`` with every tool whose regex
        matches its content."""
        item = Item(message)
        content = item.get_content()

        for tool_name, tool in TOOLS.items():
            match = self.regex_search(tool['regex'], item.id, content)
            if match:
                print(f'{item.id} found: {tool_name}')
                # Tag Item
                msg = f"{tool['tag']};{item.id}"
                self.send_message_to_queue(msg, 'Tags')
                # TODO ADD LOGS
# Manual test: process a single stored item instead of running the module loop.
if __name__ == '__main__':
    module = Tools()
    # module.run()
    module.compute('crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b')

View file

@ -47,7 +47,7 @@ class AbstractModule(ABC):
self.redis_logger.channel = logger_channel
#Cache key
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
self.r_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
self.max_execution_time = 30
# Run module endlessly
@ -81,6 +81,12 @@ class AbstractModule(ABC):
self.process.populate_set_out(message, queue_name)
# add to new set_module
def regex_search(self, regex, obj_id, content):
    """
    regex search helper, delegating to regex_helper.regex_search

    :param regex: pattern to look for
    :param obj_id: id of the object the content belongs to
    :param content: text to scan
    :return: whatever regex_helper.regex_search returns
    """
    # The module execution time limit is forwarded as max_time
    time_limit = self.max_execution_time
    return regex_helper.regex_search(self.r_cache_key, regex, obj_id, content, max_time=time_limit)
def regex_finditer(self, regex, obj_id, content):
    """
    regex finditer helper, delegating to regex_helper.regex_finditer

    :param regex: pattern to look for
    :param obj_id: id of the object the content belongs to
    :param content: text to scan
    :return: whatever regex_helper.regex_finditer returns
    """
    # The module execution time limit is forwarded as max_time
    time_limit = self.max_execution_time
    return regex_helper.regex_finditer(self.r_cache_key, regex, obj_id, content, max_time=time_limit)
def regex_findall(self, regex, id, content):
"""
regex findall helper (force timeout)
@ -90,7 +96,7 @@ class AbstractModule(ABC):
ex: send_to_queue(item_id, 'Global')
"""
return regex_helper.regex_findall(self.module_name, self.redis_cache_key, regex, id, content, max_time=self.max_execution_time)
return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, id, content, max_time=self.max_execution_time)
def run(self):
"""

View file

@ -126,7 +126,7 @@ subscribe = Redis_Global
subscribe = Redis_Credential
publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
[Cve]
[CveModule]
subscribe = Redis_Cve
publish = Redis_Tags

@ -1 +1 @@
Subproject commit aba1321b34e18122ec1825b54e2fc8176a4bd25c
Subproject commit de12f46ba6305d457b1e248cfeeec89827ec93c9

View file

@ -248,7 +248,7 @@ services:
depends_on:
- redis-log
entrypoint:
- /opt/AIL/bin/Cve.py
- /opt/AIL/bin/CveModule.py
network_mode: service:flask
image: ail-framework
volumes:

View file

@ -26,6 +26,8 @@ from lib.objects.Screenshots import Screenshot
from lib import Tag
from export import Export
from lib import module_extractor
# ============ BLUEPRINT ============
objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item'))
@ -73,10 +75,12 @@ def showItem(): # # TODO: support post
# # TODO: ADD in Export SECTION
meta['hive_case'] = Export.get_item_hive_cases(item_id)
extracted = module_extractor.extract(item.id, content=meta['content'])
return render_template("show_item.html", bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
is_hive_connected=Export.get_item_hive_cases(item_id),
meta=meta)
meta=meta, extracted=extracted)
# kvrocks data

View file

@ -16,6 +16,8 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects import ail_objects
from packages.Date import Date
# ============ VARIABLES ============
@ -167,22 +169,9 @@ def get_all_types_id(correlation_type):
else:
return []
def is_valid_type_id(correlation_type, type_id):
    """Return True when type_id is among the ids listed by get_all_types_id(correlation_type), False otherwise."""
    return type_id in get_all_types_id(correlation_type)
def get_key_id_metadata(correlation_type, type_id, key_id):
    """
    Read the first_seen / last_seen / nb_seen metadata of a correlation key.

    :return: dict with 'first_seen', 'last_seen' and 'nb_seen', or an empty
             dict when the metadata key does not exist.
    """
    metadata_key = '{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)
    if not r_serv_metadata.exists(metadata_key):
        return {}

    metadata = {}
    for field in ('first_seen', 'last_seen'):
        raw_date = r_serv_metadata.hget(metadata_key, field)
        # The first 8 characters are re-sliced as 4/2/2 and joined with '/'
        # (presumably a YYYYMMDD date -- confirm against the writer side)
        metadata[field] = '{}/{}/{}'.format(raw_date[0:4], raw_date[4:6], raw_date[6:8])
    metadata['nb_seen'] = r_serv_metadata.scard('set_{}_{}:{}'.format(correlation_type, type_id, key_id))
    return metadata
def get_key_id_metadata(obj_type, subtype, obj_id):
    """
    Fetch an object through ail_objects.get_object_meta and return the result of its _get_meta() method.
    """
    # NOTE(review): relies on get_object_meta() returning an object that
    # exposes _get_meta() -- confirm against lib.objects.ail_objects
    return ail_objects.get_object_meta(obj_type, subtype, obj_id)._get_meta()
def list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id):
sparklines_value = []
@ -250,7 +239,7 @@ def main_correlation_page(correlation_type, type_id, date_from, date_to, show_de
if type_id is not None:
#retrieve char
type_id = type_id.replace(' ', '')
if not is_valid_type_id(correlation_type, type_id):
if not ail_objects.is_valid_object_subtype(correlation_type, type_id):
type_id = None
date_range = []
@ -897,7 +886,7 @@ def pgpdump_graph_line_json():
def correlation_graph_line_json(correlation_type, type_id, key_id, date_from, date_to):
# verify input
if key_id is not None and is_valid_type_id(correlation_type, type_id) and r_serv_metadata.exists('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)):
if key_id is not None and ail_objects.is_valid_object_subtype(correlation_type, type_id) and ail_objects.exists_obj(correlation_type, type_id, key_id):
if date_from is None or date_to is None:
nb_days_seen_in_pastes = 30

View file

@ -10,6 +10,7 @@
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
<link href="{{ url_for('static', filename='css/ail-project.css') }}" rel="stylesheet">
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
@ -256,6 +257,8 @@
</div>
</div>
</div>
</div>
{% endif %}
@ -346,6 +349,13 @@
{% endif %}
{% if extracted %}
{% for row in extracted %}
<div><a href="#{{ row[0] }}:{{ row[1] }}">{{ row[2] }}</a></div>
{% endfor %}
{% endif %}
<!-- nav-pills nav-justified nav-tabs-->
<div class="card">
@ -367,15 +377,21 @@
</li>
</ul>
<div class="tab-content" id="pills-tabContent">
<div class="tab-pane fade show active" id="pills-content" role="tabpanel" aria-labelledby="pills-content-tab">
<p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
{% if not extracted %}
<p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
{% else %}
<p class="my-0"> <pre class="border">{{ meta['content'][:extracted[0][0]] }}{% for row in extracted %}<span class="hg-text" id="{{ row[0] }}:{{ row[1] }}">{{ meta['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > extracted|length %}{{ meta['content'][extracted[-1][1]:] }}{% else %}{{ meta['content'][row[1]:extracted[loop.index][0]] }}{% endif %}{% endfor %}</pre></p>
{% endif %}
</div>
<div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab">
<p class="my-0"> <pre id="html2text-container" class="border"></pre></p>
</div>
</div>
</div>
</div>