From 7a4989ce10737b59dd232818fd438465c4ec3677 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 12 Feb 2019 15:45:58 +0100 Subject: [PATCH 1/8] fix: [Global Crawler] max filename size --- bin/Global.py | 20 ++++++++++---------- bin/Onion.py | 8 ++++++-- bin/torcrawler/TorSplashCrawler.py | 6 +++++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/bin/Global.py b/bin/Global.py index 32a3656b..2e4595eb 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -23,23 +23,17 @@ Requirements import base64 import os import time +import uuid from pubsublogger import publisher from Helper import Process import magic -import io -#import gzip -''' -def gunzip_bytes_obj(bytes_obj): - in_ = io.BytesIO() - in_.write(bytes_obj) - in_.seek(0) - with gzip.GzipFile(fileobj=in_, mode='rb') as fo: - gunzipped_bytes_obj = fo.read() +def rreplace(s, old, new, occurrence): + li = s.rsplit(old, occurrence) + return new.join(li) - return gunzipped_bytes_obj.decode()''' if __name__ == '__main__': publisher.port = 6380 @@ -77,6 +71,12 @@ if __name__ == '__main__': processed_paste = 0 time.sleep(1) continue + + file_name_paste = paste.split('/')[-1] + if len(file_name_paste)>255: + new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4())) + paste = rreplace(paste, file_name_paste, new_file_name_paste, 1) + # Creating the full filepath filename = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"), paste) diff --git a/bin/Onion.py b/bin/Onion.py index 801118d5..d15875e4 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -198,8 +198,12 @@ if __name__ == "__main__": print(len(domains_list)) if len(domains_list) > 0: - publisher.warning('{}Detected {} .onion(s);{}'.format( - to_print, len(domains_list),PST.p_path)) + if not activate_crawler: + publisher.warning('{}Detected {} .onion(s);{}'.format( + to_print, len(domains_list),PST.p_path)) + else: + publisher.info('{}Detected {} .onion(s);{}'.format( + to_print, len(domains_list),PST.p_path)) now 
= datetime.datetime.now() path = os.path.join('onions', str(now.year).zfill(4), str(now.month).zfill(2), diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 99a4f3b3..dbe6bbd6 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -126,7 +126,11 @@ class TorSplashCrawler(): print('Connection to proxy refused') else: - UUID = self.domains[0]+str(uuid.uuid4()) + #avoid filename too big + if self.domains[0] > 225: + UUID = self.domains[0][-215:]+str(uuid.uuid4()) + else + UUID = self.domains[0]+str(uuid.uuid4()) filename_paste = os.path.join(self.crawled_paste_filemame, UUID) relative_filename_paste = os.path.join(self.crawler_path, UUID) filename_screenshot = os.path.join(self.crawled_screenshot, UUID +'.png') From 7cb03fc76995ccb86dff4b02b2f1256564d7c528 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 12 Feb 2019 15:51:19 +0100 Subject: [PATCH 2/8] fix: [Crawler] typo --- bin/torcrawler/TorSplashCrawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index dbe6bbd6..894275de 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -129,7 +129,7 @@ class TorSplashCrawler(): #avoid filename too big if self.domains[0] > 225: UUID = self.domains[0][-215:]+str(uuid.uuid4()) - else + else: UUID = self.domains[0]+str(uuid.uuid4()) filename_paste = os.path.join(self.crawled_paste_filemame, UUID) relative_filename_paste = os.path.join(self.crawler_path, UUID) From 37276e52a3fe9b75fb83c07ca556a13ccccd9d50 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 12 Feb 2019 15:53:40 +0100 Subject: [PATCH 3/8] fix: [Crawler] typo --- bin/torcrawler/TorSplashCrawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 894275de..24b7138e 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ 
b/bin/torcrawler/TorSplashCrawler.py @@ -127,7 +127,7 @@ class TorSplashCrawler(): else: #avoid filename too big - if self.domains[0] > 225: + if len(self.domains[0]) > 225: UUID = self.domains[0][-215:]+str(uuid.uuid4()) else: UUID = self.domains[0]+str(uuid.uuid4()) From b87707e8bc313f30dcb083eed70bcb30dd8e3777 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 12 Feb 2019 15:54:42 +0100 Subject: [PATCH 4/8] fix: [Crawler] typo --- bin/torcrawler/TorSplashCrawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 24b7138e..6bb4d938 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -127,7 +127,7 @@ class TorSplashCrawler(): else: #avoid filename too big - if len(self.domains[0]) > 225: + if len(self.domains[0]) > 215: UUID = self.domains[0][-215:]+str(uuid.uuid4()) else: UUID = self.domains[0]+str(uuid.uuid4()) From 709d5487b86af3171e0278c139886767492dd255 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 13 Feb 2019 16:53:38 +0100 Subject: [PATCH 5/8] fix: [Onion] filter too many subdomain --- bin/Onion.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/Onion.py b/bin/Onion.py index d15875e4..d55d3506 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -224,6 +224,10 @@ if __name__ == "__main__": else: continue + # too many subdomain + if len(domain.split('.')) > 5: + continue + if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:'+date , domain): if not r_onion.sismember('onion_domain_crawler_queue', domain): print('send to onion crawler') From 0dc27c37e06336b95105600a4783313c32aed6d0 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Sun, 17 Feb 2019 09:31:59 +0100 Subject: [PATCH 6/8] chg: [doc] badge release updated --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e4161f30..9ca65d2a 100644 --- 
a/README.md +++ b/README.md @@ -12,7 +12,7 @@ AIL is a modular framework to analyse potential information leaks from unstructu - + From 1114aa2ffdc41c465863bfc8f92beb0c5175340c Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 21 Feb 2019 09:58:10 +0100 Subject: [PATCH 7/8] chg: [CVE] add stat script --- bin/CVE_check.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100755 bin/CVE_check.py diff --git a/bin/CVE_check.py b/bin/CVE_check.py new file mode 100755 index 00000000..d8a83e0b --- /dev/null +++ b/bin/CVE_check.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +from packages import Paste +from Helper import Process + +import os +import re +import time +import redis +import configparser + +from collections import defaultdict + +def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False): + dict_keyword = {} + + for paste_cve in list_paste_cve: + paste_content = Paste.Paste(paste_cve).get_p_content() + + cve_list = reg_cve.findall(paste_content) + if only_one_same_cve_by_paste: + cve_list = set(cve_list) + + for cve in reg_cve.findall(paste_content): + try: + dict_keyword[cve] += 1 + except KeyError: + dict_keyword[cve] = 1 + + print('------------------------------------------------') + if dict_keyword: + res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)] + for item in res: + pass + print(item) + + + +if __name__ == '__main__': + + # CONFIG # + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? 
\ + Or activate the virtualenv.') + + cfg = configparser.ConfigParser() + cfg.read(configfile) + + serv_metadata = redis.StrictRedis( + host=cfg.get("ARDB_Metadata", "host"), + port=cfg.getint("ARDB_Metadata", "port"), + db=cfg.getint("ARDB_Metadata", "db"), + decode_responses=True) + + serv_tags = redis.StrictRedis( + host=cfg.get("ARDB_Tags", "host"), + port=cfg.get("ARDB_Tags", "port"), + db=cfg.get("ARDB_Tags", "db"), + decode_responses=True) + + reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}') + + #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"') + #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"') + #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"') + + #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve))) + + print('ALL_CVE') + get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True) + print() + print() + print() + print('REGULAR_CVE') + get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True) + print() + print() + print() + print('CRAWLER_CVE') + get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True) From 3d36ddbc1e7b34f24aadafa47930d184c8617c13 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 21 Feb 2019 10:51:05 +0100 Subject: [PATCH 8/8] fix: [CVE] fix stat regex --- bin/CVE_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/CVE_check.py b/bin/CVE_check.py index d8a83e0b..63f611de 100755 --- a/bin/CVE_check.py +++ b/bin/CVE_check.py @@ -61,7 +61,7 @@ if __name__ == '__main__': db=cfg.get("ARDB_Tags", "db"), decode_responses=True) - reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}') + reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}') #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"') 
#all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
Latest Release / GitHub version
Contributors