From 5872cf9196da2567be49dddcc75ddf6a77e9ef56 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 2 Nov 2018 16:07:27 +0100 Subject: [PATCH 01/85] fix: [Scripts] Remove absolute path --- bin/ApiKey.py | 6 +++--- bin/Attributes.py | 4 ++-- bin/BankAccount.py | 4 ++-- bin/Bitcoin.py | 4 ++-- bin/Categ.py | 10 ++-------- bin/Credential.py | 2 +- bin/CreditCards.py | 6 +++--- bin/Decoder.py | 2 +- bin/DomClassifier.py | 6 +++--- bin/Duplicates.py | 10 +++++----- bin/Global.py | 13 +++++++++---- bin/LibInjection.py | 2 +- bin/Lines.py | 9 +++++---- bin/MISP_The_Hive_feeder.py | 2 +- bin/Mail.py | 2 +- bin/Mixer.py | 8 ++++++-- bin/Onion.py | 14 +++++++------- bin/RegexForTermsFrequency.py | 2 +- bin/Release.py | 4 ++-- bin/SQLInjectionDetection.py | 4 ++-- bin/Tokenize.py | 4 ++-- bin/Web.py | 2 +- bin/packages/Paste.py | 5 ++++- var/www/modules/showpaste/Flask_showpaste.py | 18 ++++++++++-------- 24 files changed, 76 insertions(+), 67 deletions(-) diff --git a/bin/ApiKey.py b/bin/ApiKey.py index faf4b2d9..bab2745c 100755 --- a/bin/ApiKey.py +++ b/bin/ApiKey.py @@ -40,7 +40,7 @@ def search_api_key(message): print('found google api key') print(to_print) publisher.warning('{}Checked {} found Google API Key;{}'.format( - to_print, len(google_api_key), paste.p_path)) + to_print, len(google_api_key), paste.p_rel_path)) msg = 'infoleak:automatic-detection="google-api-key";{}'.format(filename) p.populate_set_out(msg, 'Tags') @@ -49,7 +49,7 @@ def search_api_key(message): print(to_print) total = len(aws_access_key) + len(aws_secret_key) publisher.warning('{}Checked {} found AWS Key;{}'.format( - to_print, total, paste.p_path)) + to_print, total, paste.p_rel_path)) msg = 'infoleak:automatic-detection="aws-key";{}'.format(filename) p.populate_set_out(msg, 'Tags') @@ -86,7 +86,7 @@ if __name__ == "__main__": if message is not None: - search_api_key(message) + search_api_key(message) else: publisher.debug("Script ApiKey is Idling 10s") diff --git a/bin/Attributes.py b/bin/Attributes.py index a29f34b3..74357065 100755 --- a/bin/Attributes.py +++ b/bin/Attributes.py @@ -43,8 +43,8 @@ if __name__ == "__main__": # FIXME why not all saving everything there. PST.save_all_attributes_redis() # FIXME Not used. 
- PST.store.sadd("Pastes_Objects", PST.p_path) + PST.store.sadd("Pastes_Objects", PST.p_rel_path) except IOError: - print("CRC Checksum Failed on :", PST.p_path) + print("CRC Checksum Failed on :", PST.p_rel_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/BankAccount.py b/bin/BankAccount.py index 06e86d06..cd58e3c3 100755 --- a/bin/BankAccount.py +++ b/bin/BankAccount.py @@ -67,7 +67,7 @@ def check_all_iban(l_iban, paste, filename): if(nb_valid_iban > 0): to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) publisher.warning('{}Checked found {} IBAN;{}'.format( - to_print, nb_valid_iban, paste.p_path)) + to_print, nb_valid_iban, paste.p_rel_path)) msg = 'infoleak:automatic-detection="iban";{}'.format(filename) p.populate_set_out(msg, 'Tags') @@ -113,7 +113,7 @@ if __name__ == "__main__": try: l_iban = iban_regex.findall(content) except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/Bitcoin.py b/bin/Bitcoin.py index 1b7694b7..da1fc22a 100755 --- a/bin/Bitcoin.py +++ b/bin/Bitcoin.py @@ -32,7 +32,7 @@ def decode_base58(bc, length): for char in bc: n = n * 58 + digits58.index(char) return n.to_bytes(length, 'big') - + def check_bc(bc): try: bcbytes = decode_base58(bc, 25) @@ -75,7 +75,7 @@ def search_key(content, message, paste): to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) publisher.warning('{}Detected {} Bitcoin private key;{}'.format( - to_print, len(bitcoin_private_key),paste.p_path)) + to_print, len(bitcoin_private_key),paste.p_rel_path)) if __name__ == "__main__": publisher.port = 6380 diff --git a/bin/Categ.py b/bin/Categ.py index cf78f90f..3ebc42ea 100755 --- a/bin/Categ.py +++ b/bin/Categ.py @@ -89,16 +89,10 @@ if __name__ == "__main__": paste = Paste.Paste(filename) content = paste.get_p_content() - #print('-----------------------------------------------------') - #print(filename) - #print(content) - #print('-----------------------------------------------------') - for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) if len(found) >= matchingThreshold: - msg = '{} {}'.format(paste.p_path, len(found)) - #msg = " ".join( [paste.p_path, bytes(len(found))] ) + msg = '{} {}'.format(paste.p_rel_path, len(found)) print(msg, categ) p.populate_set_out(msg, categ) @@ -106,4 +100,4 @@ if __name__ == "__main__": publisher.info( 'Categ;{};{};{};Detected {} as {};{}'.format( paste.p_source, paste.p_date, paste.p_name, - len(found), categ, paste.p_path)) + len(found), categ, paste.p_rel_path)) diff --git a/bin/Credential.py b/bin/Credential.py index 7f665227..417b30eb 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -97,7 +97,7 @@ if __name__ == "__main__": if sites_set: message += ' Related websites: {}'.format( (', '.join(sites_set)) ) - to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path) + to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_rel_path) print('\n '.join(creds)) diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 260d1345..a7921a6e 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -77,9 +77,9 @@ if __name__ == "__main__": paste.p_source, paste.p_date, paste.p_name) if (len(creditcard_set) > 0): publisher.warning('{}Checked {} valid 
number(s);{}'.format( - to_print, len(creditcard_set), paste.p_path)) + to_print, len(creditcard_set), paste.p_rel_path)) print('{}Checked {} valid number(s);{}'.format( - to_print, len(creditcard_set), paste.p_path)) + to_print, len(creditcard_set), paste.p_rel_path)) #Send to duplicate p.populate_set_out(filename, 'Duplicate') #send to Browse_warning_paste @@ -89,7 +89,7 @@ if __name__ == "__main__": msg = 'infoleak:automatic-detection="credit-card";{}'.format(filename) p.populate_set_out(msg, 'Tags') else: - publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path)) + publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_rel_path)) else: publisher.debug("Script creditcard is idling 1m") time.sleep(10) diff --git a/bin/Decoder.py b/bin/Decoder.py index abbf760b..fa18e5e6 100755 --- a/bin/Decoder.py +++ b/bin/Decoder.py @@ -229,7 +229,7 @@ if __name__ == '__main__': except TimeoutException: encoded_list = [] p.incr_module_timeout_statistic() # add encoder type - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py index aed87a55..1ae5ba13 100755 --- a/bin/DomClassifier.py +++ b/bin/DomClassifier.py @@ -54,14 +54,14 @@ def main(): if localizeddomains: print(localizeddomains) publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( - PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path)) + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_rel_path)) localizeddomains = c.localizedomain(cc=cc) if localizeddomains: print(localizeddomains) publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( - PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_rel_path)) except IOError: - print("CRC Checksum Failed on :", PST.p_path) + print("CRC Checksum Failed on :", PST.p_rel_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/Duplicates.py b/bin/Duplicates.py index 0c24bec1..611368a1 100755 --- a/bin/Duplicates.py +++ b/bin/Duplicates.py @@ -142,17 +142,17 @@ if __name__ == "__main__": paste_date = paste_date paste_date = paste_date if paste_date != None else "No date available" if paste_path != None: - if paste_path != PST.p_path: + if paste_path != PST.p_rel_path: hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) - print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)) + print('['+hash_type+'] '+'comparing: ' + str(PST.p_rel_path) + ' and ' + str(paste_path) + ' percentage: ' + str(percent)) except Exception: print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash) # Add paste in DB after checking to prevent its analysis twice # hash_type_i -> index_i AND index_i -> PST.PATH - r_serv1.set(index, PST.p_path) + r_serv1.set(index, PST.p_rel_path) r_serv1.set(index+'_date', PST._get_p_date()) r_serv1.sadd("INDEX", index) # Adding hashes in Redis @@ -180,7 +180,7 @@ if __name__ == "__main__": PST.__setattr__("p_duplicate", dupl) PST.save_attribute_duplicate(dupl) PST.save_others_pastes_attribute_duplicate(dupl) - publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path)) + publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_rel_path)) print('{}Detected 
{}'.format(to_print, len(dupl))) print('') @@ -191,5 +191,5 @@ if __name__ == "__main__": except IOError: to_print = 'Duplicate;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) - print("CRC Checksum Failed on :", PST.p_path) + print("CRC Checksum Failed on :", PST.p_rel_path) publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/Global.py b/bin/Global.py index 32a3656b..22b4c4e7 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -51,6 +51,9 @@ if __name__ == '__main__': p = Process(config_section) + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + print(PASTES_FOLDER) + # LOGGING # publisher.info("Feed Script started to receive & publish.") @@ -78,8 +81,9 @@ if __name__ == '__main__': time.sleep(1) continue # Creating the full filepath - filename = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "pastes"), paste) + filename = os.path.join(PASTES_FOLDER, paste) + print(filename) + print(paste) dirname = os.path.dirname(filename) if not os.path.exists(dirname): @@ -102,6 +106,7 @@ if __name__ == '__main__': print(filename) print(type) print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------') - ''' - p.populate_set_out(filename) + ''' + + p.populate_set_out(paste) processed_paste+=1 diff --git a/bin/LibInjection.py b/bin/LibInjection.py index 283bba00..5088d9c5 100755 --- a/bin/LibInjection.py +++ b/bin/LibInjection.py @@ -47,7 +47,7 @@ def analyse(url, path): paste = Paste.Paste(path) print("Detected (libinjection) SQL in URL: ") print(urllib.request.unquote(url)) - to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) + to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path) publisher.warning(to_print) #Send to duplicate p.populate_set_out(path, 'Duplicate') diff --git a/bin/Lines.py b/bin/Lines.py index 8c9f6827..e4187dc7 100755 --- a/bin/Lines.py +++ b/bin/Lines.py @@ -75,10 +75,11 @@ if __name__ == '__main__': PST.save_attribute_redis("p_max_length_line", lines_infos[1]) # FIXME Not used. 
- PST.store.sadd("Pastes_Objects", PST.p_path) + PST.store.sadd("Pastes_Objects", PST.p_rel_path) + print(PST.p_rel_path) if lines_infos[1] < args.max: - p.populate_set_out( PST.p_path , 'LinesShort') + p.populate_set_out( PST.p_rel_path , 'LinesShort') else: - p.populate_set_out( PST.p_path , 'LinesLong') + p.populate_set_out( PST.p_rel_path , 'LinesLong') except IOError: - print("CRC Checksum Error on : ", PST.p_path) + print("CRC Checksum Error on : ", PST.p_rel_path) diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py index 0a8f1791..c1ef414d 100755 --- a/bin/MISP_The_Hive_feeder.py +++ b/bin/MISP_The_Hive_feeder.py @@ -180,7 +180,7 @@ if __name__ == "__main__": if flag_the_hive or flag_misp: tag, path = message.split(';') paste = Paste.Paste(path) - source = '/'.join(paste.p_path.split('/')[-6:]) + source = '/'.join(paste.p_rel_path.split('/')[-6:]) full_path = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"), path) diff --git a/bin/Mail.py b/bin/Mail.py index 1f682661..33d8de43 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -78,7 +78,7 @@ if __name__ == "__main__": to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ format(PST.p_source, PST.p_date, PST.p_name, - MX_values[0], PST.p_path) + MX_values[0], PST.p_rel_path) if MX_values[0] > is_critical: publisher.warning(to_print) #Send to duplicate diff --git a/bin/Mixer.py b/bin/Mixer.py index e1656b8e..e41e8e0d 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -81,6 +81,8 @@ if __name__ == '__main__': operation_mode = cfg.getint("Module_Mixer", "operation_mode") ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate") + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + # STATS # processed_paste = 0 processed_paste_per_feeder = {} @@ -103,11 +105,12 @@ if __name__ == '__main__': feeder_name.replace(" ","") if 'import_dir' in feeder_name: feeder_name = feeder_name.split('/')[1] - paste_name = complete_paste except ValueError as e: feeder_name = "unnamed_feeder" - paste_name = complete_paste + + # remove absolute path + paste_name = complete_paste.replace(PASTES_FOLDER, '', 1) # Processed paste processed_paste += 1 @@ -118,6 +121,7 @@ if __name__ == '__main__': processed_paste_per_feeder[feeder_name] = 1 duplicated_paste_per_feeder[feeder_name] = 0 + relay_message = "{0} {1}".format(paste_name, gzip64encoded) #relay_message = b" ".join( [paste_name, gzip64encoded] ) diff --git a/bin/Onion.py b/bin/Onion.py index 1f233fcf..e38f363a 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -164,7 +164,7 @@ if __name__ == "__main__": r_onion.sadd('i2p_domain', domain) r_onion.sadd('i2p_link', url) r_onion.sadd('i2p_domain_crawler_queue', domain) - msg = '{};{}'.format(url,PST.p_path) + msg = '{};{}'.format(url,PST.p_rel_path) r_onion.sadd('i2p_crawler_queue', msg) ''' @@ -178,7 +178,7 @@ if __name__ == "__main__": if len(domains_list) > 0: publisher.warning('{}Detected {} .onion(s);{}'.format( - to_print, len(domains_list),PST.p_path)) + to_print, len(domains_list),PST.p_rel_path)) now = datetime.datetime.now() path = os.path.join('onions', str(now.year).zfill(4), str(now.month).zfill(2), @@ -203,19 +203,19 @@ if __name__ == "__main__": if not r_onion.sismember('onion_domain_crawler_queue', domain): print('send to onion crawler') r_onion.sadd('onion_domain_crawler_queue', domain) - msg = '{};{}'.format(url,PST.p_path) + msg = '{};{}'.format(url,PST.p_rel_path) r_onion.sadd('onion_crawler_queue', msg) #p.populate_set_out(msg, 'Crawler') else: for url in fetch(p, 
r_cache, urls, domains_list, path): - publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path)) - p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler') + publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_rel_path)) + p.populate_set_out('onion;{}'.format(PST.p_rel_path), 'alertHandler') - msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_path) + msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_rel_path) p.populate_set_out(msg, 'Tags') else: - publisher.info('{}Onion related;{}'.format(to_print, PST.p_path)) + publisher.info('{}Onion related;{}'.format(to_print, PST.p_rel_path)) prec_filename = filename else: diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index fae7a03a..4e98edcc 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -106,7 +106,7 @@ if __name__ == "__main__": try: matched = compiled_regex.search(content) except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/Release.py b/bin/Release.py index 43c84b04..d2f18441 100755 --- a/bin/Release.py +++ b/bin/Release.py @@ -54,7 +54,7 @@ if __name__ == "__main__": if len(releases) == 0: continue - to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) + to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_rel_path) print(to_print) if len(releases) > 30: publisher.warning(to_print) @@ -63,7 +63,7 @@ if __name__ == "__main__": except TimeoutException: p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index f03d7555..9464fd8a 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -78,7 +78,7 @@ def analyse(url, path): if (result_path > 1) or (result_query > 1): print("Detected SQL in URL: ") print(urllib.request.unquote(url)) - to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) + to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path) publisher.warning(to_print) #Send to duplicate p.populate_set_out(path, 'Duplicate') @@ -97,7 +97,7 @@ def analyse(url, path): else: print("Potential SQL injection:") print(urllib.request.unquote(url)) - to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path) + to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_rel_path) publisher.info(to_print) diff --git a/bin/Tokenize.py b/bin/Tokenize.py index 698b4fbc..4e13b9ff 100755 --- a/bin/Tokenize.py +++ b/bin/Tokenize.py @@ -57,11 +57,11 @@ if __name__ == "__main__": try: for word, score in paste._get_top_words().items(): if len(word) >= 4: - msg = '{} {} {}'.format(paste.p_path, word, score) + msg = '{} {} {}'.format(paste.p_rel_path, word, score) p.populate_set_out(msg) except TimeoutException: p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: 
signal.alarm(0) diff --git a/bin/Web.py b/bin/Web.py index 3d53e306..7cc96822 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -153,7 +153,7 @@ if __name__ == "__main__": pprint.pprint(A_values) publisher.info('Url;{};{};{};Checked {} URL;{}'.format( - PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path)) + PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path)) prec_filename = filename else: diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index d02a92f5..c5dcc0a6 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -101,7 +101,7 @@ class Paste(object): var = self.p_path.split('/') self.p_date = Date(var[-4], var[-3], var[-2]) - self.p_rel_path = os.path.join(var[-4], var[-3], var[-2], self.p_name) + self.p_date_path = os.path.join(var[-4], var[-3], var[-2], self.p_name) self.p_source = var[-5] self.supposed_url = 'https://{}/{}'.format(self.p_source.replace('_pro', ''), var[-1].split('.gz')[0]) @@ -304,6 +304,9 @@ class Paste(object): def get_p_rel_path(self): return self.p_rel_path + def get_p_date_path(self): + return self.p_date_path + def save_all_attributes_redis(self, key=None): """ Saving all the attributes in a "Redis-like" Database (Redis, LevelDB) diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 188af759..c24e3335 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -41,12 +41,10 @@ showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templa # ============ FUNCTIONS ============ def showpaste(content_range, requested_path): - relative_path = None - if PASTES_FOLDER not in requested_path: - relative_path = requested_path - requested_path = os.path.join(PASTES_FOLDER, requested_path) - # remove old full path - #requested_path = requested_path.replace(PASTES_FOLDER, '') + if PASTES_FOLDER in requested_path: + # remove full path + requested_path = requested_path.replace(PASTES_FOLDER, '', 1) + #requested_path = os.path.join(PASTES_FOLDER, requested_path) # escape directory transversal if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER: return 'path transversal detected' @@ -124,8 +122,12 @@ def showpaste(content_range, requested_path): active_taxonomies = r_serv_tags.smembers('active_taxonomies') l_tags = r_serv_metadata.smembers('tag:'+requested_path) + print(l_tags) if relative_path is not None: - l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) ) + print('union') + print(relative_path) + print(r_serv_metadata.smembers('tag:'+relative_path)) + l_tags = l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) ) #active galaxies active_galaxies = r_serv_tags.smembers('active_galaxies') @@ -189,7 +191,7 @@ def showpaste(content_range, requested_path): crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain') crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father') crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link') - crawler_metadata['screenshot'] = paste.get_p_rel_path() + crawler_metadata['screenshot'] = paste.get_p_date_path() else: crawler_metadata['get_metadata'] = False From 60ff0b9cf7a62886640cff44c827151927c76e05 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 12 Nov 2018 17:10:31 +0100 Subject: [PATCH 02/85] chg: [Update] add update script --- bin/Update.py | 297 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 
file changed, 297 insertions(+) create mode 100755 bin/Update.py diff --git a/bin/Update.py b/bin/Update.py new file mode 100755 index 00000000..73bec058 --- /dev/null +++ b/bin/Update.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import configparser +import os +import sys + +import subprocess + +def auto_update_enabled(cfg): + auto_update = cfg.get('Update', 'auto_update') + if auto_update == 'True' or auto_update == 'true': + return True + else: + return False + +# check if files are modify locally +def check_if_files_modified(): + process = subprocess.run(['git', 'ls-files' ,'-m'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if process.returncode == 0: + modified_files = process.stdout + if modified_files: + return False + else: + return True + else: + print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + return False + +def repo_is_fork(): + process = subprocess.run(['git', 'ls-remote', '--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if process.returncode == 0: + # remove url origin + local_remote = process.stdout + process = subprocess.run(['git', 'ls-remote' ,'--tags', AIL_REPO], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if process.returncode == 0: + ail_remote = process.stdout + print(local_remote) + print(ail_remote) + if local_remote == ail_remote: + return False + else: + return True + else: + print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + return False + +def is_upstream_created(upstream): + process = subprocess.run(['git', 'remote', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + output = process.stdout.decode() + if upstream in output: + return True + else: + return False + else: + print(process.stderr.decode()) + return None + +def create_fork_upstream(upstream): + process = subprocess.run(['git', 'remote', 'add', upstream, AIL_REPO], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + print(process.stdout.decode()) + if is_upstream_created(): + print('fork created') + else: + print('error, fork not created') + else: + print(process.stderr.decode()) + return None + +def update_fork(): + if cfg.get('Update', 'update-fork') == 'True' or cfg.get('Update', 'update-fork') == 'true': + upstream = cfg.get('Update', 'upstream') + if not is_upstream_created(upstream): + create_fork_upstream(upstream) + process = subprocess.run(['git', 'fetch', upstream], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + print(process.stdout.decode()) + process = subprocess.run(['git', 'checkout', 'master'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + print(process.stdout.decode()) + process = subprocess.run(['git', 'merge', '{}/master'.format(upstream)], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + print(process.stdout.decode()) + else: + print(process.stderr.decode()) + return None + else: + print(process.stderr.decode()) + return None + else: + print(process.stderr.decode()) + return None + + else: + print('auto update fork disabled, you can active it in ...') + + +def get_git_current_tag(current_version_path): + with open(current_version_path, 'r') as version_content: + version = version_content.read() + version = version.replace(" ", "").splitlines() + return version[0] + + ''' + process = subprocess.run(['git', 'describe' ,'--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if process.returncode == 0: + current_tag = process.stdout + 
current_tag = current_tag.split(b'-')[0] + return current_tag.decode() + else: + print(process.stderr.decode()) + return None + ''' + +def get_git_upper_tags_remote(current_tag): + process = subprocess.run(['git', 'ls-remote' ,'--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if process.returncode == 0: + list_all_tags = process.stdout.decode().splitlines() + list_all_tags.append('aaaaaaaaaaaaaaaaaaaaaaaaaaaaa\trefs/tags/v1.5') + list_all_tags.append('eeeeeeeeeeeeeeeeeeeeeeeeeeee\trefs/tags/v1.5^{}') + list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6') + list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6^{}') + list_all_tags.append('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz\trefs/tags/v1.7') + last_tag = list_all_tags[-1].split('\trefs/tags/') + last_commit = last_tag[0] + last_tag = last_tag[1].split('^{}')[0] + list_upper_tags = [] + if last_tag[1:] == current_tag: + list_upper_tags.append( (last_tag, last_commit) ) + return list_upper_tags + else: + for mess_tag in list_all_tags: + commit, tag = mess_tag.split('\trefs/tags/') + + # add tag with last commit + if float(tag.split('^{}')[0][1:]) >= float(current_tag): + if '^{}' in tag: + list_upper_tags.append( (tag.split('^{}')[0], commit) ) + # add last commit + if last_tag not in list_upper_tags[-1][0]: + list_upper_tags.append( (last_tag, last_commit) ) + return list_upper_tags + + else: + print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + return None + +def update_ail(current_tag, list_upper_tags_remote, current_version_path): + print('git checkout master:') + process = subprocess.run(['git', 'checkout', 'master'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if process.returncode == 0: + print('git pull:') + process = subprocess.run(['git', 'pull'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if process.returncode == 0: + output = process.stdout.decode() + print(output) + + if len(list_upper_tags_remote) == 1: + print('AIL Updated') + # # FIXME: # TODO: exit sucess + + else: + # map version with roll back commit + list_update = [] + previous_commit = list_upper_tags_remote[0][1] + for tuple in list_upper_tags_remote[1:]: + tag = tuple[0] + list_update.append( (tag, previous_commit) ) + previous_commit = tuple[1] + print(list_update) + + for update in list_update: + launch_update_version(update[0], update[1], current_version_path, is_fork) + else: + print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + return None + else: + print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + return None + +def launch_update_version(version, roll_back_commit, current_version_path, is_fork): + update_path = os.path.join(os.environ['AIL_HOME'], 'update', version, 'Update.sh') + print('------------------------------------------------------------------') + print('- Launching Update: {} -'.format(version)) + print('------------------------------------------------------------------') + process = subprocess.run(['bash', update_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + output = process.stdout + print(output) + + with open(current_version_path, 'w') as version_content: + version_content.write(version) + + else: + print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + if not is_fork: + roll_back_update(roll_back_commit) + +def roll_back_update(roll_back_commit): + process = subprocess.run(['git', 'checkout', roll_back_commit], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + output = 
process.stdout + print(output) + sys.exit() + else: + print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + sys.exit(1) + +''' + + if len(sys.argv) != 2: + print('usage:', 'Update-conf.py', 'Automatic (boolean)') + exit(1) + else: + automatic = sys.argv[1] + if automatic == 'True': + automatic = True + else: + automatic = False + + + if automatic: + resp = 'y' + else: + resp = input("Do you want to auto fix it? [y/n] ") + + if resp != 'y': + return False + else: + if automatic: + resp2 = 'y' + else: + resp2 = input("Do you want to keep a backup of the old configuration file? [y/n] ") +''' + +if __name__ == "__main__": + + TERMINAL_RED = '\033[91m' + TERMINAL_YELLOW = '\33[93m' + TERMINAL_DEFAULT = '\033[0m' + + AIL_REPO = 'https://github.com/CIRCL/AIL-framework.git' + + configfile = os.path.join(os.environ['AIL_HOME'], 'configs/update.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + cfg = configparser.ConfigParser() + cfg.read(configfile) + + current_version_path = os.path.join(os.environ['AIL_HOME'], 'update/current_version') + + print('******************************************************************') + print('* Updating AIL ... *') + print('******************************************************************') + + if auto_update_enabled(cfg): + if check_if_files_modified(): + is_fork = repo_is_fork() + if is_fork: + update_fork() + + current_tag = get_git_current_tag(current_version_path) + print('Current Version: {}'.format(current_tag)) + print() + list_upper_tags_remote = get_git_upper_tags_remote(current_tag[1:]) + # new realease + if len(list_upper_tags_remote) > 1: + print('New Releases:') + for upper_tag in list_upper_tags_remote: + print(' {}{}{}: {}'.format(TERMINAL_YELLOW, upper_tag[0], TERMINAL_DEFAULT, upper_tag[1])) + print() + update_ail(current_tag, list_upper_tags_remote, current_version_path, is_fork) + #else: + # print('your fork is outdated') + else: + print('please commit your change') + else: + print(' AIL Auto update is disabled') + print(' AIL not Updated') + print('******************************************************************') + ''' + if main(): + sys.exit() + else: + sys.exit(1) + ''' From 7aff45c50764406003e8dcea58f4cbb9160a4680 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Nov 2018 14:54:56 +0100 Subject: [PATCH 03/85] chg [Update] update AIL clone and fork --- bin/Update.py | 291 +++++++++++++++++++++++++++++--------------------- 1 file changed, 170 insertions(+), 121 deletions(-) diff --git a/bin/Update.py b/bin/Update.py index 73bec058..d525d228 100755 --- a/bin/Update.py +++ b/bin/Update.py @@ -1,6 +1,14 @@ #!/usr/bin/env python3 # -*-coding:UTF-8 -* +""" +Update AIL +============================ + +Update AIL clone and fork + +""" + import configparser import os import sys @@ -21,14 +29,18 @@ def check_if_files_modified(): if process.returncode == 0: modified_files = process.stdout if modified_files: + print('Modified Files:') + print('{}{}{}'.format(TERMINAL_BLUE, modified_files.decode(), TERMINAL_DEFAULT)) return False + #return True else: return True else: - print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) - return False + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + sys.exit(1) def repo_is_fork(): + print('Check if this repository is a fork:') process = subprocess.run(['git', 'ls-remote', '--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if 
process.returncode == 0: @@ -38,15 +50,21 @@ def repo_is_fork(): if process.returncode == 0: ail_remote = process.stdout - print(local_remote) - print(ail_remote) if local_remote == ail_remote: + print(' This repository is a {}clone of {}{}'.format(TERMINAL_BLUE, AIL_REPO, TERMINAL_DEFAULT)) return False else: + print(' This repository is a {}fork{}'.format(TERMINAL_BLUE, TERMINAL_DEFAULT)) + print() return True + else: + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) else: - print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) - return False + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) def is_upstream_created(upstream): process = subprocess.run(['git', 'remote', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -57,47 +75,64 @@ def is_upstream_created(upstream): else: return False else: - print(process.stderr.decode()) - return None + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) def create_fork_upstream(upstream): + print('{}... Creating upstream ...{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) + print('git remote add {} {}'.format(upstream, AIL_REPO)) process = subprocess.run(['git', 'remote', 'add', upstream, AIL_REPO], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: print(process.stdout.decode()) - if is_upstream_created(): - print('fork created') + if is_upstream_created(upstream): + print('Fork upstream created') + print('{}... ...{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) else: - print('error, fork not created') + print('Fork not created') + aborting_update() + sys.exit(0) else: - print(process.stderr.decode()) - return None + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) def update_fork(): + print('{}... Updating fork ...{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) if cfg.get('Update', 'update-fork') == 'True' or cfg.get('Update', 'update-fork') == 'true': upstream = cfg.get('Update', 'upstream') if not is_upstream_created(upstream): create_fork_upstream(upstream) + print('{}git fetch {}:{}'.format(TERMINAL_YELLOW, upstream, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'fetch', upstream], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: print(process.stdout.decode()) + print('{}git checkout master:{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'checkout', 'master'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: print(process.stdout.decode()) + print('{}git merge {}/master:{}'.format(TERMINAL_YELLOW, upstream, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'merge', '{}/master'.format(upstream)], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: print(process.stdout.decode()) + print('{}... 
...{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) else: - print(process.stderr.decode()) - return None + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(1) else: - print(process.stderr.decode()) - return None + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) else: - print(process.stderr.decode()) - return None + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) else: - print('auto update fork disabled, you can active it in ...') + print('{}Fork Auto-Update disabled in config file{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) def get_git_current_tag(current_version_path): @@ -106,58 +141,68 @@ def get_git_current_tag(current_version_path): version = version.replace(" ", "").splitlines() return version[0] - ''' - process = subprocess.run(['git', 'describe' ,'--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) +def get_git_upper_tags_remote(current_tag, is_fork): + if is_fork: + process = subprocess.run(['git', 'tag'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + list_all_tags = process.stdout.decode().splitlines() - if process.returncode == 0: - current_tag = process.stdout - current_tag = current_tag.split(b'-')[0] - return current_tag.decode() - else: - print(process.stderr.decode()) - return None - ''' - -def get_git_upper_tags_remote(current_tag): - process = subprocess.run(['git', 'ls-remote' ,'--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - if process.returncode == 0: - list_all_tags = process.stdout.decode().splitlines() - list_all_tags.append('aaaaaaaaaaaaaaaaaaaaaaaaaaaaa\trefs/tags/v1.5') - list_all_tags.append('eeeeeeeeeeeeeeeeeeeeeeeeeeee\trefs/tags/v1.5^{}') - list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6') - list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6^{}') - list_all_tags.append('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz\trefs/tags/v1.7') - last_tag = list_all_tags[-1].split('\trefs/tags/') - last_commit = last_tag[0] - last_tag = last_tag[1].split('^{}')[0] - list_upper_tags = [] - if last_tag[1:] == current_tag: - list_upper_tags.append( (last_tag, last_commit) ) + list_upper_tags = [] + if list_all_tags[-1][1:] == current_tag: + list_upper_tags.append( (list_all_tags[-1], None) ) + return list_upper_tags + for tag in list_all_tags: + if float(tag[1:]) >= float(current_tag): + list_upper_tags.append( (tag, None) ) return list_upper_tags else: - for mess_tag in list_all_tags: - commit, tag = mess_tag.split('\trefs/tags/') - - # add tag with last commit - if float(tag.split('^{}')[0][1:]) >= float(current_tag): - if '^{}' in tag: - list_upper_tags.append( (tag.split('^{}')[0], commit) ) - # add last commit - if last_tag not in list_upper_tags[-1][0]: - list_upper_tags.append( (last_tag, last_commit) ) - return list_upper_tags - + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) else: - print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) - return None + process = subprocess.run(['git', 'ls-remote' ,'--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) -def update_ail(current_tag, list_upper_tags_remote, current_version_path): - print('git checkout master:') + if process.returncode == 0: + list_all_tags = process.stdout.decode().splitlines() + 
list_all_tags.append('aaaaaaaaaaaaaaaaaaaaaaaaaaaaa\trefs/tags/v1.5') + list_all_tags.append('eeeeeeeeeeeeeeeeeeeeeeeeeeee\trefs/tags/v1.5^{}') + list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6') + list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6^{}') + #list_all_tags.append('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz\trefs/tags/v1.7') + last_tag = list_all_tags[-1].split('\trefs/tags/') + last_commit = last_tag[0] + last_tag = last_tag[1].split('^{}')[0] + list_upper_tags = [] + if last_tag[1:] == current_tag: + list_upper_tags.append( (last_tag, last_commit) ) + return list_upper_tags + else: + for mess_tag in list_all_tags: + commit, tag = mess_tag.split('\trefs/tags/') + + # add tag with last commit + if float(tag.split('^{}')[0][1:]) >= float(current_tag): + if '^{}' in tag: + list_upper_tags.append( (tag.split('^{}')[0], commit) ) + # add last commit + if last_tag not in list_upper_tags[-1][0]: + list_upper_tags.append( (last_tag, last_commit) ) + return list_upper_tags + + else: + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) + +def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_fork): + print('{}git checkout master:{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'checkout', 'master'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + #process = subprocess.run(['git', 'status'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: - print('git pull:') + print(process.stdout.decode()) + print() + print('{}git pull:{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'pull'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: @@ -165,8 +210,10 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path): print(output) if len(list_upper_tags_remote) == 1: - print('AIL Updated') - # # FIXME: # TODO: exit sucess + print() + print('{}**************** AIL Sucessfully Updated *****************{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) + print() + exit(0) else: # map version with roll back commit @@ -176,76 +223,78 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path): tag = tuple[0] list_update.append( (tag, previous_commit) ) previous_commit = tuple[1] - print(list_update) for update in list_update: launch_update_version(update[0], update[1], current_version_path, is_fork) + # Sucess + print('{}**************** AIL Sucessfully Updated *****************{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) + print() + sys.exit(0) else: - print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) - return None + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(1) else: - print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) - return None + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) def launch_update_version(version, roll_back_commit, current_version_path, is_fork): update_path = os.path.join(os.environ['AIL_HOME'], 'update', version, 'Update.sh') - print('------------------------------------------------------------------') - print('- Launching Update: {} -'.format(version)) - print('------------------------------------------------------------------') + print() + print('{}------------------------------------------------------------------'.format(TERMINAL_YELLOW)) + print('- Launching 
Update: {}{}{} -'.format(TERMINAL_BLUE, version, TERMINAL_YELLOW)) + print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --{}'.format(TERMINAL_DEFAULT)) process = subprocess.run(['bash', update_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: - output = process.stdout + output = process.stdout.decode() print(output) with open(current_version_path, 'w') as version_content: version_content.write(version) + print('{}-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --'.format(TERMINAL_YELLOW)) + print('- Sucessfully Updated: {}{}{} -'.format(TERMINAL_BLUE, version, TERMINAL_YELLOW)) + print('------------------------------------------------------------------{}'.format(TERMINAL_DEFAULT)) + print() else: - print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + print(process.stdout.decode()) + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + print('------------------------------------------------------------------') + print(' {}Update Error: {}{}{}'.format(TERMINAL_RED, TERMINAL_BLUE, version, TERMINAL_DEFAULT)) + print('------------------------------------------------------------------') if not is_fork: roll_back_update(roll_back_commit) + else: + aborting_update() + sys.exit(1) def roll_back_update(roll_back_commit): + print('Rolling back to safe commit: {}{}{}'.format(TERMINAL_BLUE ,roll_back_commit, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'checkout', roll_back_commit], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: output = process.stdout print(output) - sys.exit() + sys.exit(0) else: print(TERMINAL_RED+process.stderr.decode()+TERMINAL_DEFAULT) + aborting_update() sys.exit(1) -''' - - if len(sys.argv) != 2: - print('usage:', 'Update-conf.py', 'Automatic (boolean)') - exit(1) - else: - automatic = sys.argv[1] - if automatic == 'True': - automatic = True - else: - automatic = False - - - if automatic: - resp = 'y' - else: - resp = input("Do you want to auto fix it? [y/n] ") - - if resp != 'y': - return False - else: - if automatic: - resp2 = 'y' - else: - resp2 = input("Do you want to keep a backup of the old configuration file? [y/n] ") -''' +def aborting_update(): + print() + print('{}Aborting ...{}'.format(TERMINAL_RED, TERMINAL_DEFAULT)) + print('{}******************************************************************'.format(TERMINAL_RED)) + print('* AIL Not Updated *') + print('******************************************************************{}'.format(TERMINAL_DEFAULT)) + print() if __name__ == "__main__": TERMINAL_RED = '\033[91m' TERMINAL_YELLOW = '\33[93m' + TERMINAL_BLUE = '\33[94m' + TERMINAL_BLINK = '\33[6m' TERMINAL_DEFAULT = '\033[0m' AIL_REPO = 'https://github.com/CIRCL/AIL-framework.git' @@ -260,9 +309,9 @@ if __name__ == "__main__": current_version_path = os.path.join(os.environ['AIL_HOME'], 'update/current_version') - print('******************************************************************') + print('{}******************************************************************'.format(TERMINAL_YELLOW)) print('* Updating AIL ... 
*') - print('******************************************************************') + print('******************************************************************{}'.format(TERMINAL_DEFAULT)) if auto_update_enabled(cfg): if check_if_files_modified(): @@ -271,27 +320,27 @@ if __name__ == "__main__": update_fork() current_tag = get_git_current_tag(current_version_path) - print('Current Version: {}'.format(current_tag)) print() - list_upper_tags_remote = get_git_upper_tags_remote(current_tag[1:]) + print('Current Version: {}{}{}'.format( TERMINAL_YELLOW, current_tag, TERMINAL_DEFAULT)) + print() + list_upper_tags_remote = get_git_upper_tags_remote(current_tag[1:], is_fork) # new realease if len(list_upper_tags_remote) > 1: print('New Releases:') - for upper_tag in list_upper_tags_remote: - print(' {}{}{}: {}'.format(TERMINAL_YELLOW, upper_tag[0], TERMINAL_DEFAULT, upper_tag[1])) + if is_fork: + for upper_tag in list_upper_tags_remote: + print(' {}{}{}'.format(TERMINAL_BLUE, upper_tag[0], TERMINAL_DEFAULT)) + else: + for upper_tag in list_upper_tags_remote: + print(' {}{}{}: {}'.format(TERMINAL_BLUE, upper_tag[0], TERMINAL_DEFAULT, upper_tag[1])) print() update_ail(current_tag, list_upper_tags_remote, current_version_path, is_fork) - #else: - # print('your fork is outdated') + else: - print('please commit your change') + print('Please, commit your changes or stash them before you can update AIL') + aborting_update() + sys.exit(0) else: - print(' AIL Auto update is disabled') - print(' AIL not Updated') - print('******************************************************************') - ''' - if main(): - sys.exit() - else: - sys.exit(1) - ''' + print(' {}AIL Auto update is disabled{}'.format(TERMINAL_RED, TERMINAL_DEFAULT)) + aborting_update() + sys.exit(0) From 347986a2718a7f0bb3fac526c093ceb5c374a060 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Nov 2018 15:45:22 +0100 Subject: [PATCH 04/85] chg: [LAUNCH] add AIL update by default --- bin/LAUNCH.sh | 18 +++++++++++++++++- configs/update.cfg | 4 ++++ update/current_version | 1 + update/v1.5/Update.sh | 4 ++++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 configs/update.cfg create mode 100644 update/current_version create mode 100755 update/v1.5/Update.sh diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 684af83b..58497e01 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -382,6 +382,16 @@ function shutdown { bash -c "./Shutdown.py" } +function update() { + bash -c "./Update.py" + + exitStatus=$? + if [ $exitStatus -ge 1 ]; then + echo -e $RED"\t* Update Error"$DEFAULT + exit + fi +} + function update_thirdparty { echo -e "\t* Updating thirdparty..." 
bash -c "(cd ${AIL_FLASK}; ./update_thirdparty.sh)" @@ -395,6 +405,7 @@ function update_thirdparty { } function launch_all { + update; launch_redis; launch_ardb; launch_logs; @@ -408,7 +419,7 @@ function launch_all { helptext; - options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Flask" "Killall" "Shutdown" "Update-config" "Update-thirdparty") + options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Flask" "Killall" "Shutdown" "Update" "Update-config" "Update-thirdparty") menu() { echo "What do you want to Launch?:" @@ -459,6 +470,9 @@ function launch_all { Shutdown) shutdown; ;; + Update) + update; + ;; Update-config) checking_configuration "manual"; ;; @@ -478,6 +492,8 @@ while [ "$1" != "" ]; do ;; -k | --killAll ) killall; ;; + -u | --update ) update; + ;; -t | --thirdpartyUpdate ) update_thirdparty; ;; -c | --crawler ) launching_crawler; diff --git a/configs/update.cfg b/configs/update.cfg new file mode 100644 index 00000000..a72d8bc0 --- /dev/null +++ b/configs/update.cfg @@ -0,0 +1,4 @@ +[Update] +auto_update = True +upstream = upstream +update-fork = False diff --git a/update/current_version b/update/current_version new file mode 100644 index 00000000..b1f74215 --- /dev/null +++ b/update/current_version @@ -0,0 +1 @@ +v1.4 diff --git a/update/v1.5/Update.sh b/update/v1.5/Update.sh new file mode 100755 index 00000000..e69e3b31 --- /dev/null +++ b/update/v1.5/Update.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +echo $AIL_HOME + From 94fcf66d2027380938990bca55f3f3d86d5f5d14 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Nov 2018 15:52:33 +0100 Subject: [PATCH 05/85] fix: [Update] cleaning --- bin/Update.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/bin/Update.py b/bin/Update.py index d525d228..fe29c936 100755 --- a/bin/Update.py +++ b/bin/Update.py @@ -32,7 +32,6 @@ def check_if_files_modified(): print('Modified Files:') print('{}{}{}'.format(TERMINAL_BLUE, modified_files.decode(), TERMINAL_DEFAULT)) return False - #return True else: return True else: @@ -164,11 +163,6 @@ def get_git_upper_tags_remote(current_tag, is_fork): if process.returncode == 0: list_all_tags = process.stdout.decode().splitlines() - list_all_tags.append('aaaaaaaaaaaaaaaaaaaaaaaaaaaaa\trefs/tags/v1.5') - list_all_tags.append('eeeeeeeeeeeeeeeeeeeeeeeeeeee\trefs/tags/v1.5^{}') - list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6') - list_all_tags.append('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\trefs/tags/v1.6^{}') - #list_all_tags.append('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz\trefs/tags/v1.7') last_tag = list_all_tags[-1].split('\trefs/tags/') last_commit = last_tag[0] last_tag = last_tag[1].split('^{}')[0] @@ -197,7 +191,6 @@ def get_git_upper_tags_remote(current_tag, is_fork): def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_fork): print('{}git checkout master:{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'checkout', 'master'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - #process = subprocess.run(['git', 'status'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: print(process.stdout.decode()) From 912b977bb4809a818282bfdfd98ea724cf7e18fd Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Nov 2018 16:54:39 +0100 Subject: [PATCH 06/85] chg: [LAUNCH] update: launch + check BDD --- bin/LAUNCH.sh | 45 ++++++--------------------------------------- bin/check_ardb.sh | 27 +++++++++++++++++++++++++++ bin/check_redis.sh | 39 +++++++++++++++++++++++++++++++++++++++ bin/launch_ardb.sh | 21 +++++++++++++++++++++ 
bin/launch_lvldb.sh | 29 ----------------------------- bin/launch_redis.sh | 24 ++++++++++++------------ 6 files changed, 105 insertions(+), 80 deletions(-) create mode 100755 bin/check_ardb.sh create mode 100755 bin/check_redis.sh create mode 100755 bin/launch_ardb.sh delete mode 100755 bin/launch_lvldb.sh diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 58497e01..7e6fdfc8 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -64,27 +64,11 @@ function helptext { } function launching_redis { - conf_dir="${AIL_HOME}/configs/" - - screen -dmS "Redis_AIL" - sleep 0.1 - echo -e $GREEN"\t* Launching Redis servers"$DEFAULT - screen -S "Redis_AIL" -X screen -t "6379" bash -c 'redis-server '$conf_dir'6379.conf ; read x' - sleep 0.1 - screen -S "Redis_AIL" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x' - sleep 0.1 - screen -S "Redis_AIL" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x' + bash -c "bash ${AIL_BIN}/launch_redis.sh" } function launching_ardb { - conf_dir="${AIL_HOME}/configs/" - - screen -dmS "ARDB_AIL" - sleep 0.1 - echo -e $GREEN"\t* Launching ARDB servers"$DEFAULT - - sleep 0.1 - screen -S "ARDB_AIL" -X screen -t "6382" bash -c 'cd '${AIL_HOME}'; ardb-server '$conf_dir'6382.conf ; read x' + bash -c "bash ${AIL_BIN}/launch_ardb.sh" } function launching_logs { @@ -245,36 +229,18 @@ function shutting_down_ardb { function checking_redis { flag_redis=0 - redis_dir=${AIL_HOME}/redis/src/ - bash -c $redis_dir'redis-cli -p 6379 PING | grep "PONG" &> /dev/null' + bash -c "bash ${AIL_BIN}/check_redis.sh" if [ ! $? == 0 ]; then - echo -e $RED"\t6379 not ready"$DEFAULT flag_redis=1 fi - sleep 0.1 - bash -c $redis_dir'redis-cli -p 6380 PING | grep "PONG" &> /dev/null' - if [ ! $? == 0 ]; then - echo -e $RED"\t6380 not ready"$DEFAULT - flag_redis=1 - fi - sleep 0.1 - bash -c $redis_dir'redis-cli -p 6381 PING | grep "PONG" &> /dev/null' - if [ ! $? == 0 ]; then - echo -e $RED"\t6381 not ready"$DEFAULT - flag_redis=1 - fi - sleep 0.1 return $flag_redis; } function checking_ardb { flag_ardb=0 - redis_dir=${AIL_HOME}/redis/src/ - sleep 0.2 - bash -c $redis_dir'redis-cli -p 6382 PING | grep "PONG" &> /dev/null' + bash -c "bash ${AIL_BIN}/check_ardb.sh" if [ ! $? == 0 ]; then - echo -e $RED"\t6382 ARDB not ready"$DEFAULT flag_ardb=1 fi @@ -383,8 +349,9 @@ function shutdown { } function update() { - bash -c "./Update.py" + bin_dir=${AIL_HOME}/bin + bash -c "python3 $bin_dir/Update.py" exitStatus=$? if [ $exitStatus -ge 1 ]; then echo -e $RED"\t* Update Error"$DEFAULT diff --git a/bin/check_ardb.sh b/bin/check_ardb.sh new file mode 100755 index 00000000..d73ac3e9 --- /dev/null +++ b/bin/check_ardb.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" +RED="\\033[1;31m" +ROSE="\\033[1;35m" +BLUE="\\033[1;34m" +WHITE="\\033[0;02m" +YELLOW="\\033[1;33m" +CYAN="\\033[1;36m" + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; + +flag_ardb=0 +redis_dir=${AIL_HOME}/redis/src/ +sleep 0.2 +bash -c $redis_dir'redis-cli -p 6382 PING | grep "PONG" &> /dev/null' +if [ ! $? 
== 0 ]; then + echo -e $RED"\t6382 ARDB not ready"$DEFAULT + flag_ardb=1 +fi + +if [ $flag_ardb == 0 ]; then + exit 0 +else + exit 1 +fi diff --git a/bin/check_redis.sh b/bin/check_redis.sh new file mode 100755 index 00000000..0223e1eb --- /dev/null +++ b/bin/check_redis.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" +RED="\\033[1;31m" +ROSE="\\033[1;35m" +BLUE="\\033[1;34m" +WHITE="\\033[0;02m" +YELLOW="\\033[1;33m" +CYAN="\\033[1;36m" + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; + +flag_redis=0 +redis_dir=${AIL_HOME}/redis/src/ +bash -c $redis_dir'redis-cli -p 6379 PING | grep "PONG" &> /dev/null' +if [ ! $? == 0 ]; then + echo -e $RED"\t6379 not ready"$DEFAULT + flag_redis=1 +fi +sleep 0.1 +bash -c $redis_dir'redis-cli -p 6380 PING | grep "PONG" &> /dev/null' +if [ ! $? == 0 ]; then + echo -e $RED"\t6380 not ready"$DEFAULT + flag_redis=1 +fi +sleep 0.1 +bash -c $redis_dir'redis-cli -p 6381 PING | grep "PONG" &> /dev/null' +if [ ! $? == 0 ]; then + echo -e $RED"\t6381 not ready"$DEFAULT + flag_redis=1 +fi +sleep 0.1 + +if [ $flag_redis == 0 ]; then + exit 0 +else + exit 1 +fi diff --git a/bin/launch_ardb.sh b/bin/launch_ardb.sh new file mode 100755 index 00000000..9d6ea1e7 --- /dev/null +++ b/bin/launch_ardb.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" +RED="\\033[1;31m" +ROSE="\\033[1;35m" +BLUE="\\033[1;34m" +WHITE="\\033[0;02m" +YELLOW="\\033[1;33m" +CYAN="\\033[1;36m" + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; + +conf_dir="${AIL_HOME}/configs/" + +screen -dmS "ARDB_AIL" +sleep 0.1 +echo -e $GREEN"\t* Launching ARDB servers"$DEFAULT + +sleep 0.1 +screen -S "ARDB_AIL" -X screen -t "6382" bash -c 'cd '${AIL_HOME}'; ardb-server '$conf_dir'6382.conf ; read x' diff --git a/bin/launch_lvldb.sh b/bin/launch_lvldb.sh deleted file mode 100755 index a534e7ae..00000000 --- a/bin/launch_lvldb.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -set -e -set -x - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_LEVELDB" ] && echo "Needs the env var AIL_LEVELDB. Run the script from the virtual environment." && exit 1; - -lvdbhost='127.0.0.1' -lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/" -nb_db=13 - -db_y=`date +%Y` -#Verify that a dir with the correct year exists, create it otherwise -if [ ! -d "$lvdbdir$db_y" ]; then - mkdir -p "$db_y" -fi - -screen -dmS "LevelDB" -sleep 0.1 -echo -e $GREEN"\t* Launching Levels DB servers"$DEFAULT - -#Launch a DB for each dir -for pathDir in $lvdbdir*/ ; do - yDir=$(basename "$pathDir") - sleep 0.1 - screen -S "LevelDB" -X screen -t "$yDir" bash -c 'redis-leveldb -H '$lvdbhost' -D '$pathDir'/ -P '$yDir' -M '$nb_db'; read x' -done diff --git a/bin/launch_redis.sh b/bin/launch_redis.sh index 69910927..91c35f4d 100755 --- a/bin/launch_redis.sh +++ b/bin/launch_redis.sh @@ -1,23 +1,23 @@ #!/bin/bash -set -e -set -x +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" +RED="\\033[1;31m" +ROSE="\\033[1;35m" +BLUE="\\033[1;34m" +WHITE="\\033[0;02m" +YELLOW="\\033[1;33m" +CYAN="\\033[1;36m" [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. 
Run the script from the virtual environment." && exit 1; -[ -z "$AIL_LEVELDB" ] && echo "Needs the env var AIL_LEVELDB. Run the script from the virtual environment." && exit 1; conf_dir="${AIL_HOME}/configs/" -screen -dmS "Redis" +screen -dmS "Redis_AIL" sleep 0.1 echo -e $GREEN"\t* Launching Redis servers"$DEFAULT -screen -S "Redis" -X screen -t "6379" bash -c '../redis/src/redis-server '$conf_dir'6379.conf ; read x' +screen -S "Redis_AIL" -X screen -t "6379" bash -c 'redis-server '$conf_dir'6379.conf ; read x' sleep 0.1 -screen -S "Redis" -X screen -t "6380" bash -c '../redis/src/redis-server '$conf_dir'6380.conf ; read x' +screen -S "Redis_AIL" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x' sleep 0.1 -screen -S "Redis" -X screen -t "6381" bash -c '../redis/src/redis-server '$conf_dir'6381.conf ; read x' - -# For Words and curves -sleep 0.1 -screen -S "Redis" -X screen -t "6382" bash -c '../redis/src/redis-server '$conf_dir'6382.conf ; read x' +screen -S "Redis_AIL" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x' From aed8d65aefb2bd2773f524560dd88d0ec0bde8d2 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 14 Nov 2018 15:17:56 +0100 Subject: [PATCH 07/85] fix: [LAUNCH] add update in helper --- bin/LAUNCH.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 7e6fdfc8..0a0e7fbb 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -57,6 +57,7 @@ function helptext { LAUNCH.sh [-l | --launchAuto] [-k | --killAll] + [-u | --update] [-c | --configUpdate] [-t | --thirdpartyUpdate] [-h | --help] From 108fdb868e2615d703ed91443bc3bda50fe11811 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 15 Nov 2018 10:39:41 +0100 Subject: [PATCH 08/85] chg: update Overview --- OVERVIEW.md | 10 ++++++++++ bin/SentimentAnalysis.py | 20 +++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/OVERVIEW.md b/OVERVIEW.md index 3d3a62ab..8b324e21 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -46,6 +46,16 @@ ARDB_DB * DB 3 - Trending * DB 4 - Sentiment + ----------------------------------------- SENTIMENT ------------------------------------ + + SET - 'Provider_set' Provider + + KEY - 'UniqID' INT + + SET - provider_timestamp UniqID + + SET - UniqID avg_score + * DB 5 - TermCred * DB 6 - Tags * DB 7 - Metadata diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index 8442befa..3a014050 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -45,6 +45,13 @@ cfg = configparser.ConfigParser() cfg.read(configfile) sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") +time_clean_sentiment_db = 60*60 + +def clean_db() + sevenDays = oneHour*24*7 + dateStart = datetime.datetime.now() + dateStart = dateStart.replace(minute=0, second=0, microsecond=0) + dateStart_timestamp = calendar.timegm(dateStart.timetuple()) def Analyse(message, server): path = message @@ -157,12 +164,19 @@ if __name__ == '__main__': db=p.config.get("ARDB_Sentiment", "db"), decode_responses=True) + time1 = time.time() + while True: message = p.get_from_set() if message is None: - publisher.debug("{} queue is empty, waiting".format(config_section)) - time.sleep(1) - continue + if int(time.time() - time1) > time_clean_sentiment_db: + clean_db() + time1 = time.time() + continue + else: + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue signal.alarm(60) try: Analyse(message, server) From 3ae47a8659fb0b5c025662e06885d8b2023c51f0 Mon Sep 17 00:00:00 2001 
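The SentimentAnalysis change above reuses the idle branch of its consumer loop for housekeeping: when the queue is empty and more than time_clean_sentiment_db seconds have elapsed, clean_db() runs and the timer resets. A minimal sketch of that pattern, with stand-in stubs for the queue getter, the analysis step and the cleanup (only the loop structure comes from the patch):

    import time

    TIME_CLEAN_SENTIMENT_DB = 60 * 60  # one hour, as in the patch

    def get_from_queue():
        # stand-in for p.get_from_set(); returns None when the queue is empty
        return None

    def analyse(message):
        # stand-in for Analyse(message, server)
        pass

    def clean_db():
        # stand-in for the cleanup of old sentiment entries (left unfinished in the patch)
        pass

    last_clean = time.time()
    while True:
        message = get_from_queue()
        if message is None:
            if time.time() - last_clean > TIME_CLEAN_SENTIMENT_DB:
                clean_db()
                last_clean = time.time()
            else:
                time.sleep(1)
            continue
        analyse(message)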
From: Terrtia Date: Thu, 15 Nov 2018 10:43:19 +0100 Subject: [PATCH 09/85] chg: [gitignore] add update version --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2d276111..8dfd23b7 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ var/www/submitted bin/packages/config.cfg bin/packages/config.cfg.backup configs/keys +update/current_version files # installed files From f6e86582c84ea33f3df6988896e7fe86260cc648 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 15 Nov 2018 13:48:44 +0100 Subject: [PATCH 10/85] chg: [Update] generate current version --- bin/Update.py | 13 ++++++++++--- update/current_version | 1 - 2 files changed, 10 insertions(+), 4 deletions(-) delete mode 100644 update/current_version diff --git a/bin/Update.py b/bin/Update.py index fe29c936..ab3abf26 100755 --- a/bin/Update.py +++ b/bin/Update.py @@ -31,7 +31,8 @@ def check_if_files_modified(): if modified_files: print('Modified Files:') print('{}{}{}'.format(TERMINAL_BLUE, modified_files.decode(), TERMINAL_DEFAULT)) - return False + #return False + return True else: return True else: @@ -135,8 +136,14 @@ def update_fork(): def get_git_current_tag(current_version_path): - with open(current_version_path, 'r') as version_content: - version = version_content.read() + try: + with open(current_version_path, 'r') as version_content: + version = version_content.read() + except FileNotFoundError: + version = 'v1.4' + with open(current_version_path, 'w') as version_content: + version_content.write(version) + version = version.replace(" ", "").splitlines() return version[0] diff --git a/update/current_version b/update/current_version deleted file mode 100644 index b1f74215..00000000 --- a/update/current_version +++ /dev/null @@ -1 +0,0 @@ -v1.4 From aaa277b8a0b686666fa1783013dcca515b07e006 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 15 Nov 2018 14:26:41 +0100 Subject: [PATCH 11/85] chg: [Update] add additonal update --- bin/Update.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/bin/Update.py b/bin/Update.py index ab3abf26..cc7cb7d1 100755 --- a/bin/Update.py +++ b/bin/Update.py @@ -31,8 +31,7 @@ def check_if_files_modified(): if modified_files: print('Modified Files:') print('{}{}{}'.format(TERMINAL_BLUE, modified_files.decode(), TERMINAL_DEFAULT)) - #return False - return True + return False else: return True else: @@ -210,6 +209,22 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_for print(output) if len(list_upper_tags_remote) == 1: + # additional update (between 2 commits on the same version) + additional_update_path = os.path.join(os.environ['AIL_HOME'], 'update', current_tag, 'additional_update.sh') + if os.path.isfile(additional_update_path): + print() + print('{}------------------------------------------------------------------'.format(TERMINAL_YELLOW)) + print('- Launching Additional Update: -') + print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --{}'.format(TERMINAL_DEFAULT)) + process = subprocess.run(['bash', additional_update_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if process.returncode == 0: + output = process.stdout.decode() + print(output) + else: + print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + aborting_update() + sys.exit(1) + print() print('{}**************** AIL Sucessfully Updated *****************{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) print() From 4e680aabf03750292cb6795c814c4eb80a6b4087 Mon Sep 17 00:00:00 2001 From: 
Terrtia Date: Tue, 20 Nov 2018 14:39:45 +0100 Subject: [PATCH 12/85] chg: [Overview] add doc --- OVERVIEW.md | 18 ++++++++++++++++-- bin/SentimentAnalysis.py | 24 +++++++++--------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/OVERVIEW.md b/OVERVIEW.md index 8b324e21..32eae1d8 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -58,10 +58,18 @@ ARDB_DB * DB 5 - TermCred * DB 6 - Tags -* DB 7 - Metadata -* DB 8 - Statistics + ---------------------------------------------------------------------------------------- + + SET - tag paste* + + ---------------------------------------------------------------------------------------- * DB 7 - Metadata: + ---------------------------------------------------------------------------------------- + + SET - 'tag:' + paste tag + + ---------------------------------------------------------------------------------------- ----------------------------------------- BASE64 ---------------------------------------- HSET - 'metadata_hash:'+hash 'saved_path' saved_path @@ -99,3 +107,9 @@ ARDB_DB GET - 'base64_decoded:'+date nd_decoded GET - 'binary_decoded:'+date nd_decoded + +* DB 8 - Statistics +* DB 9 - Onion: + ---------------------------------------------------------------------------------------- + + diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index 3a014050..1305fb4f 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -45,13 +45,7 @@ cfg = configparser.ConfigParser() cfg.read(configfile) sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") -time_clean_sentiment_db = 60*60 - -def clean_db() - sevenDays = oneHour*24*7 - dateStart = datetime.datetime.now() - dateStart = dateStart.replace(minute=0, second=0, microsecond=0) - dateStart_timestamp = calendar.timegm(dateStart.timetuple()) +#time_clean_sentiment_db = 60*60 def Analyse(message, server): path = message @@ -169,14 +163,14 @@ if __name__ == '__main__': while True: message = p.get_from_set() if message is None: - if int(time.time() - time1) > time_clean_sentiment_db: - clean_db() - time1 = time.time() - continue - else: - publisher.debug("{} queue is empty, waiting".format(config_section)) - time.sleep(1) - continue + #if int(time.time() - time1) > time_clean_sentiment_db: + # clean_db() + # time1 = time.time() + # continue + #else: + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue signal.alarm(60) try: Analyse(message, server) From 31a8dfe0b39f213dae64115f76d5e6d2e8048807 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 21 Nov 2018 16:45:25 +0100 Subject: [PATCH 13/85] fix: [AIL] use only relative paths pastes duplicates are fixed on the fly --- bin/Global.py | 3 --- bin/Mixer.py | 2 +- bin/packages/HiddenServices.py | 19 +++--------------- bin/packages/Paste.py | 20 +++++++++++-------- var/www/modules/Flask_config.py | 2 +- var/www/modules/Tags/Flask_Tags.py | 1 - .../browsepastes/Flask_browsepastes.py | 3 +++ .../hiddenServices/Flask_hiddenServices.py | 6 +----- .../hiddenServices/templates/showDomain.html | 2 +- var/www/modules/search/Flask_search.py | 7 ++++--- var/www/modules/showpaste/Flask_showpaste.py | 16 +++++++-------- 11 files changed, 33 insertions(+), 48 deletions(-) diff --git a/bin/Global.py b/bin/Global.py index 22b4c4e7..c1e16496 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -52,7 +52,6 @@ if __name__ == '__main__': p = Process(config_section) PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) - print(PASTES_FOLDER) 
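Throughout this patch the absolute pastes-folder prefix is stripped before a path is stored or looked up, so every ARDB key carries the path relative to the pastes directory. A minimal sketch of that normalization, assuming AIL_HOME points at the install directory; the folder name is normally read from the Directories/pastes config entry and the helper name is illustrative:

    import os

    # normally: os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes"))
    PASTES_FOLDER = os.path.join(os.environ.get('AIL_HOME', '/opt/AIL-framework'), 'PASTES')

    def to_relative_path(path):
        # strip the absolute prefix once; already-relative paths pass through unchanged
        if path.startswith(PASTES_FOLDER):
            return path.replace(PASTES_FOLDER + '/', '', 1)
        return path

    # the relative form is what ends up in keys such as 'tag:<path>' or 'dup:<path>'
    print(to_relative_path(os.path.join(PASTES_FOLDER, 'archive/pastebin.com_pro/2018/11/21/x.gz')))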
# LOGGING # publisher.info("Feed Script started to receive & publish.") @@ -82,8 +81,6 @@ if __name__ == '__main__': continue # Creating the full filepath filename = os.path.join(PASTES_FOLDER, paste) - print(filename) - print(paste) dirname = os.path.dirname(filename) if not os.path.exists(dirname): diff --git a/bin/Mixer.py b/bin/Mixer.py index 760a3480..cbb39676 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -82,7 +82,7 @@ if __name__ == '__main__': ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate") default_unnamed_feed_name = cfg.get("Module_Mixer", "default_unnamed_feed_name") - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + '/' # STATS # processed_paste = 0 diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index d515c955..170e1dc3 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -99,11 +99,7 @@ class HiddenServices(object): if father is None: return [] l_crawled_pastes = [] - paste_parent = father.replace(self.paste_directory+'/', '') - paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste_parent)) - ## TODO: # FIXME: remove me - paste_children = self.r_serv_metadata.smembers('paste_children:{}'.format(father)) - paste_childrens = paste_childrens | paste_children + paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(father)) for children in paste_childrens: if self.domain in children: l_crawled_pastes.append(children) @@ -117,14 +113,9 @@ class HiddenServices(object): set_domain = set() for paste in l_paste: - paste_full = paste.replace(self.paste_directory+'/', '') - paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste_full)) - ## TODO: # FIXME: remove me - paste_children = self.r_serv_metadata.smembers('paste_children:{}'.format(paste)) - paste_childrens = paste_childrens | paste_children + paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste)) for children in paste_childrens: if not self.domain in children: - print(children) set_domain.add((children.split('.onion')[0]+'.onion').split('/')[-1]) return set_domain @@ -133,11 +124,7 @@ class HiddenServices(object): if father is None: return [] l_crawled_pastes = [] - paste_parent = father.replace(self.paste_directory+'/', '') - paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste_parent)) - ## TODO: # FIXME: remove me - paste_children = self.r_serv_metadata.smembers('paste_children:{}'.format(father)) - paste_childrens = paste_childrens | paste_children + paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(father)) for children in paste_childrens: if not self.domain in children: l_crawled_pastes.append(children) diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index c5dcc0a6..f1521d22 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -82,14 +82,14 @@ class Paste(object): db=cfg.getint("ARDB_Metadata", "db"), decode_responses=True) - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) - if PASTES_FOLDER not in p_path: + self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + if self.PASTES_FOLDER not in p_path: self.p_rel_path = p_path - p_path = os.path.join(PASTES_FOLDER, p_path) + self.p_path = os.path.join(self.PASTES_FOLDER, p_path) else: - self.p_rel_path = None + self.p_path 
= p_path + self.p_rel_path = p_path.replace(self.PASTES_FOLDER+'/', '', 1) - self.p_path = p_path self.p_name = os.path.basename(self.p_path) self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2) self.p_mime = magic.from_buffer("test", mime=True) @@ -286,9 +286,13 @@ class Paste(object): return False, var def _get_p_duplicate(self): - self.p_duplicate = self.store_metadata.smembers('dup:'+self.p_path) - if self.p_rel_path is not None: - self.p_duplicate.union( self.store_metadata.smembers('dup:'+self.p_rel_path) ) + p_duplicate = self.store_metadata.smembers('dup:'+self.p_path) + # remove absolute path #fix-db + if p_duplicate: + for duplicate_string in p_duplicate: + self.store_metadata.srem('dup:'+self.p_path, duplicate_string) + self.store_metadata.sadd('dup:'+self.p_rel_path, duplicate_string.replace(self.PASTES_FOLDER+'/', '', 1)) + self.p_duplicate = self.store_metadata.smembers('dup:'+self.p_rel_path) if self.p_duplicate is not None: return list(self.p_duplicate) else: diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 7cc802f0..104a1c25 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -154,7 +154,7 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') -PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) +PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot")) max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs")) diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index bbc918ed..e79d56fc 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -28,7 +28,6 @@ r_serv_statistics = Flask_config.r_serv_statistics max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal bootstrap_label = Flask_config.bootstrap_label -PASTES_FOLDER = Flask_config.PASTES_FOLDER Tags = Blueprint('Tags', __name__, template_folder='templates') diff --git a/var/www/modules/browsepastes/Flask_browsepastes.py b/var/www/modules/browsepastes/Flask_browsepastes.py index eb962ffe..96839d78 100644 --- a/var/www/modules/browsepastes/Flask_browsepastes.py +++ b/var/www/modules/browsepastes/Flask_browsepastes.py @@ -23,6 +23,7 @@ max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal r_serv_metadata = Flask_config.r_serv_metadata bootstrap_label = Flask_config.bootstrap_label +PASTES_FOLDER = Flask_config.PASTES_FOLDER #init all lvlDB servers curYear = datetime.now().year @@ -62,6 +63,7 @@ def event_stream_getImportantPasteByModule(module_name, year): paste_tags = [] for path in all_pastes_list: + path = path.replace(PASTES_FOLDER, '', 1) index += 1 paste = Paste.Paste(path) content = paste.get_p_content() @@ -125,6 +127,7 @@ def importantPasteByModule(): allPastes = getPastebyType(r_serv_db[currentSelectYear], module_name) for path in allPastes[0:10]: + path = path.replace(PASTES_FOLDER, '', 1) all_path.append(path) paste = Paste.Paste(path) content = paste.get_p_content() diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index 47ea56f1..ee5d7ee1 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ 
b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -22,7 +22,6 @@ baseUrl = Flask_config.baseUrl r_serv_onion = Flask_config.r_serv_onion r_serv_metadata = Flask_config.r_serv_metadata bootstrap_label = Flask_config.bootstrap_label -PASTES_FOLDER = Flask_config.PASTES_FOLDER hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates') @@ -124,15 +123,13 @@ def onion_domain(): origin_paste_name = h.get_origin_paste_name() origin_paste_tags = unpack_paste_tags(r_serv_metadata.smembers('tag:{}'.format(origin_paste))) paste_tags = [] - path_name = [] for path in l_pastes: - path_name.append(path.replace(PASTES_FOLDER+'/', '')) p_tags = r_serv_metadata.smembers('tag:'+path) paste_tags.append(unpack_paste_tags(p_tags)) return render_template("showDomain.html", domain=onion_domain, last_check=last_check, first_seen=first_seen, l_pastes=l_pastes, paste_tags=paste_tags, bootstrap_label=bootstrap_label, - path_name=path_name, origin_paste_tags=origin_paste_tags, status=status, + origin_paste_tags=origin_paste_tags, status=status, origin_paste=origin_paste, origin_paste_name=origin_paste_name, domain_tags=domain_tags, screenshot=screenshot) @@ -143,7 +140,6 @@ def onion_son(): h = HiddenServices(onion_domain, 'onion') l_pastes = h.get_last_crawled_pastes() l_son = h.get_domain_son(l_pastes) - print(l_son) return 'l_son' # ============= JSON ============== diff --git a/var/www/modules/hiddenServices/templates/showDomain.html b/var/www/modules/hiddenServices/templates/showDomain.html index dd6b2056..49f9a5f3 100644 --- a/var/www/modules/hiddenServices/templates/showDomain.html +++ b/var/www/modules/hiddenServices/templates/showDomain.html @@ -105,7 +105,7 @@ {% for path in l_pastes %} - {{ path_name[loop.index0] }} + {{ path }}
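The "fixed on the fly" part of this commit is _get_p_duplicate above: when a duplicate set still lives under the old absolute-path key, its members are moved to the relative-path key the first time the paste is read. A minimal sketch of that lazy migration with redis-py; the connection parameters and PASTES_FOLDER value are illustrative, the key names follow the patch:

    import redis

    r = redis.StrictRedis(host='localhost', port=6382, db=1, decode_responses=True)
    PASTES_FOLDER = '/opt/AIL-framework/PASTES'  # illustrative value

    def get_duplicates(p_path, p_rel_path):
        old_key = 'dup:' + p_path        # pre-1.5 key built from the absolute path
        new_key = 'dup:' + p_rel_path    # key used from now on
        for dup in r.smembers(old_key):
            # move each entry and strip the absolute prefix from its value too
            r.srem(old_key, dup)
            r.sadd(new_key, dup.replace(PASTES_FOLDER + '/', '', 1))
        return list(r.smembers(new_key))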
{% for tag in paste_tags[loop.index0] %} diff --git a/var/www/modules/search/Flask_search.py b/var/www/modules/search/Flask_search.py index 7f6cd724..7405b1e9 100644 --- a/var/www/modules/search/Flask_search.py +++ b/var/www/modules/search/Flask_search.py @@ -29,7 +29,7 @@ r_serv_metadata = Flask_config.r_serv_metadata max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal bootstrap_label = Flask_config.bootstrap_label - +PASTES_FOLDER = Flask_config.PASTES_FOLDER baseindexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) indexRegister_path = os.path.join(os.environ['AIL_HOME'], @@ -133,8 +133,8 @@ def search(): query = QueryParser("content", ix.schema).parse("".join(q)) results = searcher.search_page(query, 1, pagelen=num_elem_to_get) for x in results: - r.append(x.items()[0][1]) - path = x.items()[0][1] + r.append(x.items()[0][1].replace(PASTES_FOLDER, '', 1)) + path = x.items()[0][1].replace(PASTES_FOLDER, '', 1) paste = Paste.Paste(path) content = paste.get_p_content() content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 @@ -208,6 +208,7 @@ def get_more_search_result(): results = searcher.search_page(query, page_offset, num_elem_to_get) for x in results: path = x.items()[0][1] + path = path.replace(PASTES_FOLDER, '', 1) path_array.append(path) paste = Paste.Paste(path) content = paste.get_p_content() diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index c24e3335..970102ca 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -41,12 +41,15 @@ showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templa # ============ FUNCTIONS ============ def showpaste(content_range, requested_path): - if PASTES_FOLDER in requested_path: + if PASTES_FOLDER not in requested_path: # remove full path + requested_path_full = os.path.join(requested_path, PASTES_FOLDER) + else: + requested_path_full = requested_path requested_path = requested_path.replace(PASTES_FOLDER, '', 1) - #requested_path = os.path.join(PASTES_FOLDER, requested_path) + # escape directory transversal - if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER: + if os.path.commonprefix((requested_path_full,PASTES_FOLDER)) != PASTES_FOLDER: return 'path transversal detected' vt_enabled = Flask_config.vt_enabled @@ -122,12 +125,6 @@ def showpaste(content_range, requested_path): active_taxonomies = r_serv_tags.smembers('active_taxonomies') l_tags = r_serv_metadata.smembers('tag:'+requested_path) - print(l_tags) - if relative_path is not None: - print('union') - print(relative_path) - print(r_serv_metadata.smembers('tag:'+relative_path)) - l_tags = l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) ) #active galaxies active_galaxies = r_serv_tags.smembers('active_galaxies') @@ -280,6 +277,7 @@ def send_file_to_vt(): paste = request.form['paste'] hash = request.form['hash'] + ## TODO: # FIXME: path transversal b64_full_path = os.path.join(os.environ['AIL_HOME'], b64_path) b64_content = '' with open(b64_full_path, 'rb') as f: From 3272ffa71486a1802e76f8a2dba00115d793bc5e Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 21 Nov 2018 17:07:29 +0100 Subject: [PATCH 14/85] chg: [Update] add 1.5 update, remove absolute paths from DB --- update/v1.5/Update.py | 160 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100755 
update/v1.5/Update.py diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py new file mode 100755 index 00000000..ae52f4a0 --- /dev/null +++ b/update/v1.5/Update.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import configparser + +if __name__ == '__main__': + + start_deb = time.time() + + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + cfg = configparser.ConfigParser() + cfg.read(configfile) + + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' + + r_serv_metadata = redis.StrictRedis( + host=cfg.get("ARDB_Metadata", "host"), + port=cfg.getint("ARDB_Metadata", "port"), + db=cfg.getint("ARDB_Metadata", "db"), + decode_responses=True) + + r_serv_tag = redis.StrictRedis( + host=cfg.get("ARDB_Tags", "host"), + port=cfg.getint("ARDB_Tags", "port"), + db=cfg.getint("ARDB_Tags", "db"), + decode_responses=True) + + r_serv_onion = redis.StrictRedis( + host=cfg.get("ARDB_Onion", "host"), + port=cfg.getint("ARDB_Onion", "port"), + db=cfg.getint("ARDB_Onion", "db"), + decode_responses=True) + + ## Update metadata ## + print('Updating ARDB_Metadata ...') + index = 0 + start = time.time() + for key in r_serv_metadata.scan_iter('*'): + + list_data = r_serv_metadata.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_data[1]: + for hash_key, value in list_data[1].items(): + r_serv_metadata.hdel(key, hash_key) + if PASTES_FOLDER in hash_key: + new_hash = hash_key.replace(PASTES_FOLDER, '', 1) + else: + new_hash = hash_key + if PASTES_FOLDER in value: + new_value = value.replace(PASTES_FOLDER, '', 1) + else: + new_value = value + index = index +1 + r_serv_metadata.hset(key, new_hash, new_value) + + list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + + list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_data[1]: + for elem in list_data[1]: + zset_key = elem[0] + value = int(elem[1]) + r_serv_metadata.zrem(key, zset_key) + new_key = zset_key.replace(PASTES_FOLDER, '', 1) + index = index +1 + r_serv_metadata.zincrby(key, new_key, value) + + list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + + if not 'dup:' in key: + list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_data[1]: + for set_value in list_data[1]: + r_serv_metadata.srem(key, set_value) + r_serv_metadata.sadd(key, set_value.replace(PASTES_FOLDER, '', 1)) + index = index + 1 + + list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + + if PASTES_FOLDER in key: + new_key = key.replace(PASTES_FOLDER, '', 1) + + # a set with this key already exist + if r_serv_metadata.exists(new_key): + # save data + for new_key_value in r_serv_metadata.smembers(new_key): + r_serv_metadata.sadd(key, new_key_value) + r_serv_metadata.rename(key, new_key) + index = index + 1 + + end = time.time() + + + print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start)) + + print() + print('Updating ARDB_Tags ...') + index = 0 + start = time.time() + + tags_list = r_serv_tag.smembers('list_tags') + for tag in tags_list: + res = False + + list_pastes = r_serv_tag.sscan(tag, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_pastes[1]: + for paste in list_pastes[1]: + 
r_serv_tag.srem(tag, paste) + r_serv_tag.sadd(tag, paste.replace(PASTES_FOLDER, '', 1)) + index = index + 1 + + list_pastes = r_serv_tag.sscan(tag, 0, '*{}*'.format(PASTES_FOLDER), 1000) + + end = time.time() + print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start)) + + print() + print('Updating ARDB_Onion ...') + index = 0 + start = time.time() + for key in r_serv_onion.scan_iter('*'): + + if key != 'mess_onion': + list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_data[1]: + for hash_key, value in list_data[1].items(): + r_serv_onion.hdel(key, hash_key) + if PASTES_FOLDER in hash_key: + new_hash = hash_key.replace(PASTES_FOLDER, '', 1) + else: + new_hash = hash_key + if PASTES_FOLDER in value: + new_value = value.replace(PASTES_FOLDER, '', 1) + else: + new_value = value + index = index +1 + r_serv_onion.hset(key, new_hash, new_value) + + list_data = r_serv_onion.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + + list_data = r_serv_onion.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_data[1]: + for set_value in list_data[1]: + r_serv_onion.srem(key, set_value) + r_serv_onion.sadd(key, set_value.replace(PASTES_FOLDER, '', 1)) + index = index + 1 + + list_data = r_serv_onion.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + + end = time.time() + print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) + print() + print('Done in {} s'.format(end - start_deb)) From 309e150b8b82382e5dcf4a4f0b5b9995fc9a00bb Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 22 Nov 2018 15:24:13 +0100 Subject: [PATCH 15/85] fix: [update 1.4] typo --- update/v1.5/Update.py | 99 +++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 51 deletions(-) diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py index ae52f4a0..2cf62d74 100755 --- a/update/v1.5/Update.py +++ b/update/v1.5/Update.py @@ -44,57 +44,60 @@ if __name__ == '__main__': index = 0 start = time.time() for key in r_serv_metadata.scan_iter('*'): - - list_data = r_serv_metadata.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_data[1]: - for hash_key, value in list_data[1].items(): - r_serv_metadata.hdel(key, hash_key) - if PASTES_FOLDER in hash_key: - new_hash = hash_key.replace(PASTES_FOLDER, '', 1) - else: - new_hash = hash_key - if PASTES_FOLDER in value: - new_value = value.replace(PASTES_FOLDER, '', 1) - else: - new_value = value - index = index +1 - r_serv_metadata.hset(key, new_hash, new_value) - - list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - - list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_data[1]: - for elem in list_data[1]: - zset_key = elem[0] - value = int(elem[1]) - r_serv_metadata.zrem(key, zset_key) - new_key = zset_key.replace(PASTES_FOLDER, '', 1) - index = index +1 - r_serv_metadata.zincrby(key, new_key, value) - - list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - if not 'dup:' in key: - list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_data[1]: - for set_value in list_data[1]: - r_serv_metadata.srem(key, set_value) - r_serv_metadata.sadd(key, set_value.replace(PASTES_FOLDER, '', 1)) - index = index + 1 - - list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - if PASTES_FOLDER in key: new_key = key.replace(PASTES_FOLDER, '', 1) # a set with this key already exist if r_serv_metadata.exists(new_key): # save data - for 
new_key_value in r_serv_metadata.smembers(new_key): - r_serv_metadata.sadd(key, new_key_value) - r_serv_metadata.rename(key, new_key) + for new_key_value in r_serv_metadata.smembers(key): + r_serv_metadata.sadd(new_key, new_key_value) + r_serv_metadata.delete(key) index = index + 1 + type = r_serv_metadata.type(key) + print(type) + if type == 'hash': + list_data = r_serv_metadata.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + print(key) + while list_data[1]: + print(list_data[1]) + for hash_key, value in list_data[1].items(): + print('-----------------------------') + print(key) + print(hash_key) + print(value) + r_serv_metadata.hdel(key, hash_key) + new_hash = hash_key.replace(PASTES_FOLDER, '', 1) + new_value = value.replace(PASTES_FOLDER, '', 1) + index = index +1 + r_serv_metadata.hset(key, new_hash, new_value) + + list_data = r_serv_metadata.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + elif type == 'zset': + list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_data[1]: + for elem in list_data[1]: + zset_key = elem[0] + value = int(elem[1]) + r_serv_metadata.zrem(key, zset_key) + new_key = zset_key.replace(PASTES_FOLDER, '', 1) + index = index +1 + r_serv_metadata.zincrby(key, new_key, value) + + list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + + elif type == 'set': + list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + while list_data[1]: + for set_value in list_data[1]: + r_serv_metadata.srem(key, set_value) + r_serv_metadata.sadd(key, set_value.replace(PASTES_FOLDER, '', 1)) + index = index + 1 + + list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + end = time.time() @@ -132,18 +135,12 @@ if __name__ == '__main__': while list_data[1]: for hash_key, value in list_data[1].items(): r_serv_onion.hdel(key, hash_key) - if PASTES_FOLDER in hash_key: - new_hash = hash_key.replace(PASTES_FOLDER, '', 1) - else: - new_hash = hash_key - if PASTES_FOLDER in value: - new_value = value.replace(PASTES_FOLDER, '', 1) - else: - new_value = value + new_hash = hash_key.replace(PASTES_FOLDER, '', 1) + new_value = value.replace(PASTES_FOLDER, '', 1) index = index +1 r_serv_onion.hset(key, new_hash, new_value) - list_data = r_serv_onion.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) list_data = r_serv_onion.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) while list_data[1]: From 0280b0b6474afa5f02afcd2c778ca7a968e834b3 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 19 Dec 2018 09:28:46 +0100 Subject: [PATCH 16/85] fix: [DB fix] performance --- update/v1.5/Update.py | 201 ++++++++++++++++++++++++++++++------------ 1 file changed, 145 insertions(+), 56 deletions(-) diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py index 2cf62d74..b330672d 100755 --- a/update/v1.5/Update.py +++ b/update/v1.5/Update.py @@ -43,63 +43,153 @@ if __name__ == '__main__': print('Updating ARDB_Metadata ...') index = 0 start = time.time() - for key in r_serv_metadata.scan_iter('*'): - if not 'dup:' in key: - if PASTES_FOLDER in key: - new_key = key.replace(PASTES_FOLDER, '', 1) - # a set with this key already exist - if r_serv_metadata.exists(new_key): - # save data - for new_key_value in r_serv_metadata.smembers(key): - r_serv_metadata.sadd(new_key, new_key_value) - r_serv_metadata.delete(key) - index = index + 1 + string_keys_to_rename = ['misp_events:{}*'.format(PASTES_FOLDER), 
'hive_cases:{}*'.format(PASTES_FOLDER)] + for key_to_rename in string_keys_to_rename: - type = r_serv_metadata.type(key) - print(type) - if type == 'hash': - list_data = r_serv_metadata.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + keys_to_rename = [] + for key in r_serv_metadata.scan_iter(key_to_rename): + new_key = key.replace(PASTES_FOLDER, '', 1) + keys_to_rename.append( (key, new_key) ) + index = index + 1 + for key, new_key in keys_to_rename: + r_serv_metadata.rename(key, new_key) + + keys_to_rename = None + + set_keys_to_rename = ['tag:{}*'.format(PASTES_FOLDER), 'hash_paste:{}*'.format(PASTES_FOLDER), 'base64_paste:{}*'.format(PASTES_FOLDER), 'binary_paste:{}*'.format(PASTES_FOLDER), 'hexadecimal_paste:{}*'.format(PASTES_FOLDER), 'paste_regular_external_links:{}*'.format(PASTES_FOLDER), 'paste_onion_external_links:{}*'.format(PASTES_FOLDER), 'paste_children:{}*'.format(PASTES_FOLDER)] + for key_to_rename in set_keys_to_rename: + + keys_to_remove = [] + keys_to_rename = [] + for key in r_serv_metadata.scan_iter(key_to_rename): + new_key = key.replace(PASTES_FOLDER, '', 1) + # a set with this key already exist + if r_serv_metadata.exists(new_key): + # save data + for new_key_value in r_serv_metadata.smembers(key): + r_serv_metadata.sadd(new_key, new_key_value) + keys_to_remove.append(key) + else: + keys_to_rename.append( (key, new_key) ) + index = index + 1 + for key in keys_to_remove: + r_serv_metadata.delete(key) + for key, new_key in keys_to_rename: + r_serv_metadata.rename(key, new_key) + + keys_to_remove = None + keys_to_rename = None + + + zset_keys_to_rename = ['nb_seen_hash:*', 'base64_hash:*', 'binary_hash:*'] + for key_to_rename in zset_keys_to_rename: + + keys_to_remove = [] + zkeys_to_remove = [] + keys_to_add = [] + for key in r_serv_metadata.scan_iter(key_to_rename): + temp = [] + for zset_key, value in r_serv_metadata.zscan_iter(key, '*{}*'.format(PASTES_FOLDER)): + #print(key) + #print(zset_key) + #print(value) + new_key = zset_key.replace(PASTES_FOLDER, '', 1) + index = index +1 + temp.append((key, zset_key)) + keys_to_add.append((key, new_key, value)) + if 0 < len(temp) < r_serv_metadata.zcard(key): + #print(key) + #print(len(temp)) + #print(temp) + #print(r_serv_metadata.zcard(key)) + #print('---------------') + zkeys_to_remove.extend(temp) + else: + keys_to_remove.append(key) + for key in keys_to_remove: + r_serv_metadata.delete(key) + for key, zset_key in zkeys_to_remove: + r_serv_metadata.zrem(key, zset_key) + for key, new_key, value in keys_to_add: + r_serv_metadata.zincrby(key, new_key, int(value)) + keys_to_remove = None + zkeys_to_remove = None + keys_to_add = None + + set_keys_to_rename = ['paste_children:*'] + for key_to_rename in set_keys_to_rename: + keys_to_remove = [] + skeys_to_remove = [] + keys_to_add = [] + for key in r_serv_metadata.scan_iter(key_to_rename): + temp = [] + for set_key in r_serv_metadata.sscan_iter(key, '*{}*'.format(PASTES_FOLDER)): + new_key = set_key.replace(PASTES_FOLDER, '', 1) + index = index +1 + temp.append((key, set_key)) + keys_to_add.append((key, new_key)) + if 0 < len(temp) < r_serv_metadata.scard(key): + skeys_to_remove.extend(temp) + else: + keys_to_remove.append(key) + for key in keys_to_remove: + r_serv_metadata.delete(key) + for key, set_key in skeys_to_remove: + r_serv_metadata.srem(key, set_key) + for key, new_key in keys_to_add: + r_serv_metadata.sadd(key, new_key) + keys_to_remove = None + skeys_to_remove = None + keys_to_add = None + + hset_keys_to_rename = ['paste_metadata:{}*'.format(PASTES_FOLDER)] + 
for key_to_rename in hset_keys_to_rename: + + keys_to_rename = [] + for key in r_serv_metadata.scan_iter(key_to_rename): + new_key = key.replace(PASTES_FOLDER, '', 1) + # a hset with this key already exist + if r_serv_metadata.exists(new_key): print(key) - while list_data[1]: - print(list_data[1]) - for hash_key, value in list_data[1].items(): - print('-----------------------------') - print(key) - print(hash_key) - print(value) - r_serv_metadata.hdel(key, hash_key) - new_hash = hash_key.replace(PASTES_FOLDER, '', 1) - new_value = value.replace(PASTES_FOLDER, '', 1) - index = index +1 - r_serv_metadata.hset(key, new_hash, new_value) + else: + keys_to_rename.append((key, new_key)) + index = index + 1 + for key, new_key in keys_to_rename: + r_serv_metadata.rename(key, new_key) + keys_to_rename = None - list_data = r_serv_metadata.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - elif type == 'zset': - list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_data[1]: - for elem in list_data[1]: - zset_key = elem[0] - value = int(elem[1]) - r_serv_metadata.zrem(key, zset_key) - new_key = zset_key.replace(PASTES_FOLDER, '', 1) - index = index +1 - r_serv_metadata.zincrby(key, new_key, value) + # to verify 120/100 try with scan + hset_keys_to_rename = ['paste_metadata:*'] + for key_to_rename in hset_keys_to_rename: + for key in r_serv_metadata.scan_iter(key_to_rename): + father = r_serv_metadata.hget(key, 'father') + super_father = r_serv_metadata.hget(key, 'super_father') - list_data = r_serv_metadata.zscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) + if father: + if PASTES_FOLDER in father: + index = index + 1 + r_serv_metadata.hdel(key, 'father') + r_serv_metadata.hset(key, 'father', father.replace(PASTES_FOLDER, '', 1)) - elif type == 'set': - list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_data[1]: - for set_value in list_data[1]: - r_serv_metadata.srem(key, set_value) - r_serv_metadata.sadd(key, set_value.replace(PASTES_FOLDER, '', 1)) - index = index + 1 + if super_father: + if PASTES_FOLDER in super_father: + index = index + 1 + r_serv_metadata.hdel(key, 'super_father') + r_serv_metadata.hset(key, 'super_father', super_father.replace(PASTES_FOLDER, '', 1)) + + keys_to_rename = None - list_data = r_serv_metadata.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) end = time.time() + '''' + for key in r_serv_metadata.scan_iter('*{}*'.format(PASTES_FOLDER)): + if not 'dup:' in key: + if not 'paste_i2p_external_links:' in key: + if not 'base64:' in key: + print(key) + ''' print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start)) @@ -128,9 +218,10 @@ if __name__ == '__main__': print('Updating ARDB_Onion ...') index = 0 start = time.time() - for key in r_serv_onion.scan_iter('*'): - if key != 'mess_onion': + hset_keys_to_rename = ['onion_metadata:*'] + for key_to_rename in hset_keys_to_rename: + for key in r_serv_onion.scan_iter(key_to_rename): list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) while list_data[1]: for hash_key, value in list_data[1].items(): @@ -142,14 +233,12 @@ if __name__ == '__main__': list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - list_data = r_serv_onion.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_data[1]: - for set_value in list_data[1]: - r_serv_onion.srem(key, set_value) - r_serv_onion.sadd(key, set_value.replace(PASTES_FOLDER, '', 1)) - index = index + 1 + for elem in 
r_serv_onion.smembers('onion_crawler_queue'): + if PASTES_FOLDER in elem: + r_serv_onion.srem('onion_crawler_queue', elem) + r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1)) + index = index +1 - list_data = r_serv_onion.sscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) end = time.time() print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) From 198ee97d90697ac579de856ce7e2ac42a2f50a83 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 19 Dec 2018 11:41:01 +0100 Subject: [PATCH 17/85] chg: [update 1.5] add update bash --- bin/LAUNCH.sh | 15 +++++++++------ update/v1.5/Update.py | 16 ---------------- update/v1.5/Update.sh | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 5467396f..2e6dd6e6 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -16,7 +16,6 @@ export AIL_HOME="${DIR}" cd ${AIL_HOME} if [ -e "${DIR}/AILENV/bin/python" ]; then - echo "AIL-framework virtualenv seems to exist, good" ENV_PY="${DIR}/AILENV/bin/python" else echo "Please make sure you have a AIL-framework environment, au revoir" @@ -348,11 +347,15 @@ function launch_feeder { function killall { if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted || $isflasked || $isfeeded ]]; then - echo -e $GREEN"Gracefully closing redis servers"$DEFAULT - shutting_down_redis; - sleep 0.2 - echo -e $GREEN"Gracefully closing ardb servers"$DEFAULT - shutting_down_ardb; + if [[ $isredis ]]; then + echo -e $GREEN"Gracefully closing redis servers"$DEFAULT + shutting_down_redis; + sleep 0.2 + fi + if [[ $isardb ]]; then + echo -e $GREEN"Gracefully closing ardb servers"$DEFAULT + shutting_down_ardb; + fi echo -e $GREEN"Killing all"$DEFAULT kill $isredis $isardb $islogged $isqueued $isscripted $isflasked $isfeeded sleep 0.2 diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py index b330672d..2622976f 100755 --- a/update/v1.5/Update.py +++ b/update/v1.5/Update.py @@ -91,19 +91,11 @@ if __name__ == '__main__': for key in r_serv_metadata.scan_iter(key_to_rename): temp = [] for zset_key, value in r_serv_metadata.zscan_iter(key, '*{}*'.format(PASTES_FOLDER)): - #print(key) - #print(zset_key) - #print(value) new_key = zset_key.replace(PASTES_FOLDER, '', 1) index = index +1 temp.append((key, zset_key)) keys_to_add.append((key, new_key, value)) if 0 < len(temp) < r_serv_metadata.zcard(key): - #print(key) - #print(len(temp)) - #print(temp) - #print(r_serv_metadata.zcard(key)) - #print('---------------') zkeys_to_remove.extend(temp) else: keys_to_remove.append(key) @@ -183,14 +175,6 @@ if __name__ == '__main__': end = time.time() - '''' - for key in r_serv_metadata.scan_iter('*{}*'.format(PASTES_FOLDER)): - if not 'dup:' in key: - if not 'paste_i2p_external_links:' in key: - if not 'base64:' in key: - print(key) - ''' - print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start)) print() diff --git a/update/v1.5/Update.sh b/update/v1.5/Update.sh index e69e3b31..f329c7c5 100755 --- a/update/v1.5/Update.sh +++ b/update/v1.5/Update.sh @@ -1,4 +1,41 @@ #!/bin/bash -echo $AIL_HOME +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. 
Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +echo "Killing all screens ..." +bash -c "bash ${AIL_BIN}/LAUNCH.sh -k" +echo "" +echo "Starting ARDB ..." +bash -c "bash ${AIL_BIN}/launch_ardb.sh" + +flag_ardb=true +while $flag_ardb; do + sleep 1 + bash -c "bash ${AIL_BIN}/check_ardb.sh" + if [ $? == 0 ]; then + flag_ardb=false + else + echo "ARDB not available, waiting 5s before retry" + sleep 5 + fi +done + +echo "" +bash -c "python ${AIL_HOME}/update/v1.5/Update.py" + +echo "Shutting down ARDB ..." +bash -c "bash ${AIL_BIN}/LAUNCH.sh -k" + +echo "" + +exit 0 From ca47764836e44480d1807e0c07f1e3815c49556e Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 19 Dec 2018 15:16:52 +0100 Subject: [PATCH 18/85] fix: [update v1.5] --- bin/Update.py | 19 ++++++++++++------- update/v1.5/Update.py | 7 ++----- update/v1.5/Update.sh | 2 ++ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/bin/Update.py b/bin/Update.py index cc7cb7d1..f9cfbd9e 100755 --- a/bin/Update.py +++ b/bin/Update.py @@ -25,7 +25,6 @@ def auto_update_enabled(cfg): # check if files are modify locally def check_if_files_modified(): process = subprocess.run(['git', 'ls-files' ,'-m'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if process.returncode == 0: modified_files = process.stdout if modified_files: @@ -197,7 +196,7 @@ def get_git_upper_tags_remote(current_tag, is_fork): def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_fork): print('{}git checkout master:{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'checkout', 'master'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - + #process = subprocess.run(['ls'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: print(process.stdout.decode()) print() @@ -260,10 +259,16 @@ def launch_update_version(version, roll_back_commit, current_version_path, is_fo print('{}------------------------------------------------------------------'.format(TERMINAL_YELLOW)) print('- Launching Update: {}{}{} -'.format(TERMINAL_BLUE, version, TERMINAL_YELLOW)) print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --{}'.format(TERMINAL_DEFAULT)) - process = subprocess.run(['bash', update_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + process = subprocess.Popen(['bash', update_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + while True: + output = process.stdout.readline().decode() + if output == '' and process.poll() is not None: + break + if output: + print(output.strip()) if process.returncode == 0: - output = process.stdout.decode() - print(output) + #output = process.stdout.decode() + #print(output) with open(current_version_path, 'w') as version_content: version_content.write(version) @@ -273,8 +278,8 @@ def launch_update_version(version, roll_back_commit, current_version_path, is_fo print('------------------------------------------------------------------{}'.format(TERMINAL_DEFAULT)) print() else: - print(process.stdout.decode()) - print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) + #print(process.stdout.read().decode()) + print('{}{}{}'.format(TERMINAL_RED, process.stderr.read().decode(), TERMINAL_DEFAULT)) print('------------------------------------------------------------------') 
print(' {}Update Error: {}{}{}'.format(TERMINAL_RED, TERMINAL_BLUE, version, TERMINAL_DEFAULT)) print('------------------------------------------------------------------') diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py index 2622976f..6a75b47a 100755 --- a/update/v1.5/Update.py +++ b/update/v1.5/Update.py @@ -142,11 +142,8 @@ if __name__ == '__main__': for key in r_serv_metadata.scan_iter(key_to_rename): new_key = key.replace(PASTES_FOLDER, '', 1) # a hset with this key already exist - if r_serv_metadata.exists(new_key): - print(key) - else: - keys_to_rename.append((key, new_key)) - index = index + 1 + keys_to_rename.append((key, new_key)) + index = index + 1 for key, new_key in keys_to_rename: r_serv_metadata.rename(key, new_key) keys_to_rename = None diff --git a/update/v1.5/Update.sh b/update/v1.5/Update.sh index f329c7c5..3cc45f01 100755 --- a/update/v1.5/Update.sh +++ b/update/v1.5/Update.sh @@ -30,6 +30,8 @@ while $flag_ardb; do fi done +echo "" +echo "Fixing ARDB ..." echo "" bash -c "python ${AIL_HOME}/update/v1.5/Update.py" From 516238025f8a54b3897b93b6b7e7b7562b159b78 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 5 Feb 2019 17:16:44 +0100 Subject: [PATCH 19/85] chg: [Crawler] add bootsrap4 src + refractor crawler --- bin/Crawler.py | 133 ++++++++++++++++++----------- bin/LAUNCH.sh | 2 +- bin/packages/config.cfg.sample | 4 +- bin/torcrawler/TorSplashCrawler.py | 6 +- bin/torcrawler/tor_crawler.py | 7 +- var/www/update_thirdparty.sh | 11 ++- 6 files changed, 106 insertions(+), 57 deletions(-) diff --git a/bin/Crawler.py b/bin/Crawler.py index e6b61a99..e1591d55 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -16,6 +16,24 @@ sys.path.append(os.environ['AIL_BIN']) from Helper import Process from pubsublogger import publisher +def decode_val(value): + if value is not None: + value = value.decode() + return value + +def load_type_blacklist(type_service): + # load domains blacklist + try: + with open(os.path.join(os.environ['AIL_BIN'],'/torcrawler/blacklist_{}.txt'.format(type_service)), 'r') as f: + # # TODO: # FIXME: remove this + r_onion.delete('blacklist_{}'.format(type_service)) + lines = f.read().splitlines() + for line in lines: + r_onion.sadd('blacklist_{}'.format(type_service), line) + except Exception: + pass + + def on_error_send_message_back_in_queue(type_hidden_service, domain, message): # send this msg back in the queue if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain): @@ -91,12 +109,16 @@ def crawl_onion(url, domain, date, date_month, message): if __name__ == '__main__': if len(sys.argv) != 3: - print('usage:', 'Crawler.py', 'type_hidden_service (onion or i2p or regular)', 'splash_port') + #print('usage:', 'Crawler.py', 'type_hidden_service (onion or i2p or regular)', 'splash_port') + print('usage:', 'Crawler.py', 'mode (manual or automatic)', 'splash_port') exit(1) - type_hidden_service = sys.argv[1] + mode = sys.argv[1] splash_port = sys.argv[2] + if mode == 'automatic': + type_hidden_service = 'onion' + publisher.port = 6380 publisher.channel = "Script" @@ -107,6 +129,16 @@ if __name__ == '__main__': # Setup the I/O queues p = Process(config_section) + accepted_services = ['onion', 'regular'] + + dic_regex = {} + dic_regex['onion'] = 
"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" + re.compile(dic_regex['onion']) + dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" + re.compile(dic_regex['i2p']) + dic_regex['regular'] = dic_regex['i2p'] + + url_onion = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" re.compile(url_onion) url_i2p = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" @@ -114,17 +146,15 @@ if __name__ == '__main__': if type_hidden_service == 'onion': regex_hidden_service = url_onion - splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port) elif type_hidden_service == 'i2p': regex_hidden_service = url_i2p - splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_i2p"), splash_port) elif type_hidden_service == 'regular': regex_hidden_service = url_i2p - splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port) else: print('incorrect crawler type: {}'.format(type_hidden_service)) exit(0) + splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port) print('splash url: {}'.format(splash_url)) crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit") @@ -150,19 +180,13 @@ if __name__ == '__main__': db=p.config.getint("ARDB_Onion", "db"), decode_responses=True) + # Crawler status r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port) r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting') r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) # load domains blacklist - try: - with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f: - r_onion.delete('blacklist_{}'.format(type_hidden_service)) - lines = f.read().splitlines() - for line in lines: - r_onion.sadd('blacklist_{}'.format(type_hidden_service), line) - except Exception: - pass + load_type_blacklist(type_hidden_service) while True: @@ -180,17 +204,24 @@ if __name__ == '__main__': url, 
paste = splitted paste = paste.replace(PASTES_FOLDER+'/', '') - url_list = re.findall(regex_hidden_service, url)[0] - if url_list[1] == '': + # extract data from url + faup.decode(url) + url_unpack = faup.get() + url = decode_val(url_unpack['url']) + port = decode_val(url_unpack['port']) + scheme = decode_val(url_unpack['scheme']) + domain = decode_val(url_unpack['domain']) + host = decode_val(url_unpack['domain']) + + # Add Scheme to url + if scheme is None: url= 'http://{}'.format(url) - - link, s, credential, subdomain, domain, host, port, \ - resource_path, query_string, f1, f2, f3, f4 = url_list - domain = url_list[4] - r_onion.srem('{}_domain_crawler_queue'.format(type_hidden_service), domain) - domain_url = 'http://{}'.format(domain) + + # remove url to crawl from queue + r_onion.srem('{}_domain_crawler_queue'.format(type_hidden_service), domain) + print() print() print('\033[92m------------------START CRAWLER------------------\033[0m') @@ -200,10 +231,7 @@ if __name__ == '__main__': print('domain: {}'.format(domain)) print('domain_url: {}'.format(domain_url)) - faup.decode(domain) - onion_domain=faup.get()['domain'].decode() - - if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain) and not r_onion.sismember('blacklist_{}'.format(type_hidden_service), onion_domain): + if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain): date = datetime.datetime.now().strftime("%Y%m%d") date_month = datetime.datetime.now().strftime("%Y%m") @@ -219,17 +247,24 @@ if __name__ == '__main__': # last check r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date) + # Launch Scrapy-Splash Crawler crawl_onion(url, domain, date, date_month, message) + # Crawl Domain if url != domain_url: - print(url) + #Crawl Domain with port number + if port is not None: + print('{}:{}'.format(domain_url, port)) + crawl_onion('{}:{}'.format(domain_url, port), domain, date, date_month, message) + #Crawl without port number print(domain_url) crawl_onion(domain_url, domain, date, date_month, message) + # update last check + r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date) + # save down onion if not r_onion.sismember('{}_up:{}'.format(type_hidden_service, date), domain): r_onion.sadd('{}_down:{}'.format(type_hidden_service, date), domain) - #r_onion.sadd('{}_down_link:{}'.format(type_hidden_service, date), url) - #r_onion.hincrby('{}_link_down'.format(type_hidden_service), url, 1) else: #r_onion.hincrby('{}_link_up'.format(type_hidden_service), url, 1) if r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and r_serv_metadata.exists('paste_children:'+paste): @@ -241,28 +276,28 @@ if __name__ == '__main__': if r_onion.lindex('{}_history:{}'.format(type_hidden_service, domain), 0) != date: r_onion.lpush('{}_history:{}'.format(type_hidden_service, domain), date) # add crawled history by date - r_onion.lpush('{}_history:{}:{}'.format(type_hidden_service, domain, date), paste) #add datetime here + r_onion.lpush('{}_history:{}:{}'.format(type_hidden_service, domain, date), paste) + if mode == 'automatic': + # check external onions links (full_crawl) + external_domains = set() + for link in r_onion.smembers('domain_{}_external_links:{}'.format(type_hidden_service, domain)): + external_domain = re.findall(dic_regex[type_hidden_service], link) + external_domain.extend(re.findall(url_i2p, link)) + if len(external_domain) > 0: + external_domain = external_domain[0][4] + else: + continue + if 
'.onion' in external_domain and external_domain != domain: + external_domains.add(external_domain) + elif '.i2p' in external_domain and external_domain != domain: + external_domains.add(external_domain) + if len(external_domains) >= 10: + r_onion.sadd('{}_potential_source'.format(type_hidden_service), domain) + r_onion.delete('domain_{}_external_links:{}'.format(type_hidden_service, domain)) + print(r_onion.smembers('domain_{}_external_links:{}'.format(type_hidden_service, domain))) - # check external onions links (full_scrawl) - external_domains = set() - for link in r_onion.smembers('domain_{}_external_links:{}'.format(type_hidden_service, domain)): - external_domain = re.findall(url_onion, link) - external_domain.extend(re.findall(url_i2p, link)) - if len(external_domain) > 0: - external_domain = external_domain[0][4] - else: - continue - if '.onion' in external_domain and external_domain != domain: - external_domains.add(external_domain) - elif '.i2p' in external_domain and external_domain != domain: - external_domains.add(external_domain) - if len(external_domains) >= 10: - r_onion.sadd('{}_potential_source'.format(type_hidden_service), domain) - r_onion.delete('domain_{}_external_links:{}'.format(type_hidden_service, domain)) - print(r_onion.smembers('domain_{}_external_links:{}'.format(type_hidden_service, domain))) - - # update list, last crawled onions + # update list, last crawled sites r_onion.lpush('last_{}'.format(type_hidden_service), domain) r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15) @@ -270,7 +305,7 @@ if __name__ == '__main__': r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting') r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain') else: - print(' Blacklisted Onion') + print(' Blacklisted Site') print() print() diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 549c0425..dd5a0517 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -221,7 +221,7 @@ function launching_scripts { function launching_crawler { if [[ ! 
$iscrawler ]]; then CONFIG=$AIL_BIN/packages/config.cfg - lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_onion_port/{print $3;exit}' "${CONFIG}") + lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_port/{print $3;exit}' "${CONFIG}") IFS='-' read -ra PORTS <<< "$lport" if [ ${#PORTS[@]} -eq 1 ] diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index ace656cc..f9483476 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -249,5 +249,5 @@ db = 0 [Crawler] activate_crawler = False crawler_depth_limit = 1 -splash_url_onion = http://127.0.0.1 -splash_onion_port = 8050-8052 +splash_url = http://127.0.0.1 +splash_port = 8050-8052 diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 99a4f3b3..b5a5c1f9 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -28,10 +28,10 @@ from Helper import Process class TorSplashCrawler(): - def __init__(self, splash_url, crawler_depth_limit): + def __init__(self, splash_url, crawler_depth_limit, user_agent, closespider_pagecount): self.process = CrawlerProcess({'LOG_ENABLED': False}) self.crawler = Crawler(self.TorSplashSpider, { - 'USER_AGENT': 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0', + 'USER_AGENT': user_agent, 'SPLASH_URL': splash_url, 'ROBOTSTXT_OBEY': False, 'DOWNLOADER_MIDDLEWARES': {'scrapy_splash.SplashCookiesMiddleware': 723, @@ -42,7 +42,7 @@ class TorSplashCrawler(): 'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter', 'HTTPERROR_ALLOW_ALL': True, 'RETRY_TIMES': 2, - 'CLOSESPIDER_PAGECOUNT': 50, + 'CLOSESPIDER_PAGECOUNT': closespider_pagecount, 'DEPTH_LIMIT': crawler_depth_limit }) diff --git a/bin/torcrawler/tor_crawler.py b/bin/torcrawler/tor_crawler.py index 58e8331b..99bda837 100755 --- a/bin/torcrawler/tor_crawler.py +++ b/bin/torcrawler/tor_crawler.py @@ -30,5 +30,10 @@ if __name__ == '__main__': paste = sys.argv[5] super_father = sys.argv[6] - crawler = TorSplashCrawler(splash_url, crawler_depth_limit) + tor_browser_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0' + user_agent = tor_browser_agent + + closespider_pagecount = 50 + + crawler = TorSplashCrawler(splash_url, crawler_depth_limit, user_agent, closespider_pagecount) crawler.crawl(type, url, domain, paste, super_father) diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh index 258fa7ca..01a73136 100755 --- a/var/www/update_thirdparty.sh +++ b/var/www/update_thirdparty.sh @@ -5,12 +5,14 @@ set -e wget http://dygraphs.com/dygraph-combined.js -O ./static/js/dygraph-combined.js SBADMIN_VERSION='3.3.7' +BOOTSTRAP_VERSION='4.2.1' FONT_AWESOME_VERSION='4.7.0' D3_JS_VERSION='5.5.0' rm -rf temp mkdir temp +wget https://github.com/twbs/bootstrap/releases/download/v${BOOTSTRAP_VERSION}/bootstrap-${BOOTSTRAP_VERSION}-dist.zip -O temp/bootstrap${BOOTSTRAP_VERSION}.zip wget https://github.com/BlackrockDigital/startbootstrap-sb-admin/archive/v${SBADMIN_VERSION}.zip -O temp/${SBADMIN_VERSION}.zip wget https://github.com/BlackrockDigital/startbootstrap-sb-admin-2/archive/v${SBADMIN_VERSION}.zip -O temp/${SBADMIN_VERSION}-2.zip wget https://github.com/FortAwesome/Font-Awesome/archive/v${FONT_AWESOME_VERSION}.zip -O temp/FONT_AWESOME_${FONT_AWESOME_VERSION}.zip @@ -20,7 +22,7 @@ wget https://github.com/d3/d3/releases/download/v${D3_JS_VERSION}/d3.zip -O tem wget https://github.com/moment/moment/archive/2.22.2.zip -O temp/moment_2.22.2.zip wget 
https://github.com/longbill/jquery-date-range-picker/archive/v0.18.0.zip -O temp/daterangepicker_v0.18.0.zip - +unzip temp/bootstrap${BOOTSTRAP_VERSION}.zip -d temp/ unzip temp/${SBADMIN_VERSION}.zip -d temp/ unzip temp/${SBADMIN_VERSION}-2.zip -d temp/ unzip temp/FONT_AWESOME_${FONT_AWESOME_VERSION}.zip -d temp/ @@ -29,6 +31,10 @@ unzip temp/d3_${D3_JS_VERSION}.zip -d temp/ unzip temp/moment_2.22.2.zip -d temp/ unzip temp/daterangepicker_v0.18.0.zip -d temp/ +mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/ +mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/ +mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/ + mv temp/startbootstrap-sb-admin-${SBADMIN_VERSION} temp/sb-admin mv temp/startbootstrap-sb-admin-2-${SBADMIN_VERSION} temp/sb-admin-2 mv temp/Font-Awesome-${FONT_AWESOME_VERSION} temp/font-awesome @@ -59,6 +65,9 @@ wget https://cdn.datatables.net/1.10.12/js/jquery.dataTables.min.js -O ./static/ wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTables.bootstrap.css -O ./static/css/dataTables.bootstrap.css wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTables.bootstrap.js -O ./static/js/dataTables.bootstrap.js +wget https://cdn.datatables.net/1.10.18/css/dataTables.bootstrap4.min.css -O ./static/css/dataTables.bootstrap4.min.css +wget https://cdn.datatables.net/1.10.18/js/dataTables.bootstrap4.min.js -O ./static/js/dataTables.bootstrap4.min.js + #Ressource for graph wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.js -O ./static/js/jquery.flot.js wget https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb6109df5c1e8003/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js From c2885589cf6cd57b6082d471f88352fcbf2d871d Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 7 Feb 2019 17:22:44 +0100 Subject: [PATCH 20/85] chg: [UI] basic navbar + sidebar + refractor --- bin/Crawler.py | 118 +++++++++--------- .../hiddenServices/Flask_hiddenServices.py | 4 + .../templates/Crawler_index.html | 82 ++++++++++++ var/www/templates/nav_bar.html | 46 +++++++ var/www/update_thirdparty.sh | 6 +- 5 files changed, 192 insertions(+), 64 deletions(-) create mode 100644 var/www/modules/hiddenServices/templates/Crawler_index.html create mode 100644 var/www/templates/nav_bar.html diff --git a/bin/Crawler.py b/bin/Crawler.py index e1591d55..278ecc05 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -16,6 +16,47 @@ sys.path.append(os.environ['AIL_BIN']) from Helper import Process from pubsublogger import publisher +# ======== GLOBAL VARIABLES ======== +publisher.port = 6380 +publisher.channel = "Script" + +config_section = 'Crawler' + +# Setup the I/O queues +p = Process(config_section) + +accepted_services = ['onion', 'regular'] + +dic_regex = {} +dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" +re.compile(dic_regex['onion']) +dic_regex['i2p'] = 
"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" +re.compile(dic_regex['i2p']) +dic_regex['regular'] = dic_regex['i2p'] + +faup = Faup() + +PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + +r_serv_metadata = redis.StrictRedis( + host=p.config.get("ARDB_Metadata", "host"), + port=p.config.getint("ARDB_Metadata", "port"), + db=p.config.getint("ARDB_Metadata", "db"), + decode_responses=True) + +r_cache = redis.StrictRedis( + host=p.config.get("Redis_Cache", "host"), + port=p.config.getint("Redis_Cache", "port"), + db=p.config.getint("Redis_Cache", "db"), + decode_responses=True) + +r_onion = redis.StrictRedis( + host=p.config.get("ARDB_Onion", "host"), + port=p.config.getint("ARDB_Onion", "port"), + db=p.config.getint("ARDB_Onion", "db"), + decode_responses=True) + +# ======== FUNCTIONS ======== def decode_val(value): if value is not None: value = value.decode() @@ -105,7 +146,7 @@ def crawl_onion(url, domain, date, date_month, message): r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling') exit(1) - +# ======== MAIN ======== if __name__ == '__main__': if len(sys.argv) != 3: @@ -119,83 +160,38 @@ if __name__ == '__main__': if mode == 'automatic': type_hidden_service = 'onion' - publisher.port = 6380 - publisher.channel = "Script" - - publisher.info("Script Crawler started") - - config_section = 'Crawler' - - # Setup the I/O queues - p = Process(config_section) - - accepted_services = ['onion', 'regular'] - - dic_regex = {} - dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" - re.compile(dic_regex['onion']) - dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" - re.compile(dic_regex['i2p']) - dic_regex['regular'] = dic_regex['i2p'] - - - url_onion = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" - re.compile(url_onion) - url_i2p = 
"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" - re.compile(url_i2p) - - if type_hidden_service == 'onion': - regex_hidden_service = url_onion - elif type_hidden_service == 'i2p': - regex_hidden_service = url_i2p - elif type_hidden_service == 'regular': - regex_hidden_service = url_i2p - else: + # verify crawler type (type_hidden_service) + if type_hidden_service not in accepted_services: print('incorrect crawler type: {}'.format(type_hidden_service)) exit(0) + else: + publisher.info("Script Crawler started") + + # load domains blacklist + load_type_blacklist(type_hidden_service) splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port) print('splash url: {}'.format(splash_url)) crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit") - faup = Faup() - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) - - r_serv_metadata = redis.StrictRedis( - host=p.config.get("ARDB_Metadata", "host"), - port=p.config.getint("ARDB_Metadata", "port"), - db=p.config.getint("ARDB_Metadata", "db"), - decode_responses=True) - - r_cache = redis.StrictRedis( - host=p.config.get("Redis_Cache", "host"), - port=p.config.getint("Redis_Cache", "port"), - db=p.config.getint("Redis_Cache", "db"), - decode_responses=True) - - r_onion = redis.StrictRedis( - host=p.config.get("ARDB_Onion", "host"), - port=p.config.getint("ARDB_Onion", "port"), - db=p.config.getint("ARDB_Onion", "db"), - decode_responses=True) # Crawler status r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port) r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting') r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) - # load domains blacklist - load_type_blacklist(type_hidden_service) while True: - # Priority Queue - Recovering the streamed message informations. - message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service)) + if mode == 'automatic': + # Priority Queue - Recovering the streamed message informations. + message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service)) - if message is None: - # Recovering the streamed message informations. - message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) + if message is None: + # Recovering the streamed message informations. 
+ message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) + else: + pass if message is not None: diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index cc977976..965255fb 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -75,6 +75,10 @@ def get_onion_status(domain, date): return False # ============= ROUTES ============== +@hiddenServices.route("/hiddenServices/2", methods=['GET']) +def hiddenServices_page_test(): + return render_template("Crawler_index.html") + @hiddenServices.route("/hiddenServices/", methods=['GET']) def hiddenServices_page(): last_onions = r_serv_onion.lrange('last_onion', 0 ,-1) diff --git a/var/www/modules/hiddenServices/templates/Crawler_index.html b/var/www/modules/hiddenServices/templates/Crawler_index.html new file mode 100644 index 00000000..5b2137ff --- /dev/null +++ b/var/www/modules/hiddenServices/templates/Crawler_index.html @@ -0,0 +1,82 @@ + + + + + AIL-Framework + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+					--------------
+					--------------
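In the Crawler.py refactor above, the inline blacklist loading is replaced by a single call to load_type_blacklist(type_hidden_service), but the helper's definition is not part of these hunks. The following is only a sketch of what it presumably wraps, modeled on the inline code it replaces; the per-type file name and the connection parameters are assumptions, not taken from the patch.

import os
import redis

# Placeholder connection: stands in for the module-level ARDB_Onion client
# that Crawler.py builds from p.config (host/port/db here are assumptions).
r_onion = redis.StrictRedis(host='localhost', port=6382, db=0, decode_responses=True)

def load_type_blacklist(type_service):
    # Assumed layout: one blacklist file per service type (onion, i2p, ...).
    try:
        blacklist_file = os.path.join(os.environ['AIL_BIN'], 'torcrawler',
                                      'blacklist_{}.txt'.format(type_service))
        with open(blacklist_file, 'r') as f:
            # rebuild the redis set from the file on every crawler start
            r_onion.delete('blacklist_{}'.format(type_service))
            for line in f.read().splitlines():
                r_onion.sadd('blacklist_{}'.format(type_service), line)
    except Exception:
        # missing or unreadable file: keep whatever blacklist is already in redis
        pass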
+ + + + + diff --git a/var/www/templates/nav_bar.html b/var/www/templates/nav_bar.html new file mode 100644 index 00000000..d10c6cea --- /dev/null +++ b/var/www/templates/nav_bar.html @@ -0,0 +1,46 @@ + diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh index 01a73136..e03d2af7 100755 --- a/var/www/update_thirdparty.sh +++ b/var/www/update_thirdparty.sh @@ -31,9 +31,9 @@ unzip temp/d3_${D3_JS_VERSION}.zip -d temp/ unzip temp/moment_2.22.2.zip -d temp/ unzip temp/daterangepicker_v0.18.0.zip -d temp/ -mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/ -mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/ -mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/ +mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/bootstrap4.min.js +mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/bootstrap4.min.css +mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/bootstrap4.min.css.map mv temp/startbootstrap-sb-admin-${SBADMIN_VERSION} temp/sb-admin mv temp/startbootstrap-sb-admin-2-${SBADMIN_VERSION} temp/sb-admin-2 From 674a9a2591391106d4ae8913432d14287298f640 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 8 Feb 2019 17:16:58 +0100 Subject: [PATCH 21/85] chg: [UI] font icon v5 + navbar + sidebar --- .../hiddenServices/Flask_hiddenServices.py | 49 ++++ .../templates/Crawler_Splash_onion.html | 260 ++++++++++++++++++ .../templates/Crawler_index.html | 62 ++--- var/www/templates/nav_bar.html | 38 +-- var/www/update_thirdparty.sh | 11 +- 5 files changed, 368 insertions(+), 52 deletions(-) create mode 100644 var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index 965255fb..d61466ef 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -79,6 +79,55 @@ def get_onion_status(domain, date): def hiddenServices_page_test(): return render_template("Crawler_index.html") +@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET']) +def hiddenServices_page(): + last_onions = r_serv_onion.lrange('last_onion', 0 ,-1) + list_onion = [] + + now = datetime.datetime.now() + date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d")) + statDomains = {} + statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date)) + statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date)) + statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] + statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue') + + for onion in last_onions: + metadata_onion = {} + metadata_onion['domain'] = onion + metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check') + if metadata_onion['last_check'] is None: + metadata_onion['last_check'] = '********' + metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen') + if metadata_onion['first_seen'] is None: + metadata_onion['first_seen'] = '********' + if get_onion_status(onion, metadata_onion['last_check']): + metadata_onion['status_text'] = 'UP' + metadata_onion['status_color'] = 'Green' + metadata_onion['status_icon'] = 'fa-check-circle' + else: + metadata_onion['status_text'] = 'DOWN' + 
metadata_onion['status_color'] = 'Red' + metadata_onion['status_icon'] = 'fa-times-circle' + list_onion.append(metadata_onion) + + crawler_metadata=[] + all_onion_crawler = r_cache.smembers('all_crawler:onion') + for crawler in all_onion_crawler: + crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain') + started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time') + status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status') + crawler_info = '{} - {}'.format(crawler, started_time) + if status_info=='Waiting' or status_info=='Crawling': + status=True + else: + status=False + crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status}) + + date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8]) + return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains, + crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string) + @hiddenServices.route("/hiddenServices/", methods=['GET']) def hiddenServices_page(): last_onions = r_serv_onion.lrange('last_onion', 0 ,-1) diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html new file mode 100644 index 00000000..4ae0b358 --- /dev/null +++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html @@ -0,0 +1,260 @@ + + + + + AIL-Framework + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+					--------------
+					--------------
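The crawler dashboard added to Flask_hiddenServices.py reads the same cache keys that Crawler.py maintains: the set all_crawler:<type> of registered Splash ports and one metadata_crawler:<port> hash per crawler. A minimal sketch of that key layout, separate from the patch itself (the connection parameters are placeholders):

import redis

# Placeholder connection: stands in for the Redis_Cache client used by both
# Crawler.py and the Flask module.
r_cache = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

def get_crawlers_status(service_type='onion'):
    # Crawler.py registers every Splash port in all_crawler:<type> and keeps a
    # metadata_crawler:<port> hash with status, started_time and crawling_domain.
    crawlers = []
    for port in r_cache.smembers('all_crawler:{}'.format(service_type)):
        meta = r_cache.hgetall('metadata_crawler:{}'.format(port))
        crawlers.append({
            'splash_port': port,
            'status': meta.get('status'),              # 'Waiting' or 'Crawling'
            'started_time': meta.get('started_time'),
            'crawling_domain': meta.get('crawling_domain'),
        })
    return crawlers

The new page marks a crawler as running only when its status field is 'Waiting' or 'Crawling'; any other value is rendered as an error state.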
+ + + + + + + + + diff --git a/var/www/modules/hiddenServices/templates/Crawler_index.html b/var/www/modules/hiddenServices/templates/Crawler_index.html index 5b2137ff..faccf26a 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_index.html +++ b/var/www/modules/hiddenServices/templates/Crawler_index.html @@ -6,16 +6,12 @@ - + - - @@ -26,35 +22,39 @@
+ diff --git a/var/www/templates/nav_bar.html b/var/www/templates/nav_bar.html index d10c6cea..fe455ee8 100644 --- a/var/www/templates/nav_bar.html +++ b/var/www/templates/nav_bar.html @@ -10,37 +10,39 @@