diff --git a/bin/Attributes.py b/bin/Attributes.py index 66e22f39..96471a79 100755 --- a/bin/Attributes.py +++ b/bin/Attributes.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -33,7 +33,7 @@ if __name__ == "__main__": PST = Paste.Paste(message) else: publisher.debug("Script Attribute is idling 1s") - print 'sleeping' + print('sleeping') time.sleep(1) continue @@ -45,6 +45,6 @@ if __name__ == "__main__": # FIXME Not used. PST.store.sadd("Pastes_Objects", PST.p_path) except IOError: - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/Categ.py b/bin/Categ.py index 3bf68664..6bced9ce 100755 --- a/bin/Categ.py +++ b/bin/Categ.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ The ZMQ_PubSub_Categ Module @@ -73,7 +73,7 @@ if __name__ == "__main__": bname = os.path.basename(filename) tmp_dict[bname] = [] with open(os.path.join(args.d, filename), 'r') as f: - patterns = [r'%s' % re.escape(s.strip()) for s in f] + patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f] tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) prec_filename = None @@ -82,18 +82,18 @@ if __name__ == "__main__": filename = p.get_from_set() if filename is None: publisher.debug("Script Categ is Idling 10s") - print 'Sleeping' + print('Sleeping') time.sleep(10) continue paste = Paste.Paste(filename) content = paste.get_p_content() for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) if len(found) >= matchingThreshold: msg = '{} {}'.format(paste.p_path, len(found)) - print msg, categ + print(msg, categ) p.populate_set_out(msg, categ) publisher.info(
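A note on the Credential.py diff that follows: count arrives as a str from message.split(' '), and Python 3 refuses to order-compare str against int, while Python 2 silently ranked every int below every str, so the original count < minTopPassList filter could never fire. A standalone sketch of why the restored check casts first (illustrative values, not patch code):

    count = '3'                    # as produced by message.split(' ')
    try:
        count < 5                  # Python 3: TypeError
    except TypeError:
        pass
    assert int(count) < 5          # the explicit cast makes the threshold usable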
- print("false positive:", count) - continue + #print("false positive:", count) + #continue paste = Paste.Paste(filepath) content = paste.get_p_content() creds = set(re.findall(regex_cred, content)) + print(len(creds)) + print(creds) + print(content) + print('-----') + + publisher.warning('to_print') + if len(creds) == 0: continue @@ -89,7 +97,7 @@ if __name__ == "__main__": message = 'Checked {} credentials found.'.format(len(creds)) if sites_set: - message += ' Related websites: {}'.format(', '.join(sites_set)) + message += ' Related websites: {}'.format( (', '.join(sites_set)) ) to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path) @@ -97,13 +105,14 @@ if __name__ == "__main__": #num of creds above tresh, publish an alert if len(creds) > criticalNumberToAlert: - print("========> Found more than 10 credentials in this file : {}".format(filepath)) + print("========> Found more than 10 credentials in this file : {}".format( filepath )) publisher.warning(to_print) #Send to duplicate p.populate_set_out(filepath, 'Duplicate') #Send to alertHandler - p.populate_set_out('credential;{}'.format(filepath), 'alertHandler') - + msg = 'credential;{}'.format(filepath) + p.populate_set_out(msg, 'alertHandler') + #Put in form, count occurences, then send to moduleStats creds_sites = {} site_occurence = re.findall(regex_site_for_stats, content) @@ -122,9 +131,10 @@ if __name__ == "__main__": else: creds_sites[domain] = 1 - for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats - print 'credential;{};{};{}'.format(num, site, paste.p_date) - p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') + for site, num in creds_sites.items(): # Send for each different site to moduleStats + mssg = 'credential;{};{};{}'.format(num, site, paste.p_date) + print(mssg) + p.populate_set_out(mssg, 'ModuleStats') if sites_set: print("=======> Probably on : {}".format(', '.join(sites_set))) @@ -148,7 +158,7 @@ if __name__ == "__main__": uniq_num_cred = server_cred.incr(REDIS_KEY_NUM_USERNAME) server_cred.hmset(REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred}) server_cred.hmset(REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred}) - + #Add the mapping between the credential and the path server_cred.sadd(REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path) @@ -158,4 +168,3 @@ if __name__ == "__main__": for partCred in splitedCred: if len(partCred) > minimumLengthThreshold: server_cred.sadd(partCred, uniq_num_cred) - diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 133916fe..4d1f4d1f 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -18,6 +18,7 @@ from packages import Paste from packages import lib_refine from pubsublogger import publisher import re +import sys from Helper import Process @@ -58,13 +59,14 @@ if __name__ == "__main__": content = paste.get_p_content() all_cards = re.findall(regex, content) if len(all_cards) > 0: - print 'All matching', all_cards + print('All matching', all_cards) creditcard_set = set([]) for card in all_cards: clean_card = re.sub('[^0-9]', '', card) + clean_card = clean_card if lib_refine.is_luhn_valid(clean_card): - print clean_card, 'is valid' + print(clean_card, 'is valid') creditcard_set.add(clean_card) paste.__setattr__(channel, creditcard_set) @@ -76,13 +78,15 @@ if __name__ == "__main__": if (len(creditcard_set) > 0): publisher.warning('{}Checked {} 
diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 133916fe..4d1f4d1f 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -18,6 +18,7 @@ from packages import Paste from packages import lib_refine from pubsublogger import publisher import re +import sys from Helper import Process @@ -58,13 +59,13 @@ if __name__ == "__main__": content = paste.get_p_content() all_cards = re.findall(regex, content) if len(all_cards) > 0: - print 'All matching', all_cards + print('All matching', all_cards) creditcard_set = set([]) for card in all_cards: clean_card = re.sub('[^0-9]', '', card) if lib_refine.is_luhn_valid(clean_card): - print clean_card, 'is valid' + print(clean_card, 'is valid') creditcard_set.add(clean_card) paste.__setattr__(channel, creditcard_set) @@ -76,13 +78,15 @@ if __name__ == "__main__": if (len(creditcard_set) > 0): publisher.warning('{}Checked {} valid number(s);{}'.format( to_print, len(creditcard_set), paste.p_path)) + print('{}Checked {} valid number(s);{}'.format( + to_print, len(creditcard_set), paste.p_path)) #Send to duplicate p.populate_set_out(filename, 'Duplicate') #send to Browse_warning_paste - p.populate_set_out('creditcard;{}'.format(filename), 'alertHandler') + msg = 'creditcard;{}'.format(filename) + p.populate_set_out(msg, 'alertHandler') else: publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path)) else: publisher.debug("Script creditcard is idling 1m") time.sleep(10) - diff --git a/bin/Curve.py b/bin/Curve.py index 712f6006..330987f2 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module. @@ -53,12 +53,12 @@ def check_if_tracked_term(term, path): #add_paste to tracked_word_set set_name = "tracked_" + term server_term.sadd(set_name, path) - print term, 'addded', set_name, '->', path + print(term, 'added', set_name, '->', path) p.populate_set_out("New Term added", 'CurveManageTopSets') # Send a notification only when the member is in the set if term in server_term.smembers(TrackedTermsNotificationEnabled_Name): - + # Send to every associated email adress for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + term): sendEmailNotification(email, term) @@ -137,7 +137,7 @@ if __name__ == "__main__": server_term.zincrby(curr_set, low_word, float(score)) #1 term per paste server_term.zincrby("per_paste_" + curr_set, low_word, float(1)) - + #Add more info for tracked terms check_if_tracked_term(low_word, filename) @@ -149,15 +149,16 @@ if __name__ == "__main__": if generate_new_graph: generate_new_graph = False - print 'Building graph' + print('Building graph') today = datetime.date.today() year = today.year month = today.month + lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, year, month) publisher.debug("Script Curve is Idling") - print "sleeping" + print("sleeping") time.sleep(10) message = p.get_from_set()
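The zadd and zincrby calls preserved in Curve.py above and CurveManageTopSets.py below use the pre-3.0 redis-py calling convention, which is consistent with the rest of this patch; redis-py 3.0 later changed both signatures. A side-by-side sketch, assuming a local Redis and an illustrative key name:

    import redis

    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    # redis-py < 3.0, as written throughout this patch:
    #   r.zadd('top_terms', 1.0, 'password')        # (name, score, member)
    #   r.zincrby('top_terms', 'password', 1.0)     # (name, value, amount)
    # redis-py >= 3.0 equivalents:
    r.zadd('top_terms', {'password': 1.0})          # mapping of member -> score
    r.zincrby('top_terms', 1.0, 'password')         # (name, amount, value)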
diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index eea46a8c..1b37d21a 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -16,7 +16,7 @@ from packages import lib_words import datetime import calendar import os -import ConfigParser +import configparser # Config Variables Refresh_rate = 60*5 #sec @@ -68,26 +68,26 @@ def manage_top_set(): # convert dico into sorted array array_month = [] - for w, v in dico.iteritems(): + for w, v in dico.items(): array_month.append((w, v)) array_month.sort(key=lambda tup: -tup[1]) array_month = array_month[0:20] array_week = [] - for w, v in dico_week.iteritems(): + for w, v in dico_week.items(): array_week.append((w, v)) array_week.sort(key=lambda tup: -tup[1]) array_week = array_week[0:20] # convert dico_per_paste into sorted array array_month_per_paste = [] - for w, v in dico_per_paste.iteritems(): + for w, v in dico_per_paste.items(): array_month_per_paste.append((w, v)) array_month_per_paste.sort(key=lambda tup: -tup[1]) array_month_per_paste = array_month_per_paste[0:20] array_week_per_paste = [] - for w, v in dico_week_per_paste.iteritems(): + for w, v in dico_week_per_paste.items(): array_week_per_paste.append((w, v)) array_week_per_paste.sort(key=lambda tup: -tup[1]) array_week_per_paste = array_week_per_paste[0:20] @@ -105,7 +105,7 @@ def manage_top_set(): server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0]) for elem in array_week_per_paste: server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0]) - + for elem in array_month: server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0]) for elem in array_month_per_paste: @@ -114,7 +114,7 @@ def manage_top_set(): timestamp = int(time.mktime(datetime.datetime.now().timetuple())) value = str(timestamp) + ", " + "-" r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) - print "refreshed module" + print("refreshed module") @@ -130,8 +130,8 @@ if __name__ == '__main__': raise Exception('Unable to find the configuration file. \ Did you set environment variables? \ Or activate the virtualenv.') - - cfg = ConfigParser.ConfigParser() + + cfg = configparser.ConfigParser() cfg.read(configfile) @@ -162,4 +162,3 @@ if __name__ == '__main__': # Get one message from the input queue (module only work if linked with a queue) time.sleep(Refresh_rate) # sleep a long time then manage the set manage_top_set() - diff --git a/bin/Cve.py b/bin/Cve.py index 62df0aba..f417d7a9 100755 --- a/bin/Cve.py +++ b/bin/Cve.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ The CVE Module @@ -32,7 +32,8 @@ def search_cve(message): publisher.warning('{} contains CVEs'.format(paste.p_name)) #send to Browse_warning_paste - p.populate_set_out('cve;{}'.format(filepath), 'alertHandler') + msg = 'cve;{}'.format(filepath) + p.populate_set_out(msg, 'alertHandler') #Send to duplicate p.populate_set_out(filepath, 'Duplicate') @@ -63,4 +64,3 @@ if __name__ == '__main__': # Do something with the message from the queue search_cve(message) - diff --git a/bin/Dir.py b/bin/Dir.py index 6156c579..570d3dec 100755 --- a/bin/Dir.py +++ b/bin/Dir.py @@ -1,18 +1,18 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* import argparse import redis from pubsublogger import publisher from packages.lib_words import create_dirfile -import ConfigParser +import configparser def main(): """Main Function""" # CONFIG # - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read('./packages/config.cfg') parser = argparse.ArgumentParser( diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py index c205cb01..f1362365 100755 --- a/bin/DomClassifier.py +++ b/bin/DomClassifier.py @@ -1,11 +1,11 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ The DomClassifier Module ============================ -The DomClassifier modules extract and classify Internet domains/hostnames/IP addresses from -the out output of the Global module. +The DomClassifier module extracts and classifies Internet domains/hostnames/IP addresses from +the output of the Global module.
""" @@ -44,6 +44,7 @@ def main(): continue paste = PST.get_p_content() mimetype = PST._get_p_encoding() + if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() @@ -59,7 +60,7 @@ def main(): publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) except IOError: - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/DumpValidOnion.py b/bin/DumpValidOnion.py index dad5ea9b..14efc0ba 100755 --- a/bin/DumpValidOnion.py +++ b/bin/DumpValidOnion.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* from pubsublogger import publisher @@ -23,7 +23,7 @@ if __name__ == "__main__": if message is not None: f = open(dump_file, 'a') while message is not None: - print message + print(message) date = datetime.datetime.now() if message is not None: f.write(date.isoformat() + ' ' + message + '\n') diff --git a/bin/Duplicates.py b/bin/Duplicates.py index 50def29f..98858174 100755 --- a/bin/Duplicates.py +++ b/bin/Duplicates.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -42,15 +42,15 @@ if __name__ == "__main__": threshold_duplicate_ssdeep = int(p.config.get("Modules_Duplicates", "threshold_duplicate_ssdeep")) threshold_duplicate_tlsh = int(p.config.get("Modules_Duplicates", "threshold_duplicate_tlsh")) threshold_set = {} - threshold_set['ssdeep'] = threshold_duplicate_ssdeep - threshold_set['tlsh'] = threshold_duplicate_tlsh + threshold_set['ssdeep'] = threshold_duplicate_ssdeep + threshold_set['tlsh'] = threshold_duplicate_tlsh min_paste_size = float(p.config.get("Modules_Duplicates", "min_paste_size")) # REDIS # dico_redis = {} date_today = datetime.today() - for year in xrange(2013, date_today.year+1): - for month in xrange(0, 13): + for year in range(2013, date_today.year+1): + for month in range(0, 13): dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( host=p.config.get("Redis_Level_DB", "host"), port=year, db=month) @@ -90,7 +90,7 @@ if __name__ == "__main__": # Get the date of the range date_range = date_today - timedelta(days = maximum_month_range*30.4166666) num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month) - for diff_month in xrange(0, num_of_month+1): + for diff_month in range(0, num_of_month+1): curr_date_range = date_today - timedelta(days = diff_month*30.4166666) to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2) dico_range_list.append(to_append) @@ -102,7 +102,7 @@ if __name__ == "__main__": yearly_index = str(date_today.year)+'00' r_serv0 = dico_redis[yearly_index] r_serv0.incr("current_index") - index = r_serv0.get("current_index")+str(PST.p_date) + index = (r_serv0.get("current_index")).decode('utf8') + str(PST.p_date) # Open selected dico range opened_dico = [] @@ -114,11 +114,13 @@ if __name__ == "__main__": # Go throught the Database of the dico (of the month) for curr_dico_name, curr_dico_redis in opened_dico: - for hash_type, paste_hash in paste_hashes.iteritems(): + for hash_type, paste_hash in paste_hashes.items(): for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type): + dico_hash = dico_hash.decode('utf8') + try: if hash_type == 'ssdeep': - percent = 100-ssdeep.compare(dico_hash, paste_hash) + percent = 100-ssdeep.compare(dico_hash, paste_hash) 
else: percent = tlsh.diffxlen(dico_hash, paste_hash) @@ -130,15 +132,18 @@ if __name__ == "__main__": # index of paste index_current = r_serv_dico.get(dico_hash) + index_current = index_current.decode('utf8') paste_path = r_serv_dico.get(index_current) + paste_path = paste_path.decode('utf8') paste_date = r_serv_dico.get(index_current+'_date') + paste_date = paste_date.decode('utf8') paste_date = paste_date if paste_date != None else "No date available" if paste_path != None: hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) - print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) - except Exception,e: - print str(e) + print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)) + except Exception as e: + print(str(e)) #print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash # Add paste in DB after checking to prevent its analysis twice @@ -147,7 +152,7 @@ if __name__ == "__main__": r_serv1.set(index+'_date', PST._get_p_date()) r_serv1.sadd("INDEX", index) # Adding hashes in Redis - for hash_type, paste_hash in paste_hashes.iteritems(): + for hash_type, paste_hash in paste_hashes.items(): r_serv1.set(paste_hash, index) r_serv1.sadd("HASHS_"+hash_type, paste_hash) @@ -166,7 +171,7 @@ if __name__ == "__main__": PST.__setattr__("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl) publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path)) - print '{}Detected {}'.format(to_print, len(dupl)) + print('{}Detected {}'.format(to_print, len(dupl))) y = time.time() @@ -176,5 +181,5 @@ if __name__ == "__main__": except IOError: to_print = 'Duplicate;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/Duplicates_old.py b/bin/Duplicates_old.py index 59610f83..2ac66333 100755 --- a/bin/Duplicates_old.py +++ b/bin/Duplicates_old.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -33,8 +33,8 @@ if __name__ == "__main__": # DB OBJECT & HASHS ( DISK ) # FIXME increase flexibility dico_redis = {} - for year in xrange(2013, 2017): - for month in xrange(0, 16): + for year in range(2013, 2017): + for month in range(0, 16): dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( host=p.config.get("Redis_Level_DB", "host"), port=year, db=month) @@ -147,7 +147,7 @@ if __name__ == "__main__": if percentage >= 50: dupl.append((paste, percentage)) else: - print 'percentage: ' + str(percentage) + print('percentage: ' + str(percentage)) # Creating the object attribute and save it. to_print = 'Duplicate;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) PST.__setattr__("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl) publisher.info('{}Detected {}'.format(to_print, len(dupl))) - print '{}Detected {}'.format(to_print, len(dupl)) + print('{}Detected {}'.format(to_print, len(dupl))) y = time.time() publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) except IOError: - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('{}CRC Checksum Failed'.format(to_print))
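The .decode('utf8') calls threaded through Duplicates.py above are the manual way across the py3 bytes boundary: under Python 3, redis-py hands back bytes for every GET and SMEMBERS. An alternative (not what this patch does) is to let the client decode, sketched here against a local Redis with illustrative keys:

    import redis

    # decode_responses=True makes redis-py return str instead of bytes,
    # removing the need for per-call .decode('utf8').
    r = redis.StrictRedis(host='localhost', port=6379, db=0,
                          decode_responses=True)
    r.set('current_index', 42)
    index = r.get('current_index') + '20170725'   # already str: '4220170725'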
diff --git a/bin/Global.py b/bin/Global.py index 5cd0edf1..ee783f0a 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -67,7 +67,7 @@ if __name__ == '__main__': continue # Creating the full filepath filename = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "pastes"), paste.decode('utf8')) + p.config.get("Directories", "pastes"), paste) #print(filename) dirname = os.path.dirname(filename) if not os.path.exists(dirname): @@ -77,5 +77,5 @@ if __name__ == '__main__': f.write(base64.standard_b64decode(gzip64encoded)) print(filename) - p.populate_set_out(filename.encode('utf8')) + p.populate_set_out(filename) processed_paste+=1 diff --git a/bin/Helper.py b/bin/Helper.py index 315962bb..154d6ea8 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -58,7 +58,7 @@ class PubSub(object): new_sub = context.socket(zmq.SUB) new_sub.connect(address) # bytes64 encode bytes to ascii only bytes - new_sub.setsockopt(zmq.SUBSCRIBE, channel.encode('ascii')) + new_sub.setsockopt_string(zmq.SUBSCRIBE, channel) self.subscribers.append(new_sub) def setup_publish(self, conn_name): @@ -78,15 +78,14 @@ class PubSub(object): self.publishers['ZMQ'].append((p, channel)) def publish(self, message): - m = json.loads(message.decode('ascii')) + m = json.loads(message) channel_message = m.get('channel') for p, channel in self.publishers['Redis']: if channel_message is None or channel_message == channel: - p.publish(channel, ( m['message']).encode('ascii') ) + p.publish(channel, ( m['message']) ) for p, channel in self.publishers['ZMQ']: if channel_message is None or channel_message == channel: - mess = ( m['message'] ).encode('ascii') - p.send(b' '.join( [channel, mess] ) ) + p.send_string('{} {}'.format(channel, m['message'])) def subscribe(self): @@ -99,7 +99,8 @@ class PubSub(object): for sub in self.subscribers: try: msg = sub.recv(zmq.NOBLOCK) - yield msg.split(b" ", 1)[1] + msg = msg.decode('utf8') + yield msg.split(" ", 1)[1] except zmq.error.Again as e: time.sleep(0.2) pass @@ -150,6 +151,12 @@ class Process(object): self.r_temp.hset('queues', self.subscriber_name, int(self.r_temp.scard(in_set))) message = self.r_temp.spop(in_set) + + try: + message = message.decode('utf8') + except AttributeError: + pass + timestamp = int(time.mktime(datetime.datetime.now().timetuple())) dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes') @@ -158,12 +165,12 @@ class Process(object): else: #try: - if b'.gz' in message: - path = message.split(b".")[-2].split(b"/")[-1] + if '.gz' in message: + path = message.split(".")[-2].split("/")[-1] #find start of path with AIL_HOME - index_s = (message.decode('ascii')).find(os.environ['AIL_HOME']) + index_s = message.find(os.environ['AIL_HOME']) #Stop when .gz - index_e = message.find(b".gz")+3 + index_e = message.find(".gz")+3 if(index_s == -1): complete_path = message[0:index_e] else: @@ -173,7 +180,7 @@ class Process(object): path = "-" complete_path = "?" - value = str(timestamp) + ", " + path.decode('ascii') + value = str(timestamp) + ", " + path self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) @@ -190,13 +197,12 @@ class Process(object): def populate_set_out(self, msg, channel=None): # multiproc - msg = msg.decode('ascii') msg = {'message': msg} if channel is not None: msg.update({'channel': channel}) # bytes64 encode bytes to ascii only bytes - j = (json.dumps(msg)).encode('ascii') + j = json.dumps(msg) self.r_temp.sadd(self.subscriber_name + 'out', j) def publish(self): @@ -209,6 +215,12 @@ class Process(object): self.pubsub.setup_publish(name) while True: message = self.r_temp.spop(self.subscriber_name + 'out') + + try: + message = message.decode('utf8') + except AttributeError: + pass + if message is None: time.sleep(1) continue
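On the ZMQ side of Helper.py just above: pyzmq's send() only accepts bytes, so handing it the str built by '{} {}'.format(...) would raise TypeError; send_string(), like the setsockopt_string() already used for SUBSCRIBE and recv_string() on the receiving end, does the UTF-8 encode/decode at the socket boundary. A minimal pub/sub sketch:

    import zmq

    ctx = zmq.Context()
    pub = ctx.socket(zmq.PUB)
    pub.bind('tcp://127.0.0.1:5556')
    sub = ctx.socket(zmq.SUB)
    sub.connect('tcp://127.0.0.1:5556')
    sub.setsockopt_string(zmq.SUBSCRIBE, 'channel')   # str in, encoded for us
    pub.send_string('channel some message')           # UTF-8 on the wire
    # channel, msg = sub.recv_string().split(' ', 1)  # back to str on receipt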
complete_path = "?" - value = str(timestamp) + ", " + path.decode('ascii') + value = str(timestamp) + ", " + path self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) @@ -190,13 +197,12 @@ class Process(object): def populate_set_out(self, msg, channel=None): # multiproc - msg = msg.decode('ascii') msg = {'message': msg} if channel is not None: msg.update({'channel': channel}) # bytes64 encode bytes to ascii only bytes - j = (json.dumps(msg)).encode('ascii') + j = json.dumps(msg) self.r_temp.sadd(self.subscriber_name + 'out', j) def publish(self): @@ -209,6 +215,12 @@ class Process(object): self.pubsub.setup_publish(name) while True: message = self.r_temp.spop(self.subscriber_name + 'out') + + try: + message = message.decode('utf8') + except AttributeError: + pass + if message is None: time.sleep(1) continue diff --git a/bin/Indexer.py b/bin/Indexer.py index be4c899c..a4a3e1f2 100755 --- a/bin/Indexer.py +++ b/bin/Indexer.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -49,7 +49,7 @@ if __name__ == "__main__": # Indexer configuration - index dir and schema setup baseindexpath = join(os.environ['AIL_HOME'], p.config.get("Indexer", "path")) - indexRegister_path = join(os.environ['AIL_HOME'], + indexRegister_path = join(os.environ['AIL_HOME'], p.config.get("Indexer", "register")) indexertype = p.config.get("Indexer", "type") INDEX_SIZE_THRESHOLD = int(p.config.get("Indexer", "index_max_size")) @@ -89,7 +89,7 @@ if __name__ == "__main__": ix = create_in(indexpath, schema) else: ix = open_dir(indexpath) - + last_refresh = time_now # LOGGING # @@ -107,10 +107,11 @@ if __name__ == "__main__": continue docpath = message.split(" ", -1)[-1] paste = PST.get_p_content() - print "Indexing - "+indexname+" :", docpath + print("Indexing - " + indexname + " :", docpath) - if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message + #avoid calculating the index's size at each message + if( time.time() - last_refresh > TIME_WAIT): last_refresh = time.time() if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000): timestamp = int(time.time()) @@ -128,11 +129,11 @@ if __name__ == "__main__": if indexertype == "whoosh": indexwriter = ix.writer() indexwriter.update_document( - title=unicode(docpath, errors='ignore'), - path=unicode(docpath, errors='ignore'), - content=unicode(paste, errors='ignore')) + title=docpath, + path=docpath, + content=paste) indexwriter.commit() except IOError: - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/Keys.py b/bin/Keys.py index 71c04602..a8c368ca 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -15,46 +15,49 @@ RSA private key, certificate messages import time from pubsublogger import publisher -from bin.packages import Paste -from bin.Helper import Process +#from bin.packages import Paste +#from bin.Helper import Process + +from packages import Paste +from Helper import Process def search_key(paste): content = paste.get_p_content() find = False - if b'-----BEGIN PGP MESSAGE-----' in content: + if '-----BEGIN PGP MESSAGE-----' in content: publisher.warning('{} has a PGP enc message'.format(paste.p_name)) find 
diff --git a/bin/Keys.py b/bin/Keys.py index 71c04602..a8c368ca 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -15,46 +15,46 @@ RSA private key, certificate messages import time from pubsublogger import publisher -from bin.packages import Paste -from bin.Helper import Process +from packages import Paste +from Helper import Process def search_key(paste): content = paste.get_p_content() find = False - if b'-----BEGIN PGP MESSAGE-----' in content: + if '-----BEGIN PGP MESSAGE-----' in content: publisher.warning('{} has a PGP enc message'.format(paste.p_name)) find = True - if b'-----BEGIN CERTIFICATE-----' in content: + if '-----BEGIN CERTIFICATE-----' in content: publisher.warning('{} has a certificate message'.format(paste.p_name)) find = True - if b'-----BEGIN RSA PRIVATE KEY-----' in content: + if '-----BEGIN RSA PRIVATE KEY-----' in content: publisher.warning('{} has a RSA private key message'.format(paste.p_name)) find = True - if b'-----BEGIN PRIVATE KEY-----' in content: + if '-----BEGIN PRIVATE KEY-----' in content: publisher.warning('{} has a private key message'.format(paste.p_name)) find = True - if b'-----BEGIN ENCRYPTED PRIVATE KEY-----' in content: + if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content: publisher.warning('{} has an encrypted private key message'.format(paste.p_name)) find = True - if b'-----BEGIN OPENSSH PRIVATE KEY-----' in content: + if '-----BEGIN OPENSSH PRIVATE KEY-----' in content: publisher.warning('{} has an openssh private key message'.format(paste.p_name)) find = True - if b'-----BEGIN DSA PRIVATE KEY-----' in content: + if '-----BEGIN DSA PRIVATE KEY-----' in content: publisher.warning('{} has a dsa private key message'.format(paste.p_name)) find = True - if b'-----BEGIN EC PRIVATE KEY-----' in content: + if '-----BEGIN EC PRIVATE KEY-----' in content: publisher.warning('{} has an ec private key message'.format(paste.p_name)) find = True - if b'-----BEGIN PGP PRIVATE KEY BLOCK-----' in content: + if '-----BEGIN PGP PRIVATE KEY BLOCK-----' in content: publisher.warning('{} has a pgp private key block message'.format(paste.p_name)) find = True @@ -63,7 +63,8 @@ def search_key(paste): #Send to duplicate p.populate_set_out(message, 'Duplicate') #send to Browse_warning_paste - p.populate_set_out('keys;{}'.format(message), 'alertHandler') + msg = 'keys;{}'.format(message) + p.populate_set_out(msg, 'alertHandler') if __name__ == '__main__':
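The b'' prefixes stripped from Keys.py above are required, not cosmetic: once get_p_content() returns str, testing a bytes needle against it raises TypeError under Python 3:

    content = '-----BEGIN PGP MESSAGE-----\n...'
    assert '-----BEGIN PGP MESSAGE-----' in content
    # b'-----BEGIN PGP MESSAGE-----' in content
    # TypeError: a bytes-like object is required, not 'str'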
diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 84ce8e5c..1cbaea85 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -130,8 +130,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Lines" bash -c 'python3 Lines.py; read x' sleep 0.1 - #screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' - #sleep 0.1 + screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' + sleep 0.1 screen -S "Script_AIL" -X screen -t "Categ" bash -c 'python3 Categ.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Tokenize" bash -c 'python3 Tokenize.py; read x' @@ -142,8 +142,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x' sleep 0.1 - #screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x' - #sleep 0.1 + screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x' + sleep 0.1 screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Curve" bash -c './Curve.py; read x' @@ -168,8 +168,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' sleep 0.1 - #screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' - #sleep 0.1 + screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' + sleep 0.1 screen -S "Script_AIL" -X screen -t "alertHandler" bash -c './alertHandler.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' diff --git a/bin/Lines.py b/bin/Lines.py index be1ad635..46f67f24 100755 --- a/bin/Lines.py +++ b/bin/Lines.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -61,7 +61,7 @@ if __name__ == '__main__': while True: try: message = p.get_from_set() - print message + print(message) if message is not None: PST = Paste.Paste(message) else: @@ -77,8 +77,8 @@ if __name__ == '__main__': # FIXME Not used. PST.store.sadd("Pastes_Objects", PST.p_path) if lines_infos[1] < args.max: - p.populate_set_out(PST.p_path, 'LinesShort') + p.populate_set_out( PST.p_path , 'LinesShort') else: - p.populate_set_out(PST.p_path, 'LinesLong') + p.populate_set_out( PST.p_path , 'LinesLong') except IOError: - print "CRC Checksum Error on : ", PST.p_path + print("CRC Checksum Error on : ", PST.p_path) diff --git a/bin/Mail.py b/bin/Mail.py index c608d106..b03061a4 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -73,19 +73,19 @@ if __name__ == "__main__": #Send to duplicate p.populate_set_out(filename, 'Duplicate') p.populate_set_out('mail;{}'.format(filename), 'alertHandler') - + else: publisher.info(to_print) - #Send to ModuleStats + #Send to ModuleStats for mail in MX_values[1]: - print 'mail;{};{};{}'.format(1, mail, PST.p_date) + print('mail;{};{};{}'.format(1, mail, PST.p_date)) p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') prec_filename = filename else: publisher.debug("Script Mails is Idling 10s") - print 'Sleeping' + print('Sleeping') time.sleep(10) message = p.get_from_set() diff --git a/bin/Mixer.py b/bin/Mixer.py index b9bb33c2..4b9725e9 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -91,7 +91,7 @@ if __name__ == '__main__': complete_paste, gzip64encoded = splitted try: - feeder_name = ( complete_paste.replace(b"archive/",b"") ).split(b"/")[0] + feeder_name = ( complete_paste.replace("archive/","") ).split("/")[0] # TODO take real name ?
paste_name = complete_paste @@ -109,9 +109,9 @@ if __name__ == '__main__': processed_paste_per_feeder[feeder_name] = 1 duplicated_paste_per_feeder[feeder_name] = 0 - relay_message = b" ".join( [paste_name, gzip64encoded] ) - - digest = hashlib.sha1(gzip64encoded).hexdigest() + relay_message = "{0} {1}".format(paste_name, gzip64encoded) + + digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest() # Avoid any duplicate coming from any sources if operation_mode == 1:
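Mixer.py above gained .encode('utf8') because Python 3's hashlib digests bytes only; passing str raises TypeError ("Unicode-objects must be encoded before hashing"). One line makes the point:

    import hashlib

    # hashlib.sha1('abc') fails on Python 3; encode first.
    digest = hashlib.sha1('abc'.encode('utf8')).hexdigest()
    assert digest == 'a9993e364706816aba3e25717850c26c9cd0d89d'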
diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py index fc219815..1d88cbc0 100755 --- a/bin/ModuleInformation.py +++ b/bin/ModuleInformation.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* ''' @@ -20,7 +20,7 @@ import os import signal import argparse from subprocess import PIPE, Popen -import ConfigParser +import configparser import json from terminaltables import AsciiTable import textwrap @@ -51,7 +51,7 @@ last_refresh = 0 def getPid(module): p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) for line in p.stdout: - print line - splittedLine = line.split() - if 'python2' in splittedLine: + print(line) + splittedLine = line.decode('utf8').split() + if 'python3' in splittedLine or 'python' in splittedLine: return int(splittedLine[0]) @@ -76,7 +76,7 @@ def cleanRedis(): flag_pid_valid = True if not flag_pid_valid: - print flag_pid_valid, 'cleaning', pid, 'in', k + print(flag_pid_valid, 'cleaning', pid, 'in', k) server.srem(k, pid) inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k]) @@ -85,11 +85,11 @@ def kill_module(module, pid): - print '' - print '-> trying to kill module:', module + print('') + print('-> trying to kill module:', module) if pid is None: - print 'pid was None' + print('pid was None') printarrayGlob.insert(1, [0, module, pid, "PID was None"]) printarrayGlob.pop() pid = getPid(module) @@ -102,15 +102,15 @@ def kill_module(module, pid): try: os.kill(pid, signal.SIGUSR1) except OSError: - print pid, 'already killed' + print(pid, 'already killed') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"]) printarrayGlob.pop() return time.sleep(1) if getPid(module) is None: - print module, 'has been killed' - print 'restarting', module, '...' + print(module, 'has been killed') + print('restarting', module, '...') p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) @@ -119,7 +119,7 @@ def kill_module(module, pid): printarrayGlob.pop() else: - print 'killing failed, retrying...' + print('killing failed, retrying...') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."]) printarrayGlob.pop() @@ -128,8 +128,8 @@ def kill_module(module, pid): os.kill(pid, signal.SIGUSR1) time.sleep(1) if getPid(module) is None: - print module, 'has been killed' - print 'restarting', module, '...' + print(module, 'has been killed') + print('restarting', module, '...') p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) @@ -137,12 +137,12 @@ def kill_module(module, pid): printarrayGlob.pop() printarrayGlob.pop() else: - print 'killing failed!' + print('killing failed!') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"]) printarrayGlob.pop() else: - print 'Module does not exist' + print('Module does not exist') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"]) printarrayGlob.pop() @@ -174,7 +174,7 @@ def waiting_refresh(): last_refresh = time.time() return True - + if __name__ == "__main__": @@ -192,7 +192,7 @@ if __name__ == "__main__": Did you set environment variables? \ Or activate the virtualenv.') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) # REDIS # @@ -222,17 +222,19 @@ if __name__ == "__main__": #while key != 'q': # key = stdsrc.getch() # stdscr.refresh() - + all_queue = set() printarray1 = [] printarray2 = [] printarray3 = [] - for queue, card in server.hgetall("queues").iteritems(): + for queue, card in server.hgetall("queues").items(): + queue = queue.decode('utf8') + card = card.decode('utf8') all_queue.add(queue) key = "MODULE_" + queue + "_" keySet = "MODULE_TYPE_" + queue array_module_type = [] - + for moduleNum in server.smembers(keySet): value = server.get(key + str(moduleNum)) if value is not None: @@ -240,7 +240,7 @@ if __name__ == "__main__": if timestamp is not None and path is not None: startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] - + if int(card) > 0: if int((datetime.datetime.now() - startTime_readable).total_seconds()) > args.treshold: log = open(log_filename, 'a') @@ -251,15 +251,15 @@ if __name__ == "__main__": last_kill_try = kill_retry_threshold+1 if args.autokill == 1 and last_kill_try > kill_retry_threshold : kill_module(queue, int(moduleNum)) - + array_module_type.append([get_color(processed_time_readable, False) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, False)]) - + else: printarray2.append([get_color(processed_time_readable, True) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, True)]) - array_module_type.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) + array_module_type.sort(key=lambda x: x[4], reverse=True) for e in array_module_type: printarray1.append(e) - + for curr_queue in module_file_array: if curr_queue not in all_queue: printarray3.append([curr_queue, "Not running"]) @@ -277,16 +277,16 @@ if __name__ == "__main__": printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"]) else: printarray3.append([curr_queue, "Stuck or idle, restarting disabled"]) - + ## FIXME To add: ## Button KILL Process using Curses - + printarray1.sort(key=lambda x: x[0][9:], reverse=False) printarray2.sort(key=lambda x: x[0][9:], reverse=False) printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) printarray2.insert(0,["Queue", "PID","Amount", "Paste 
start time", "Time since idle (H:M:S)", "Last paste hash"]) printarray3.insert(0,["Queue", "State"]) - + os.system('clear') t1 = AsciiTable(printarray1, title="Working queues") t1.column_max_width(1) @@ -304,7 +304,7 @@ if __name__ == "__main__": temp += l + '\n' content[longest_col] = temp.strip() t1.table_data[i] = content - + t2 = AsciiTable(printarray2, title="Idling queues") t2.column_max_width(1) if not t2.ok: @@ -321,33 +321,33 @@ if __name__ == "__main__": temp += l + '\n' content[longest_col] = temp.strip() t2.table_data[i] = content - + t3 = AsciiTable(printarray3, title="Not running queues") t3.column_max_width(1) - + printarray4 = [] for elem in printarrayGlob: if elem is not None: printarray4.append(elem) - + t4 = AsciiTable(printarray4, title="Last actions") t4.column_max_width(1) - + legend_array = [["Color", "Meaning"], [Back.RED+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+Style.RESET_ALL], [Back.MAGENTA+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+" while idle"+Style.RESET_ALL], [Back.YELLOW+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold/2)+Style.RESET_ALL], [Back.GREEN+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time <" +str(args.treshold)]] legend = AsciiTable(legend_array, title="Legend") legend.column_max_width(1) - - print legend.table - print '\n' - print t1.table - print '\n' - print t2.table - print '\n' - print t3.table - print '\n' - print t4.table - - if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: + + print(legend.table) + print('\n') + print(t1.table) + print('\n') + print(t2.table) + print('\n') + print(t3.table) + print('\n') + print(t4.table9) + + if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: lastTime = datetime.datetime.now() cleanRedis() #time.sleep(args.refresh) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 648649f7..ceec1490 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ This module makes statistics for some modules and providers @@ -39,11 +39,11 @@ def compute_most_posted(server, message): # Compute Most Posted date = get_date_range(0)[0] # check if this keyword is eligible for progression - keyword_total_sum = 0 + keyword_total_sum = 0 curr_value = server.hget(date, module+'-'+keyword) keyword_total_sum += int(curr_value) if curr_value is not None else 0 - + if server.zcard(redis_progression_name_set) < max_set_cardinality: server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword) @@ -52,17 +52,17 @@ def compute_most_posted(server, message): # Member set is a list of (value, score) pairs if int(member_set[0][1]) < keyword_total_sum: #remove min from set and add the new one - print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')') server.zrem(redis_progression_name_set, member_set[0][0]) server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword) - print redis_progression_name_set + print(redis_progression_name_set) def compute_provider_info(server_trend, server_pasteName, path): redis_all_provider = 'all_provider_set' - + paste = Paste.Paste(path) - + paste_baseName = paste.p_name.split('.')[0] paste_size = paste._get_p_size() paste_provider = paste.p_source @@ -84,7 +84,7 @@ def 
compute_provider_info(server_trend, server_pasteName, path): # # Compute Most Posted # - + # Size if server_trend.zcard(redis_sum_size_set) < max_set_cardinality or server_trend.zscore(redis_sum_size_set, paste_provider) != "nil": server_trend.zadd(redis_sum_size_set, float(num_paste), paste_provider) @@ -94,7 +94,7 @@ def compute_provider_info(server_trend, server_pasteName, path): # Member set is a list of (value, score) pairs if float(member_set[0][1]) < new_avg: #remove min from set and add the new one - print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')') server_trend.zrem(redis_sum_size_set, member_set[0][0]) server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider) server_trend.zrem(redis_avg_size_name_set, member_set[0][0]) @@ -110,7 +110,7 @@ def compute_provider_info(server_trend, server_pasteName, path): # Member set is a list of (value, score) pairs if int(member_set[0][1]) < num_paste: #remove min from set and add the new one - print 'Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')') - server_trend.zrem(member_set[0][0]) + server_trend.zrem(redis_providers_name_set, member_set[0][0]) server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider) @@ -149,7 +149,7 @@ if __name__ == '__main__': if message is None: publisher.debug("{} queue is empty, waiting".format(config_section)) - print 'sleeping' + print('sleeping') time.sleep(20) continue diff --git a/bin/ModulesInformationV2.py b/bin/ModulesInformationV2.py index 041f8ec5..7e64aa05 100755 --- a/bin/ModulesInformationV2.py +++ b/bin/ModulesInformationV2.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \ @@ -10,7 +10,7 @@ from asciimatics.event import Event from asciimatics.event import KeyboardEvent, MouseEvent import sys, os import time, datetime -import argparse, ConfigParser +import argparse, configparser import json import redis import psutil @@ -45,7 +45,7 @@ TABLES_PADDING = {"running": [12, 23, 8, 8, 23, 10, 55, 11, 11, 12], "idle": [9, QUEUE_STATUS = {} # Maintain the state of the CPU objects -CPU_TABLE = {} +CPU_TABLE = {} CPU_OBJECT_TABLE = {} # Path of the current paste for a pid @@ -137,7 +137,7 @@ class CListBox(ListBox): # Quit if press q elif event.key_code == ord('q'): Dashboard._quit() - + else: # Ignore any other key press. 
return event @@ -196,7 +196,7 @@ END EXTENSION ''' SCENE DEFINITION ''' class Dashboard(Frame): def __init__(self, screen): @@ -497,7 +497,7 @@ MANAGE MODULES AND GET INFOS def getPid(module): p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) for line in p.stdout: - print line - splittedLine = line.split() - if 'python2' in splittedLine: + print(line) + splittedLine = line.decode('utf8').split() + if 'python3' in splittedLine or 'python' in splittedLine: return int(splittedLine[0]) @@ -511,21 +511,21 @@ def clearRedisModuleInfo(): def cleanRedis(): for k in server.keys("MODULE_TYPE_*"): - moduleName = k[12:].split('_')[0] + moduleName = (k[12:].decode('utf8')).split('_')[0] for pid in server.smembers(k): flag_pid_valid = False proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) try: for line in proc.stdout: - splittedLine = line.split() - if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine: + splittedLine = line.decode('utf8').split() + if ('python3' in splittedLine or 'python' in splittedLine) and "./"+moduleName + ".py" in splittedLine: flag_pid_valid = True if not flag_pid_valid: #print flag_pid_valid, 'cleaning', pid, 'in', k server.srem(k, pid) inst_time = datetime.datetime.fromtimestamp(int(time.time())) - log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0)) + log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + (k).decode('utf8')], 0)) #Error due to resize, interrupted sys call except IOError as e: @@ -601,12 +601,14 @@ def fetchQueueData(): printarray_running = [] printarray_idle = [] printarray_notrunning = [] - for queue, card in server.hgetall("queues").iteritems(): + for queue, card in iter(server.hgetall("queues").items()): + queue = queue.decode('utf8') + card = card.decode('utf8') all_queue.add(queue) key = "MODULE_" + queue + "_" keySet = "MODULE_TYPE_" + queue array_module_type = [] - + for moduleNum in server.smembers(keySet): value = server.get(key + str(moduleNum)) complete_paste_path = server.get(key + str(moduleNum) + "_PATH") @@ -624,7 +626,7 @@ def fetchQueueData(): QUEUE_STATUS[moduleNum] = 1 else: QUEUE_STATUS[moduleNum] = 0 - + # Queue contain elements if int(card) > 0: # Queue need to be killed @@ -636,7 +638,7 @@ def fetchQueueData(): last_kill_try = kill_retry_threshold+1 if args.autokill == 1 and last_kill_try > kill_retry_threshold : kill_module(queue, int(moduleNum)) - + # Create CPU objects try: cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent() @@ -664,9 +666,9 @@ def fetchQueueData(): mem_percent = 0 array_module_type.append( ([" [ ]", str(queue), str(moduleNum), str(card), str(startTime_readable), - str(processed_time_readable), str(path), "{0:.2f}".format(cpu_percent)+"%", + str(processed_time_readable), str(path), "{0:.2f}".format(cpu_percent)+"%", "{0:.2f}".format(mem_percent)+"%", "{0:.2f}".format(cpu_avg)+"%"], moduleNum) ) - + else: printarray_idle.append( ([" ", str(queue), str(moduleNum), str(processed_time_readable), str(path)], moduleNum) ) @@ -674,7 +676,7 @@ def fetchQueueData(): - array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True) #Sort by num of pastes + array_module_type.sort(key=lambda x: x[0][4], reverse=True) #Sort by num of pastes for e in array_module_type: printarray_running.append(e) - + for curr_queue in module_file_array: if curr_queue not in all_queue: #Module not running by default printarray_notrunning.append( ([" ", curr_queue, "Not running by default"], curr_queue) ) @@ -692,8 +694,8 @@ def fetchQueueData(): printarray_notrunning.append( ([" ", curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - 
(int(time.time()) - no_info_modules[curr_queue]))) + "s"], curr_queue) ) else: printarray_notrunning.append( ([" ", curr_queue, "Stuck or idle, restarting disabled"], curr_queue) ) - - + + printarray_running.sort(key=lambda x: x[0], reverse=False) printarray_idle.sort(key=lambda x: x[0], reverse=False) printarray_notrunning.sort(key=lambda x: x[0][1], reverse=False) @@ -715,6 +717,11 @@ def format_string(tab, padding_row): text="" for ite, elem in enumerate(the_array): + try: + elem = elem.decode('utf8') + except AttributeError: + pass + if len(elem) > padding_row[ite]: text += "*" + elem[-padding_row[ite]+6:] padd_off = " "*5 @@ -761,7 +768,7 @@ def demo(screen): if time.time() - time_cooldown > args.refresh: cleanRedis() - for key, val in fetchQueueData().iteritems(): #fetch data and put it into the tables + for key, val in iter(fetchQueueData().items()): #fetch data and put it into the tables TABLES[key] = val TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"]) @@ -790,7 +797,7 @@ if __name__ == "__main__": Did you set environment variables? \ Or activate the virtualenv.') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) # REDIS # @@ -821,7 +828,7 @@ if __name__ == "__main__": module_file_array.add(line[:-1]) cleanRedis() - + TABLES_TITLES["running"] = format_string([([" Action", "Queue name", "PID", "#", "S Time", "R Time", "Processed element", "CPU %", "Mem %", "Avg CPU%"],0)], TABLES_PADDING["running"])[0][0] TABLES_TITLES["idle"] = format_string([([" Action", "Queue", "PID", "Idle Time", "Last paste hash"],0)], TABLES_PADDING["idle"])[0][0] TABLES_TITLES["notRunning"] = format_string([([" Action", "Queue", "State"],0)], TABLES_PADDING["notRunning"])[0][0] diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index dd15499a..441be7e8 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* -import ConfigParser +import configparser import os import smtplib -from email.MIMEMultipart import MIMEMultipart +from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText """ @@ -22,31 +22,31 @@ TrackedTermsNotificationEnabled_Name = "TrackedNotifications" TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_" def sendEmailNotification(recipient, term): - + if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. \ Did you set environment variables? 
\ Or activate the virtualenv?') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) sender = cfg.get("Notifications", "sender"), sender_host = cfg.get("Notifications", "sender_host"), sender_port = cfg.getint("Notifications", "sender_port"), sender_pw = cfg.get("Notifications", "sender_pw"), - + if isinstance(sender, tuple): sender = sender[0] if isinstance(sender_host, tuple): sender_host = sender_host[0] - + if isinstance(sender_port, tuple): sender_port = sender_port[0] - + if isinstance(sender_pw, tuple): - sender_pw = sender_pw[0] + sender_pw = sender_pw[0] # raise an exception if any of these is None if (sender is None or @@ -62,22 +62,19 @@ def sendEmailNotification(recipient, term): smtp_server.login(sender, sender_pw) else: smtp_server = smtplib.SMTP(sender_host, sender_port) - - + + mime_msg = MIMEMultipart() mime_msg['From'] = sender mime_msg['To'] = recipient mime_msg['Subject'] = "AIL Term Alert" - + body = "New occurrence for term: " + term mime_msg.attach(MIMEText(body, 'plain')) - + smtp_server.sendmail(sender, recipient, mime_msg.as_string()) smtp_server.quit() - + except Exception as e: - print str(e) + print(str(e)) # raise e - - - diff --git a/bin/Onion.py b/bin/Onion.py index aaf30a1b..3637de10 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ The ZMQ_Sub_Onion Module @@ -37,7 +37,7 @@ from Helper import Process def fetch(p, r_cache, urls, domains, path): failed = [] downloaded = [] - print len(urls), 'Urls to fetch.' + print(len(urls), 'Urls to fetch.') for url, domain in zip(urls, domains): if r_cache.exists(url) or url in failed: continue @@ -73,9 +73,9 @@ def fetch(p, r_cache, urls, domains, path): r_cache.setbit(url, 0, 0) r_cache.expire(url, 3600) failed.append(url) - print 'Failed at downloading', url - print process.stdout.read() - print 'Failed:', len(failed), 'Downloaded:', len(downloaded) + print('Failed at downloading', url) + print(process.stdout.read()) + print('Failed:', len(failed), 'Downloaded:', len(downloaded)) if __name__ == "__main__": @@ -109,7 +109,7 @@ if __name__ == "__main__": while True: if message is not None: - print message + print(message) filename, score = message.split() # "For each new paste" @@ -152,6 +152,6 @@ if __name__ == "__main__": prec_filename = filename else: publisher.debug("Script url is Idling 10s") - print 'Sleeping' + print('Sleeping') time.sleep(10) message = p.get_from_set() diff --git a/bin/Phone.py b/bin/Phone.py index 7a4811da..e13d0f13 100755 --- a/bin/Phone.py +++ b/bin/Phone.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ @@ -30,10 +30,11 @@ def search_phone(message): # if the list is greater than 4, we consider the Paste may contain a list of phone numbers if len(results) > 4: - print results + print(results) publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) #send to Browse_warning_paste - p.populate_set_out('phone;{}'.format(message), 'alertHandler') + msg = 'phone;{}'.format(message) + p.populate_set_out(msg, 'alertHandler') #Send to duplicate p.populate_set_out(message, 'Duplicate') stats = {} diff --git a/bin/QueueIn.py b/bin/QueueIn.py index 683a50ef..3f3325b7 100755 --- a/bin/QueueIn.py +++ b/bin/QueueIn.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* diff --git a/bin/QueueOut.py b/bin/QueueOut.py index d2853274..32a6a307 100755 --- a/bin/QueueOut.py +++ b/bin/QueueOut.py 
@@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* from pubsublogger import publisher diff --git a/bin/Queues_Monitoring.py b/bin/Queues_Monitoring.py index 20c137fb..5d120eb0 100755 --- a/bin/Queues_Monitoring.py +++ b/bin/Queues_Monitoring.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* import redis import argparse -import ConfigParser +import configparser import time import os from pubsublogger import publisher @@ -14,7 +14,7 @@ def main(): """Main Function""" # CONFIG # - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read('./packages/config.cfg') # SCRIPT PARSER # @@ -49,7 +49,7 @@ def main(): row.sort() table.add_rows(row, header=False) os.system('clear') - print table.draw() + print(table.draw()) if __name__ == "__main__": diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index d1534eab..88df0924 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* """ This Module is used for term frequency. @@ -115,6 +115,6 @@ if __name__ == "__main__": else: publisher.debug("Script RegexForTermsFrequency is Idling") - print "sleeping" + print("sleeping") time.sleep(5) message = p.get_from_set() diff --git a/bin/Release.py b/bin/Release.py index 98e60a96..6ba2b577 100755 --- a/bin/Release.py +++ b/bin/Release.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3.5 # -*-coding:UTF-8 -* import time from packages import Paste @@ -29,7 +29,7 @@ if __name__ == "__main__": filepath = p.get_from_set() if filepath is None: publisher.debug("Script Release is Idling 10s") - print 'Sleeping' + print('Sleeping') time.sleep(10) continue @@ -40,6 +40,7 @@ if __name__ == "__main__": continue to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) + print(to_print) if len(releases) > 30: publisher.warning(to_print) else: diff --git a/bin/Repartition_graph.py b/bin/Repartition_graph.py index 38c5e1b6..58480ffd 100755 --- a/bin/Repartition_graph.py +++ b/bin/Repartition_graph.py @@ -1,9 +1,9 @@ -#!/usr/bin/python2.7 +#!/usr/bin/python3.5 # -*-coding:UTF-8 -* import redis import argparse -import ConfigParser +import configparser from datetime import datetime from pubsublogger import publisher @@ -14,7 +14,7 @@ def main(): """Main Function""" # CONFIG # - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read('./packages/config.cfg') # SCRIPT PARSER #
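SQLInjectionDetection.py below swaps urllib2 for the py3 standard library; URL percent-decoding now lives in urllib.parse. A quick check with an injection-flavoured payload:

    from urllib.parse import unquote

    assert unquote('%27%20OR%20%271%27%3D%271') == "' OR '1'='1"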
diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py
index 318466c8..3e0dda76 100755
--- a/bin/SQLInjectionDetection.py
+++ b/bin/SQLInjectionDetection.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

 """
@@ -13,7 +13,7 @@ It test different possibility to makes some sqlInjection.
 import time
 import string
-import urllib2
+import urllib.request
 import re
 from pubsublogger import publisher
 from Helper import Process
@@ -74,8 +74,8 @@ def analyse(url, path):
     if (result_path > 0) or (result_query > 0):
         paste = Paste.Paste(path)
         if (result_path > 1) or (result_query > 1):
-            print "Detected SQL in URL: "
-            print urllib2.unquote(url)
+            print("Detected SQL in URL: ")
+            print(urllib.request.unquote(url))
             to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
             publisher.warning(to_print)
             #Send to duplicate
@@ -83,8 +83,8 @@ def analyse(url, path):
             #send to Browse_warning_paste
             p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler')
         else:
-            print "Potential SQL injection:"
-            print urllib2.unquote(url)
+            print("Potential SQL injection:")
+            print(urllib.request.unquote(url))
             to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path)
             publisher.info(to_print)

@@ -92,7 +92,7 @@ def analyse(url, path):
 # Try to detect if the url passed might be an sql injection by appliying the regex
 # defined above on it.
 def is_sql_injection(url_parsed):
-    line = urllib2.unquote(url_parsed)
+    line = urllib.request.unquote(url_parsed)
     line = string.upper(line)
     result = []
     result_suspect = []
@@ -114,10 +114,10 @@ def is_sql_injection(url_parsed):
             result_suspect.append(line[temp_res:temp_res+len(word)])

     if len(result)>0:
-        print result
+        print(result)
         return 2
     elif len(result_suspect)>0:
-        print result_suspect
+        print(result_suspect)
         return 1
     else:
         return 0
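One detail the SQLInjectionDetection.py hunks above leave untouched: `string.upper()` was removed from the string module in Python 3, so the `line = string.upper(line)` context line will raise AttributeError when is_sql_injection() runs. The py3-native spelling, sketched with a hypothetical URL fragment:

    from urllib.parse import unquote  # urllib.request re-exports unquote, but urllib.parse is its home

    line = unquote('%27%20OR%201=1--')  # hypothetical URL-encoded fragment
    line = line.upper()                 # the str method replaces the removed string.upper()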
diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py
index 00b15abb..e7534265 100755
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*
 """
     Sentiment analyser module.
@@ -33,7 +33,7 @@ size_threshold = 250
 line_max_length_threshold = 1000

 import os
-import ConfigParser
+import configparser

 configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
 if not os.path.exists(configfile):
@@ -41,7 +41,7 @@ if not os.path.exists(configfile):
     Did you set environment variables? \
     Or activate the virtualenv.')

-cfg = ConfigParser.ConfigParser()
+cfg = configparser.ConfigParser()
 cfg.read(configfile)

 sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
@@ -69,7 +69,7 @@ def Analyse(message, server):
         combined_datetime = datetime.datetime.combine(the_date, the_time)
         timestamp = calendar.timegm(combined_datetime.timetuple())

-        sentences = tokenize.sent_tokenize(p_content.decode('utf-8', 'ignore'))
+        sentences = tokenize.sent_tokenize(p_content)

         if len(sentences) > 0:
             avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
@@ -99,7 +99,7 @@ def Analyse(message, server):

             avg_score[k] = avg_score[k] / len(sentences)

-            # In redis-levelDB: {} = set, () = K-V 
+            # In redis-levelDB: {} = set, () = K-V
             # {Provider_set -> provider_i}
             # {Provider_TimestampInHour_i -> UniqID_i}_j
             # (UniqID_i -> PasteValue_i)
@@ -109,11 +109,11 @@ def Analyse(message, server):
             provider_timestamp = provider + '_' + str(timestamp)
             server.incr('UniqID')
             UniqID = server.get('UniqID')
-            print provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines'
+            print(provider_timestamp, '->', UniqID.decode('utf8'), 'dropped', num_line_removed, 'lines')
             server.sadd(provider_timestamp, UniqID)
             server.set(UniqID, avg_score)
     else:
-        print 'Dropped:', p_MimeType
+        print('Dropped:', p_MimeType)


 def isJSON(content):
@@ -121,7 +121,7 @@ def isJSON(content):
         json.loads(content)
         return True

-    except Exception,e:
+    except Exception:
         return False

 import signal
@@ -170,4 +170,3 @@ if __name__ == '__main__':
             continue
         else:
             signal.alarm(0)
-
diff --git a/bin/SetForTermsFrequency.py b/bin/SetForTermsFrequency.py
index 014ce10e..a9ee93ea 100755
--- a/bin/SetForTermsFrequency.py
+++ b/bin/SetForTermsFrequency.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*
 """
 This Module is used for term frequency.
@@ -126,6 +126,6 @@ if __name__ == "__main__":

     else:
         publisher.debug("Script RegexForTermsFrequency is Idling")
-        print "sleeping"
+        print("sleeping")
         time.sleep(5)
         message = p.get_from_set()
diff --git a/bin/Shutdown.py b/bin/Shutdown.py
index 8467dafb..0a08e7af 100755
--- a/bin/Shutdown.py
+++ b/bin/Shutdown.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*
 """
 The ZMQ_Feed_Q Module
@@ -21,7 +21,7 @@ Requirements
 """
 import redis
-import ConfigParser
+import configparser
 import os

 configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg')
@@ -31,7 +31,7 @@ def main():
     """Main Function"""

     # CONFIG #
-    cfg = ConfigParser.ConfigParser()
+    cfg = configparser.ConfigParser()
     cfg.read(configfile)

     # REDIS
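The `UniqID.decode('utf8')` added in SentimentAnalysis.py above reflects that redis-py returns bytes under Python 3; a minimal sketch of the pattern, assuming a local Redis on the default port:

    import redis

    server = redis.StrictRedis(host='localhost', port=6379, db=0)
    server.incr('UniqID')
    uniq_id = server.get('UniqID')   # bytes under Python 3, e.g. b'1'
    print('UniqID ->', uniq_id.decode('utf8'))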
diff --git a/bin/SourceCode.py b/bin/SourceCode.py
index 41120e69..ba166a67 100644
--- a/bin/SourceCode.py
+++ b/bin/SourceCode.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*
 import time
 from packages import Paste
@@ -26,10 +26,10 @@ if __name__ == "__main__":
     adr = "0x[a-f0-9]{2}"
     #asm = "\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\..
-    
+
     languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
     regex = '|'.join(languages)
-    print regex
+    print(regex)

     while True:
         message = p.get_from_set()
diff --git a/bin/Tokenize.py b/bin/Tokenize.py
index 377cba5a..6e374ee9 100755
--- a/bin/Tokenize.py
+++ b/bin/Tokenize.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*
 """
 The Tokenize Module
@@ -50,7 +50,7 @@ if __name__ == "__main__":

     while True:
         message = p.get_from_set()
-        print message
+        print(message)
         if message is not None:
             paste = Paste.Paste(message)
             signal.alarm(5)
@@ -67,4 +67,4 @@ if __name__ == "__main__":
         else:
             publisher.debug("Tokeniser is idling 10s")
             time.sleep(10)
-            print "sleepin"
+            print("Sleeping")
diff --git a/bin/Update-conf.py b/bin/Update-conf.py
index 863ff436..6cea72c2 100755
--- a/bin/Update-conf.py
+++ b/bin/Update-conf.py
@@ -1,8 +1,8 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

-import ConfigParser
-from ConfigParser import ConfigParser as cfgP
+import configparser
+from configparser import ConfigParser as cfgP
 import os
 from collections import OrderedDict
 import sys
@@ -20,14 +20,14 @@ def main():
     Or activate the virtualenv.')

     configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample')
-    cfg = ConfigParser.ConfigParser()
+    cfg = configparser.ConfigParser()
     cfg.read(configfile)
-    cfgSample = ConfigParser.ConfigParser()
+    cfgSample = configparser.ConfigParser()
     cfgSample.read(configfileSample)

     sections = cfgP.sections(cfg)
     sectionsSample = cfgP.sections(cfgSample)
-    
+
     missingSection = []
     dicoMissingSection = {}
     missingItem = []
@@ -89,7 +89,7 @@ def main():
 ''' Return a new dico with the section ordered as the old configuration with the updated one added '''
 def add_items_to_correct_position(sample_dico, old_dico, missingSection, dicoMissingSection):
     new_dico = OrderedDict()
-    
+
     positions = {}
     for pos_i, sec in enumerate(sample_dico):
         if sec in missingSection:
@@ -109,4 +109,3 @@ if __name__ == "__main__":
         sys.exit()
     else:
         sys.exit(1)
-
diff --git a/bin/Web.py b/bin/Web.py
index dc2bf2fd..d4593469 100755
--- a/bin/Web.py
+++ b/bin/Web.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

 """
@@ -100,7 +100,7 @@ if __name__ == "__main__":
                     publisher.debug('{} Published'.format(url))

                     if f1 == "onion":
-                        print domain
+                        print(domain)

                     hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
                     try:
@@ -121,16 +121,16 @@ if __name__ == "__main__":
                         # EU is not an official ISO 3166 code (but used by RIPE
                         # IP allocation)
                         if cc is not None and cc != "EU":
-                            print hostl, asn, cc, \
-                                pycountry.countries.get(alpha_2=cc).name
+                            print(hostl, asn, cc, \
+                                pycountry.countries.get(alpha_2=cc).name)
                             if cc == cc_critical:
                                 to_print = 'Url;{};{};{};Detected {} {}'.format(
                                     PST.p_source, PST.p_date, PST.p_name,
                                     hostl, cc)
                                 #publisher.warning(to_print)
-                                print to_print
+                                print(to_print)
                         else:
-                            print hostl, asn, cc
+                            print(hostl, asn, cc)

                 A_values = lib_refine.checking_A_record(r_serv2, domains_list)

@@ -146,7 +146,7 @@ if __name__ == "__main__":

         else:
             publisher.debug("Script url is Idling 10s")
-            print 'Sleeping'
+            print('Sleeping')
             time.sleep(10)
         message = p.get_from_set()
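Note that the Web.py hunk above still shows `hostl = unicode(...)` as an unchanged context line: the unicode builtin no longer exists in Python 3, so that line will raise NameError when reached; str is the replacement. A sketch, with a stand-in for the module's avoidNone() helper:

    def avoid_none(value):
        # illustrative stand-in for avoidNone(): map None to an empty string
        return value if value is not None else ''

    # Python 2: hostl = unicode(avoidNone(subdomain) + avoidNone(domain))
    hostl = str(avoid_none('www.') + avoid_none('example.com'))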
diff --git a/bin/WebStats.py b/bin/WebStats.py
index cbb52e7a..5edadc9f 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

 """
@@ -33,7 +33,7 @@ def analyse(server, field_name, date, url_parsed):
     if field_name == "domain": #save domain in a set for the monthly plot
         domain_set_name = "domain_set_" + date[0:6]
         server.sadd(domain_set_name, field)
-        print "added in " + domain_set_name +": "+ field
+        print("added in " + domain_set_name +": "+ field)

 def get_date_range(num_day):
     curr_date = datetime.date.today()
@@ -122,7 +122,7 @@ if __name__ == '__main__':
                                  p.config.get("Directories", "protocolstrending_csv"))
     protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                       p.config.get("Directories", "protocolsfile"))
-    
+
     csv_path_tld = os.path.join(os.environ['AIL_HOME'],
                                 p.config.get("Directories", "tldstrending_csv"))
     tldsfile_path = os.path.join(os.environ['AIL_HOME'],
@@ -145,24 +145,25 @@ if __name__ == '__main__':
             year = today.year
             month = today.month

-            print 'Building protocol graph'
+            print('Building protocol graph')
             lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
                                                   protocolsfile_path, year,
                                                   month)

-            print 'Building tld graph'
+            print('Building tld graph')
             lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld,
                                                   tldsfile_path, year,
                                                   month)

-            print 'Building domain graph'
+            print('Building domain graph')
             lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
                                                   "domain", year,
                                                   month)
-            print 'end building'
+            print('end building')
+

             publisher.debug("{} queue is empty, waiting".format(config_section))
-            print 'sleeping'
+            print('sleeping')
             time.sleep(5*60)
             continue

@@ -172,10 +173,14 @@ if __name__ == '__main__':
         url, date, path = message.split()
         faup.decode(url)
         url_parsed = faup.get()
-
-        analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis
-        analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis
-        analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis
+
+        # Scheme analysis
+        analyse(r_serv_trend, 'scheme', date, url_parsed)
+        # Tld analysis
+        analyse(r_serv_trend, 'tld', date, url_parsed)
+        # Domain analysis
+        analyse(r_serv_trend, 'domain', date, url_parsed)
+
         compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
         compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
         compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)
diff --git a/bin/alertHandler.py b/bin/alertHandler.py
index ce473ed4..69338cdc 100755
--- a/bin/alertHandler.py
+++ b/bin/alertHandler.py
@@ -62,8 +62,9 @@ if __name__ == "__main__":
     while True:
         message = p.get_from_set()
         if message is not None:
-            message = message.decode('utf8') #decode because of pyhton3
+            #decode because of python3
             module_name, p_path = message.split(';')
+            print("new alert : {}".format(module_name))
             #PST = Paste.Paste(p_path)
         else:
             publisher.debug("Script Attribute is idling 10s")
diff --git a/bin/empty_queue.py b/bin/empty_queue.py
index f1b3c453..77d22c1f 100755
--- a/bin/empty_queue.py
+++ b/bin/empty_queue.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

 """
@@ -27,10 +27,9 @@ if __name__ == "__main__":
     config_section = ['Curve']

     for queue in config_section:
-        print 'dropping: ' + queue
+        print('dropping: ' + queue)
         p = Process(queue)
         while True:
             message = p.get_from_set()
             if message is None:
                 break
-
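The pystemon-feeder.py change below adds a debug print but keeps `socket.send("%d %s" % (topic, paste))`, which raises TypeError under Python 3 because pyzmq sends bytes frames; send_string() (or an explicit encode) is the py3-safe form. A sketch with an illustrative endpoint and payload:

    import zmq

    context = zmq.Context()
    socket = context.socket(zmq.PUB)
    socket.bind("tcp://*:5556")      # illustrative endpoint

    topic, paste = 101, "archive/2017/01/01/example.gz"   # illustrative payload
    socket.send_string("%d %s" % (topic, paste))          # encoded to UTF-8 bytes on the wire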
diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py
index f6e64033..8f6130fe 100755
--- a/bin/feeder/pystemon-feeder.py
+++ b/bin/feeder/pystemon-feeder.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.5
 # -*- coding: utf-8 -*-
 #
 # This file is part of AIL framework - Analysis Information Leak framework
@@ -25,7 +25,7 @@ import time
 import redis
 import base64
 import os
-import ConfigParser
+import configparser

 configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
 if not os.path.exists(configfile):
@@ -33,7 +33,7 @@ if not os.path.exists(configfile):
     Did you set environment variables? \
     Or activate the virtualenv.')

-cfg = ConfigParser.ConfigParser()
+cfg = configparser.ConfigParser()
 cfg.read(configfile)

 if cfg.has_option("ZMQ_Global", "bind"):
@@ -59,6 +59,7 @@ while True:
     time.sleep(base_sleeptime + sleep_inc)
     topic = 101
     paste = r.lpop("pastes")
+    print(paste)
     if paste is None:
         continue
     socket.send("%d %s" % (topic, paste))
diff --git a/bin/feeder/test-zmq.py b/bin/feeder/test-zmq.py
index 2bedf3fe..d9769fb5 100644
--- a/bin/feeder/test-zmq.py
+++ b/bin/feeder/test-zmq.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.5
 # -*- coding: utf-8 -*-
 #
 # This file is part of AIL framework - Analysis Information Leak framework
@@ -19,14 +19,15 @@ socket.connect ("tcp://crf.circl.lu:%s" % port)
 # 101 Name of the pastes only
 # 102 Full pastes in raw base64(gz)

-topicfilter = "102"
+topicfilter = b"102"
 socket.setsockopt(zmq.SUBSCRIBE, topicfilter)
-
+print('b0')
 while True:
     message = socket.recv()
+    print('b1')
     print (message)
-    if topicfilter == "102":
+    if topicfilter == b"102":
         topic, paste, messagedata = message.split()
-        print paste, messagedata
+        print(paste, messagedata)
     else:
         print (message)
diff --git a/bin/import_dir.py b/bin/import_dir.py
index 3d291db0..a8faba7f 100755
--- a/bin/import_dir.py
+++ b/bin/import_dir.py
@@ -1,9 +1,9 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.5
 # -*- coding: utf-8 -*-

 import zmq
 import base64
-import StringIO
+from io import StringIO
 import gzip
 import argparse
 import os
@@ -31,8 +31,7 @@ import mimetypes
 '
 '''

-import StringIO
-import gzip
+
 def is_hierachy_valid(path):
     var = path.split('/')
     try:
@@ -72,7 +71,12 @@ if __name__ == "__main__":
             wanted_path = wanted_path.split('/')
             wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):])

-            messagedata = open(complete_path).read()
+            with gzip.open(complete_path, 'rb') as f:
+                messagedata = f.read()
+
+            #print(type(complete_path))
+            #file = open(complete_path)
+            #messagedata = file.read()

             #if paste do not have a 'date hierarchy' ignore it
             if not is_hierachy_valid(complete_path):
@@ -90,5 +94,8 @@ if __name__ == "__main__":
                 print(args.name+'>'+wanted_path)
             path_to_send = args.name + '>' + wanted_path

-            socket.send('{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata)))
+            #s = b'{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))
+            # use bytes object
+            s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
+            socket.send(s)
             time.sleep(args.seconds)
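On the subscriber side the same bytes rule explains the `topicfilter = b"102"` change in test-zmq.py above: SUBSCRIBE filters and received frames are bytes in Python 3. A matching sketch, endpoint illustrative:

    import zmq

    context = zmq.Context()
    socket = context.socket(zmq.SUB)
    socket.connect("tcp://localhost:5556")      # illustrative endpoint
    socket.setsockopt(zmq.SUBSCRIBE, b"102")    # topic filter must be bytes

    message = socket.recv()                     # bytes; split() yields bytes fields
    topic, paste, messagedata = message.split()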
diff --git a/bin/indexer_lookup.py b/bin/indexer_lookup.py
index c7674d38..6642afce 100644
--- a/bin/indexer_lookup.py
+++ b/bin/indexer_lookup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.5
 # -*- coding: utf-8 -*-
 #
 # This file is part of AIL framework - Analysis Information Leak framework
@@ -10,7 +10,7 @@
 #
 # Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be

-import ConfigParser
+import configparser
 import argparse
 import gzip
 import os
@@ -23,7 +23,7 @@ def readdoc(path=None):
     return f.read()

 configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
-cfg = ConfigParser.ConfigParser()
+cfg = configparser.ConfigParser()
 cfg.read(configfile)

 # Indexer configuration - index dir and schema setup
@@ -51,7 +51,7 @@ ix = index.open_dir(indexpath)
 from whoosh.qparser import QueryParser

 if args.n:
-    print ix.doc_count_all()
+    print(ix.doc_count_all())
     exit(0)

 if args.l:
diff --git a/bin/launch_queues.py b/bin/launch_queues.py
index e60a7b50..55cfe717 100755
--- a/bin/launch_queues.py
+++ b/bin/launch_queues.py
@@ -1,8 +1,8 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

-import ConfigParser
+import configparser
 import os
 import subprocess
 import time
@@ -23,21 +23,21 @@ if __name__ == '__main__':
         raise Exception('Unable to find the configuration file. \
             Did you set environment variables? \
             Or activate the virtualenv.')
-    config = ConfigParser.ConfigParser()
+    config = configparser.ConfigParser()
     config.read(configfile)

     modules = config.sections()
     pids = {}
     for module in modules:
-        pin = subprocess.Popen(["python", './QueueIn.py', '-c', module])
-        pout = subprocess.Popen(["python", './QueueOut.py', '-c', module])
+        pin = subprocess.Popen(["python3", './QueueIn.py', '-c', module])
+        pout = subprocess.Popen(["python3", './QueueOut.py', '-c', module])
         pids[module] = (pin, pout)
     is_running = True
     try:
         while is_running:
             time.sleep(5)
             is_running = False
-            for module, p in pids.iteritems():
+            for module, p in pids.items():
                 pin, pout = p
                 if pin is None:
                     # already dead
@@ -57,7 +57,7 @@ if __name__ == '__main__':
                         is_running = True
                 pids[module] = (pin, pout)
     except KeyboardInterrupt:
-        for module, p in pids.iteritems():
+        for module, p in pids.items():
             pin, pout = p
             if pin is not None:
                 pin.kill()
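The Paste.get_p_content() rewrite below settles on one pattern: read bytes from gzip, cache them, and decode to str once at the boundary. In isolation, and with a placeholder path, the pattern looks like this:

    import gzip

    def read_paste(path):
        # gzip.open(..., 'rb') yields bytes under Python 3
        try:
            with gzip.open(path, 'rb') as f:
                raw = f.read()
        except OSError:          # unreadable or not a gzip file
            raw = b''
        return raw.decode('utf8')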
diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py
index 1debd33e..594b7286 100755
--- a/bin/packages/Paste.py
+++ b/bin/packages/Paste.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.7
+#!/usr/bin/python3.5

 """
 The ``Paste Class``
@@ -24,15 +24,8 @@ import operator
 import string
 import re
 import json
-try: # dirty to support python3
-    import ConfigParser
-except:
-    import configparser
-    ConfigParser = configparser
-try: # dirty to support python3
-    import cStringIO
-except:
-    from io import StringIO as cStringIO
+import configparser
+from io import StringIO
 import sys
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
 from Date import Date
@@ -71,7 +64,7 @@ class Paste(object):
             Did you set environment variables? \
             Or activate the virtualenv.')

-        cfg = ConfigParser.ConfigParser()
+        cfg = configparser.ConfigParser()
         cfg.read(configfile)
         self.cache = redis.StrictRedis(
             host=cfg.get("Redis_Queues", "host"),
@@ -85,11 +78,15 @@ class Paste(object):
         self.p_path = p_path
         self.p_name = os.path.basename(self.p_path)
         self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2)
+        self.p_mime = magic.from_buffer("test", mime=True)
         self.p_mime = magic.from_buffer(self.get_p_content(), mime=True)

         # Assuming that the paste will alway be in a day folder which is itself
         # in a month folder which is itself in a year folder.
         # /year/month/day/paste.gz
+
+        # TODO use bytes ?
+
         var = self.p_path.split('/')
         self.p_date = Date(var[-4], var[-3], var[-2])
         self.p_source = var[-5]
@@ -117,17 +114,25 @@ class Paste(object):
         paste = self.cache.get(self.p_path)
         if paste is None:
             try:
+                #print('----------------------------------------------------------------')
+                #print(self.p_name)
+                #print('----------------------------------------------------------------')
                 with gzip.open(self.p_path, 'rb') as f:
                     paste = f.read()
                     self.cache.set(self.p_path, paste)
                     self.cache.expire(self.p_path, 300)
             except:
-                return ''
-            pass
-        return paste
+                paste = b''
+
+        return paste.decode('utf8')

     def get_p_content_as_file(self):
-        return cStringIO.StringIO(self.get_p_content())
+        try:
+            message = StringIO( (self.get_p_content()).decode('utf8') )
+        except AttributeError:
+            message = StringIO( (self.get_p_content()) )
+
+        return message

     def get_p_content_with_removed_lines(self, threshold):
         num_line_removed = 0
@@ -137,6 +142,7 @@ class Paste(object):
         line_id = 0
         for line_id, line in enumerate(f):
             length = len(line)
+
             if length < line_length_threshold:
                 string_content += line
             else:
@@ -202,8 +208,8 @@ class Paste(object):
         .. seealso:: _set_p_hash_kind("md5")

         """
-        for hash_name, the_hash in self.p_hash_kind.iteritems():
-            self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content())
+        for hash_name, the_hash in self.p_hash_kind.items():
+            self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode('utf8'))
         return self.p_hash

     def _get_p_language(self):
@@ -271,7 +277,7 @@ class Paste(object):
             return True, var
         else:
             return False, var
-    
+
     def _get_p_duplicate(self):
         self.p_duplicate = self.store.hget(self.p_path, "p_duplicate")
         return self.p_duplicate if self.p_duplicate is not None else '[]'
@@ -342,7 +348,7 @@ class Paste(object):
         tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
                                     gaps=True, discard_empty=True)

-        blob = TextBlob(clean(self.get_p_content()), tokenizer=tokenizer)
+        blob = TextBlob(clean( (self.get_p_content()) ), tokenizer=tokenizer)

         for word in blob.tokens:
             if word in words.keys():
@@ -351,7 +357,7 @@ class Paste(object):
                 num = 0
             words[word] = num + 1
         if sort:
-            var = sorted(words.iteritems(), key=operator.itemgetter(1), reverse=True)
+            var = sorted(words.items(), key=operator.itemgetter(1), reverse=True)
         else:
             var = words
diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py
index 6606566c..f02119ad 100644
--- a/bin/packages/lib_refine.py
+++ b/bin/packages/lib_refine.py
@@ -76,7 +76,7 @@ def checking_MX_record(r_serv, adress_set):
                     r_serv.setex(MXdomain[1:], 1, timedelta(days=1))

         except Exception as e:
-            print e
+            print(e)

     publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
     return (num, WalidMX)
@@ -125,7 +125,7 @@ def checking_A_record(r_serv, domains_set):
                 publisher.debug('The Label is too long')

         except Exception as e:
-            print e
+            print(e)

     publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
     return (num, WalidA)
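lib_words.py below switches its CSV files from 'wb' to 'w'; one Python 3 subtlety the diff does not add is `newline=''`, which the csv module expects on files it writes to avoid spurious blank lines on some platforms. A sketch with an illustrative filename and header row:

    import csv

    headers = ['Date', 'com', 'net']   # illustrative header row
    with open('tldstrending.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(headers)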
diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py
index e98609d7..2101f77d 100644
--- a/bin/packages/lib_words.py
+++ b/bin/packages/lib_words.py
@@ -82,16 +82,16 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
     """
     threshold = 50
-    first_day = date(year, month, 01)
+    first_day = date(year, month, 1)
     last_day = date(year, month, calendar.monthrange(year, month)[1])
     words = []

-    with open(feederfilename, 'rb') as f:
+    with open(feederfilename, 'r') as f:
         # words of the files
         words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ])

     headers = ['Date'] + words
-    with open(csvfilename+'.csv', 'wb') as f:
+    with open(csvfilename+'.csv', 'w') as f:
         writer = csv.writer(f)
         writer.writerow(headers)

@@ -103,11 +103,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
             # from the 1srt day to the last of the list
             for word in words:
                 value = r_serv.hget(word, curdate)
+
                 if value is None:
                     row.append(0)
                 else:
                     # if the word have a value for the day
                     # FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold
+                    value = r_serv.hget(word, curdate)
+                    value = int(value.decode('utf8'))
                     if value >= threshold:
                         row.append(value)
             writer.writerow(row)
@@ -127,14 +130,14 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):

     """

-    first_day = date(year, month, 01)
+    first_day = date(year, month, 1)
     last_day = date(year, month, calendar.monthrange(year, month)[1])
-    
+
     redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
     words = list(server.smembers(redis_set_name))
-    
+
     headers = ['Date'] + words
-    with open(csvfilename+'.csv', 'wb') as f:
+    with open(csvfilename+'.csv', 'w') as f:
         writer = csv.writer(f)
         writer.writerow(headers)
diff --git a/bin/preProcessFeed.py b/bin/preProcessFeed.py
index d9ef419d..5c7a346e 100755
--- a/bin/preProcessFeed.py
+++ b/bin/preProcessFeed.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

 '''
@@ -48,7 +48,7 @@ if __name__ == '__main__':
         message = p.get_from_set()
         if message is None:
             publisher.debug("{} queue is empty, waiting".format(config_section))
-            print "queue empty"
+            print("queue empty")
             time.sleep(1)
             continue
diff --git a/bin/template.py b/bin/template.py
index 22489d16..3f93e827 100755
--- a/bin/template.py
+++ b/bin/template.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*
 """
 Template for new modules
diff --git a/bin/tor_fetcher.py b/bin/tor_fetcher.py
index bd3d72d3..f1e72e0c 100644
--- a/bin/tor_fetcher.py
+++ b/bin/tor_fetcher.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3.5
 # -*-coding:UTF-8 -*

 import socks
@@ -63,5 +63,5 @@ if __name__ == "__main__":
     t, path = tempfile.mkstemp()
     with open(path, 'w') as f:
         f.write(to_write)
-    print path
+    print(path)
     exit(0)
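Much of the scattered `.decode('utf8')` in this migration could alternatively be avoided at the client level: redis-py accepts decode_responses=True and then returns str instead of bytes. Whether that is safe for keys that cache raw gzip bytes, as Paste.py does, would need checking, so this is an alternative sketch rather than the approach the patch takes:

    import redis

    server = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)
    server.set('UniqID', 1)
    print(server.get('UniqID'))   # '1' as str, no manual decode needed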