diff --git a/bin/Attribute.py b/bin/Attributes.py similarity index 100% rename from bin/Attribute.py rename to bin/Attributes.py diff --git a/bin/CreditCard.py b/bin/CreditCards.py similarity index 100% rename from bin/CreditCard.py rename to bin/CreditCards.py diff --git a/bin/Curve_manage_top_sets.py b/bin/CurveManageTopSets.py similarity index 99% rename from bin/Curve_manage_top_sets.py rename to bin/CurveManageTopSets.py index a88ff8b4..a49d0a7d 100755 --- a/bin/Curve_manage_top_sets.py +++ b/bin/CurveManageTopSets.py @@ -22,6 +22,7 @@ from pubsublogger import publisher from packages import lib_words import datetime import calendar +import os import ConfigParser # Config Variables diff --git a/bin/Duplicate_ssdeep.py b/bin/Duplicate_ssdeep.py deleted file mode 100755 index 1b173eca..00000000 --- a/bin/Duplicate_ssdeep.py +++ /dev/null @@ -1,182 +0,0 @@ -#!/usr/bin/env python2 -# -*-coding:UTF-8 -* - -""" -The Duplicate module -==================== - -This huge module is, in short term, checking duplicates. - -Requirements: -------------- - - -""" -import redis -import os -import time -import datetime -import json -import ssdeep -from packages import Paste -from pubsublogger import publisher - -from Helper import Process - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Duplicates' - save_dico_and_reload = 1 #min - time_1 = time.time() - flag_reload_from_disk = True - flag_write_to_disk = False - - p = Process(config_section) - - # REDIS # - # DB OBJECT & HASHS ( DISK ) - # FIXME increase flexibility - dico_redis = {} - for year in xrange(2013, datetime.date.today().year+1): - for month in xrange(0, 16): - dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( - host=p.config.get("Redis_Level_DB", "host"), port=year, - db=month) - #print("dup: "+str(year)+str(month).zfill(2)+"\n") - - # FUNCTIONS # - publisher.info("Script duplicate started") - - dicopath = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "dicofilters")) - - dico_path_set = set() - while True: - try: - hash_dico = {} - dupl = [] - - x = time.time() - - message = p.get_from_set() - if message is not None: - path = message - PST = Paste.Paste(path) - else: - publisher.debug("Script Attribute is idling 10s") - time.sleep(10) - continue - - PST._set_p_hash_kind("ssdeep") - - # Assignate the correct redis connexion - r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] - - # Creating the dicor name: dicoyyyymm - filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year + - PST.p_date.month) - filedicopath_today = filedicopath - - # Save I/O - if time.time() - time_1 > save_dico_and_reload*60: - flag_write_to_disk = True - - if os.path.exists(filedicopath): - if flag_reload_from_disk == True: - flag_reload_from_disk = False - print 'Reloading' - with open(filedicopath, 'r') as fp: - today_dico = json.load(fp) - else: - today_dico = {} - with open(filedicopath, 'w') as fp: - json.dump(today_dico, fp) - - # For now, just use monthly dico - dico_path_set.add(filedicopath) - - # UNIQUE INDEX HASHS TABLE - yearly_index = str(datetime.date.today().year)+'00' - r_serv0 = dico_redis[yearly_index] - r_serv0.incr("current_index") - index = r_serv0.get("current_index")+str(PST.p_date) - - # For each dico - opened_dico = [] - for dico in dico_path_set: - # Opening dico - if dico == filedicopath_today: - opened_dico.append([dico, today_dico]) - else: - with open(dico, 'r') as fp: - opened_dico.append([dico, json.load(fp)]) - - - #retrieve hash from paste - paste_hash = PST._get_p_hash() - - # Go throught the Database of the dico (of the month) - threshold_dup = 99 - for dico_name, dico in opened_dico: - for dico_key, dico_hash in dico.items(): - percent = ssdeep.compare(dico_hash, paste_hash) - if percent > threshold_dup: - db = dico_name[-6:] - # Go throught the Database of the dico filter (month) - r_serv_dico = dico_redis[db] - - # index of paste - index_current = r_serv_dico.get(dico_hash) - paste_path = r_serv_dico.get(index_current) - if paste_path != None: - hash_dico[dico_hash] = (paste_path, percent) - - #print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent) - print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent) - - # Add paste in DB to prevent its analyse twice - # HASHTABLES PER MONTH (because of r_serv1 changing db) - r_serv1.set(index, PST.p_path) - r_serv1.sadd("INDEX", index) - # Adding the hash in Redis - r_serv1.set(paste_hash, index) - r_serv1.sadd("HASHS", paste_hash) - ##################### Similarity found ####################### - - # if there is data in this dictionnary - if len(hash_dico) != 0: - for dico_hash, paste_tuple in hash_dico.items(): - paste_path, percent = paste_tuple - dupl.append((paste_path, percent)) - - # Creating the object attribute and save it. - to_print = 'Duplicate;{};{};{};'.format( - PST.p_source, PST.p_date, PST.p_name) - if dupl != []: - PST.__setattr__("p_duplicate", dupl) - PST.save_attribute_redis("p_duplicate", dupl) - publisher.info('{}Detected {}'.format(to_print, len(dupl))) - print '{}Detected {}'.format(to_print, len(dupl)) - - y = time.time() - - publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) - - - # Adding the hash in the dico of the month - today_dico[index] = paste_hash - - if flag_write_to_disk: - time_1 = time.time() - flag_write_to_disk = False - flag_reload_from_disk = True - print 'writing' - with open(filedicopath, 'w') as fp: - json.dump(today_dico, fp) - except IOError: - to_print = 'Duplicate;{};{};{};'.format( - PST.p_source, PST.p_date, PST.p_name) - print "CRC Checksum Failed on :", PST.p_path - publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicates.py similarity index 100% rename from bin/Duplicate_ssdeep_v2.py rename to bin/Duplicates.py diff --git a/bin/Duplicate.py b/bin/Duplicates_old.py similarity index 100% rename from bin/Duplicate.py rename to bin/Duplicates_old.py diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index fef9c3ce..75a3a0a5 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -114,31 +114,31 @@ function launching_scripts { screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' + screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' + screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x' + screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x' + screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x' + screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x' + screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' + screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' sleep 0.1 @@ -158,7 +158,9 @@ function launching_scripts { sleep 0.1 screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' + screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x' } diff --git a/bin/Line.py b/bin/Lines.py similarity index 100% rename from bin/Line.py rename to bin/Lines.py diff --git a/bin/ModuleInformations.py b/bin/ModuleInformation.py similarity index 77% rename from bin/ModuleInformations.py rename to bin/ModuleInformation.py index 535e06b2..dce855b0 100755 --- a/bin/ModuleInformations.py +++ b/bin/ModuleInformation.py @@ -14,27 +14,48 @@ import json from prettytable import PrettyTable # CONFIG VARIABLES -threshold_stucked_module = 1*60*1 #1 hour +threshold_stucked_module = 60*60*1 #1 hour +refreshRate = 1 log_filename = "../logs/moduleInfo.log" command_search_pid = "ps a -o pid,cmd | grep {}" command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\"" -def kill_module(module): - print 'trying to kill module:', module - time.sleep(8) - - time.sleep(1) +def getPid(module): p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) - for line in p.stdout: splittedLine = line.split() if 'python2' in splittedLine: - pid = int(splittedLine[0]) - os.kill(pid, signal.SIGUSR1) - time.sleep(15) + return int(splittedLine[0]) + else: + return None + + +def kill_module(module): + print '' + print '-> trying to kill module:', module + + pid = getPid(module) + if pid is not None: + os.kill(pid, signal.SIGUSR1) + time.sleep(1) + if getPid(module) is None: + print module, 'has been killed' + print 'restarting', module, '...' p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + else: + print 'killing failed, retrying...' + time.sleep(3) + os.kill(pid, signal.SIGUSR1) + time.sleep(1) + if getPid(module) is None: + print module, 'has been killed' + print 'restarting', module, '...' + p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + else: + print 'killing failed!' + time.sleep(7) if __name__ == "__main__": @@ -86,4 +107,4 @@ if __name__ == "__main__": print '\n' print 'Ideling queues:\n' print table2 - time.sleep(5) + time.sleep(refreshRate) diff --git a/bin/SentimentAnalyser.py b/bin/SentimentAnalysis.py similarity index 100% rename from bin/SentimentAnalyser.py rename to bin/SentimentAnalysis.py diff --git a/bin/Url.py b/bin/Web.py similarity index 100% rename from bin/Url.py rename to bin/Web.py diff --git a/bin/launch_scripts.sh b/bin/launch_scripts.sh index ad55244a..1cdde370 100755 --- a/bin/launch_scripts.sh +++ b/bin/launch_scripts.sh @@ -8,50 +8,52 @@ sleep 0.1 echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT -screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' + screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x'