mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
Added moduleInfo + Renamed modules
This commit is contained in:
parent
8fb552b7da
commit
64bf481480
12 changed files with 92 additions and 248 deletions
|
@ -22,6 +22,7 @@ from pubsublogger import publisher
|
||||||
from packages import lib_words
|
from packages import lib_words
|
||||||
import datetime
|
import datetime
|
||||||
import calendar
|
import calendar
|
||||||
|
import os
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
|
|
||||||
# Config Variables
|
# Config Variables
|
|
@ -1,182 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
"""
|
|
||||||
The Duplicate module
|
|
||||||
====================
|
|
||||||
|
|
||||||
This huge module, in short, checks for duplicate pastes.
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
-------------
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
import redis
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
import json
|
|
||||||
import ssdeep
|
|
||||||
from packages import Paste
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
from Helper import Process
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
config_section = 'Duplicates'
|
|
||||||
save_dico_and_reload = 1 #min
|
|
||||||
time_1 = time.time()
|
|
||||||
flag_reload_from_disk = True
|
|
||||||
flag_write_to_disk = False
|
|
||||||
|
|
||||||
p = Process(config_section)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
# DB OBJECT & HASHS ( DISK )
|
|
||||||
# FIXME increase flexibility
|
|
||||||
dico_redis = {}
|
|
||||||
for year in xrange(2013, datetime.date.today().year+1):
|
|
||||||
for month in xrange(0, 16):
|
|
||||||
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
|
||||||
host=p.config.get("Redis_Level_DB", "host"), port=year,
|
|
||||||
db=month)
|
|
||||||
#print("dup: "+str(year)+str(month).zfill(2)+"\n")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("Script duplicate started")
|
|
||||||
|
|
||||||
dicopath = os.path.join(os.environ['AIL_HOME'],
|
|
||||||
p.config.get("Directories", "dicofilters"))
|
|
||||||
|
|
||||||
dico_path_set = set()
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
hash_dico = {}
|
|
||||||
dupl = []
|
|
||||||
|
|
||||||
x = time.time()
|
|
||||||
|
|
||||||
message = p.get_from_set()
|
|
||||||
if message is not None:
|
|
||||||
path = message
|
|
||||||
PST = Paste.Paste(path)
|
|
||||||
else:
|
|
||||||
publisher.debug("Script Attribute is idling 10s")
|
|
||||||
time.sleep(10)
|
|
||||||
continue
|
|
||||||
|
|
||||||
PST._set_p_hash_kind("ssdeep")
|
|
||||||
|
|
||||||
# Assign the correct Redis connection
|
|
||||||
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
|
|
||||||
|
|
||||||
# Creating the dico name: dicoyyyymm
|
|
||||||
filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year +
|
|
||||||
PST.p_date.month)
|
|
||||||
filedicopath_today = filedicopath
|
|
||||||
|
|
||||||
# Save I/O
|
|
||||||
if time.time() - time_1 > save_dico_and_reload*60:
|
|
||||||
flag_write_to_disk = True
|
|
||||||
|
|
||||||
if os.path.exists(filedicopath):
|
|
||||||
if flag_reload_from_disk == True:
|
|
||||||
flag_reload_from_disk = False
|
|
||||||
print 'Reloading'
|
|
||||||
with open(filedicopath, 'r') as fp:
|
|
||||||
today_dico = json.load(fp)
|
|
||||||
else:
|
|
||||||
today_dico = {}
|
|
||||||
with open(filedicopath, 'w') as fp:
|
|
||||||
json.dump(today_dico, fp)
|
|
||||||
|
|
||||||
# For now, just use monthly dico
|
|
||||||
dico_path_set.add(filedicopath)
|
|
||||||
|
|
||||||
# UNIQUE INDEX HASHS TABLE
|
|
||||||
yearly_index = str(datetime.date.today().year)+'00'
|
|
||||||
r_serv0 = dico_redis[yearly_index]
|
|
||||||
r_serv0.incr("current_index")
|
|
||||||
index = r_serv0.get("current_index")+str(PST.p_date)
|
|
||||||
|
|
||||||
# For each dico
|
|
||||||
opened_dico = []
|
|
||||||
for dico in dico_path_set:
|
|
||||||
# Opening dico
|
|
||||||
if dico == filedicopath_today:
|
|
||||||
opened_dico.append([dico, today_dico])
|
|
||||||
else:
|
|
||||||
with open(dico, 'r') as fp:
|
|
||||||
opened_dico.append([dico, json.load(fp)])
|
|
||||||
|
|
||||||
|
|
||||||
#retrieve hash from paste
|
|
||||||
paste_hash = PST._get_p_hash()
|
|
||||||
|
|
||||||
# Go through the database of the dico (of the month)
|
|
||||||
threshold_dup = 99
|
|
||||||
for dico_name, dico in opened_dico:
|
|
||||||
for dico_key, dico_hash in dico.items():
|
|
||||||
percent = ssdeep.compare(dico_hash, paste_hash)
|
|
||||||
if percent > threshold_dup:
|
|
||||||
db = dico_name[-6:]
|
|
||||||
# Go through the database of the dico filter (month)
|
|
||||||
r_serv_dico = dico_redis[db]
|
|
||||||
|
|
||||||
# index of paste
|
|
||||||
index_current = r_serv_dico.get(dico_hash)
|
|
||||||
paste_path = r_serv_dico.get(index_current)
|
|
||||||
if paste_path != None:
|
|
||||||
hash_dico[dico_hash] = (paste_path, percent)
|
|
||||||
|
|
||||||
#print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent)
|
|
||||||
print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent)
|
|
||||||
|
|
||||||
# Add the paste to the DB to prevent it from being analysed twice
|
|
||||||
# HASHTABLES PER MONTH (because of r_serv1 changing db)
|
|
||||||
r_serv1.set(index, PST.p_path)
|
|
||||||
r_serv1.sadd("INDEX", index)
|
|
||||||
# Adding the hash in Redis
|
|
||||||
r_serv1.set(paste_hash, index)
|
|
||||||
r_serv1.sadd("HASHS", paste_hash)
|
|
||||||
##################### Similarity found #######################
|
|
||||||
|
|
||||||
# if there is data in this dictionary
|
|
||||||
if len(hash_dico) != 0:
|
|
||||||
for dico_hash, paste_tuple in hash_dico.items():
|
|
||||||
paste_path, percent = paste_tuple
|
|
||||||
dupl.append((paste_path, percent))
|
|
||||||
|
|
||||||
# Creating the object attribute and save it.
|
|
||||||
to_print = 'Duplicate;{};{};{};'.format(
|
|
||||||
PST.p_source, PST.p_date, PST.p_name)
|
|
||||||
if dupl != []:
|
|
||||||
PST.__setattr__("p_duplicate", dupl)
|
|
||||||
PST.save_attribute_redis("p_duplicate", dupl)
|
|
||||||
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
|
|
||||||
print '{}Detected {}'.format(to_print, len(dupl))
|
|
||||||
|
|
||||||
y = time.time()
|
|
||||||
|
|
||||||
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
|
|
||||||
|
|
||||||
|
|
||||||
# Adding the hash in the dico of the month
|
|
||||||
today_dico[index] = paste_hash
|
|
||||||
|
|
||||||
if flag_write_to_disk:
|
|
||||||
time_1 = time.time()
|
|
||||||
flag_write_to_disk = False
|
|
||||||
flag_reload_from_disk = True
|
|
||||||
print 'writing'
|
|
||||||
with open(filedicopath, 'w') as fp:
|
|
||||||
json.dump(today_dico, fp)
|
|
||||||
except IOError:
|
|
||||||
to_print = 'Duplicate;{};{};{};'.format(
|
|
||||||
PST.p_source, PST.p_date, PST.p_name)
|
|
||||||
print "CRC Checksum Failed on :", PST.p_path
|
|
||||||
publisher.error('{}CRC Checksum Failed'.format(to_print))
|
|
|
@ -114,31 +114,31 @@ function launching_scripts {
|
||||||
|
|
||||||
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
|
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
|
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
|
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
|
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
|
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
|
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
|
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
|
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
|
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
|
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
|
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
|
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
|
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
|
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
@ -158,7 +158,9 @@ function launching_scripts {
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
|
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
|
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
|
||||||
|
sleep 0.1
|
||||||
|
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x'
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,27 +14,48 @@ import json
|
||||||
from prettytable import PrettyTable
|
from prettytable import PrettyTable
|
||||||
|
|
||||||
# CONFIG VARIABLES
|
# CONFIG VARIABLES
|
||||||
threshold_stucked_module = 1*60*1 #1 hour
|
threshold_stucked_module = 60*60*1 #1 hour
|
||||||
|
refreshRate = 1
|
||||||
log_filename = "../logs/moduleInfo.log"
|
log_filename = "../logs/moduleInfo.log"
|
||||||
command_search_pid = "ps a -o pid,cmd | grep {}"
|
command_search_pid = "ps a -o pid,cmd | grep {}"
|
||||||
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
|
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
|
||||||
|
|
||||||
|
|
||||||
def kill_module(module):
|
def getPid(module):
|
||||||
print 'trying to kill module:', module
|
|
||||||
time.sleep(8)
|
|
||||||
|
|
||||||
time.sleep(1)
|
|
||||||
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
||||||
|
|
||||||
for line in p.stdout:
|
for line in p.stdout:
|
||||||
splittedLine = line.split()
|
splittedLine = line.split()
|
||||||
if 'python2' in splittedLine:
|
if 'python2' in splittedLine:
|
||||||
pid = int(splittedLine[0])
|
return int(splittedLine[0])
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def kill_module(module):
|
||||||
|
print ''
|
||||||
|
print '-> trying to kill module:', module
|
||||||
|
|
||||||
|
pid = getPid(module)
|
||||||
|
if pid is not None:
|
||||||
os.kill(pid, signal.SIGUSR1)
|
os.kill(pid, signal.SIGUSR1)
|
||||||
time.sleep(15)
|
time.sleep(1)
|
||||||
|
if getPid(module) is None:
|
||||||
|
print module, 'has been killed'
|
||||||
|
print 'restarting', module, '...'
|
||||||
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print 'killing failed, retrying...'
|
||||||
|
time.sleep(3)
|
||||||
|
os.kill(pid, signal.SIGUSR1)
|
||||||
|
time.sleep(1)
|
||||||
|
if getPid(module) is None:
|
||||||
|
print module, 'has been killed'
|
||||||
|
print 'restarting', module, '...'
|
||||||
|
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
||||||
|
else:
|
||||||
|
print 'killing failed!'
|
||||||
|
time.sleep(7)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -86,4 +107,4 @@ if __name__ == "__main__":
|
||||||
print '\n'
|
print '\n'
|
||||||
print 'Ideling queues:\n'
|
print 'Ideling queues:\n'
|
||||||
print table2
|
print table2
|
||||||
time.sleep(5)
|
time.sleep(refreshRate)
|
|
@ -10,31 +10,31 @@ echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT
|
||||||
|
|
||||||
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
|
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
|
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
|
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
|
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
|
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
|
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
|
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
|
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
|
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
|
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
|
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
|
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
|
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
|
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
@ -54,4 +54,6 @@ screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionD
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
|
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
|
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
|
||||||
|
sleep 0.1
|
||||||
|
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x'
|
||||||
|
|
Loading…
Reference in a new issue