mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
commit
034a558cba
23 changed files with 348 additions and 259 deletions
|
@ -66,7 +66,7 @@ if __name__ == "__main__":
|
|||
publisher.warning('{}Checked {} valid number(s)'.format(
|
||||
to_print, len(creditcard_set)))
|
||||
#Send to duplicate
|
||||
p.populate_set_out(filepath, 'Duplicate')
|
||||
p.populate_set_out(filename, 'Duplicate')
|
||||
#send to Browse_warning_paste
|
||||
p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste')
|
||||
else:
|
|
@ -22,8 +22,8 @@ from pubsublogger import publisher
|
|||
from packages import lib_words
|
||||
import datetime
|
||||
import calendar
|
||||
|
||||
from Helper import Process
|
||||
import os
|
||||
import ConfigParser
|
||||
|
||||
# Config Variables
|
||||
Refresh_rate = 60*5 #sec
|
||||
|
@ -96,13 +96,19 @@ if __name__ == '__main__':
|
|||
# Script is the default channel used for the modules.
|
||||
publisher.channel = 'Script'
|
||||
|
||||
config_section = 'CurveManageTopSets'
|
||||
p = Process(config_section)
|
||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||
if not os.path.exists(configfile):
|
||||
raise Exception('Unable to find the configuration file. \
|
||||
Did you set environment variables? \
|
||||
Or activate the virtualenv.')
|
||||
|
||||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(configfile)
|
||||
|
||||
server_term = redis.StrictRedis(
|
||||
host=p.config.get("Redis_Level_DB_TermFreq", "host"),
|
||||
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
|
||||
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
|
||||
host=cfg.get("Redis_Level_DB_TermFreq", "host"),
|
||||
port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
|
||||
db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
|
||||
|
||||
publisher.info("Script Curve_manage_top_set started")
|
||||
|
||||
|
@ -113,11 +119,6 @@ if __name__ == '__main__':
|
|||
|
||||
while True:
|
||||
# Get one message from the input queue (module only work if linked with a queue)
|
||||
message = p.get_from_set()
|
||||
if message is None:
|
||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
||||
print 'sleeping'
|
||||
time.sleep(Refresh_rate) # sleep a long time then manage the set
|
||||
manage_top_set()
|
||||
continue
|
||||
time.sleep(Refresh_rate) # sleep a long time then manage the set
|
||||
manage_top_set()
|
||||
|
|
@ -1,182 +0,0 @@
|
|||
#!/usr/bin/env python2
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
The Duplicate module
|
||||
====================
|
||||
|
||||
This huge module is, in short term, checking duplicates.
|
||||
|
||||
Requirements:
|
||||
-------------
|
||||
|
||||
|
||||
"""
|
||||
import redis
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import json
|
||||
import ssdeep
|
||||
from packages import Paste
|
||||
from pubsublogger import publisher
|
||||
|
||||
from Helper import Process
|
||||
|
||||
if __name__ == "__main__":
|
||||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
|
||||
config_section = 'Duplicates'
|
||||
save_dico_and_reload = 1 #min
|
||||
time_1 = time.time()
|
||||
flag_reload_from_disk = True
|
||||
flag_write_to_disk = False
|
||||
|
||||
p = Process(config_section)
|
||||
|
||||
# REDIS #
|
||||
# DB OBJECT & HASHS ( DISK )
|
||||
# FIXME increase flexibility
|
||||
dico_redis = {}
|
||||
for year in xrange(2013, datetime.date.today().year+1):
|
||||
for month in xrange(0, 16):
|
||||
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
||||
host=p.config.get("Redis_Level_DB", "host"), port=year,
|
||||
db=month)
|
||||
#print("dup: "+str(year)+str(month).zfill(2)+"\n")
|
||||
|
||||
# FUNCTIONS #
|
||||
publisher.info("Script duplicate started")
|
||||
|
||||
dicopath = os.path.join(os.environ['AIL_HOME'],
|
||||
p.config.get("Directories", "dicofilters"))
|
||||
|
||||
dico_path_set = set()
|
||||
while True:
|
||||
try:
|
||||
hash_dico = {}
|
||||
dupl = []
|
||||
|
||||
x = time.time()
|
||||
|
||||
message = p.get_from_set()
|
||||
if message is not None:
|
||||
path = message
|
||||
PST = Paste.Paste(path)
|
||||
else:
|
||||
publisher.debug("Script Attribute is idling 10s")
|
||||
time.sleep(10)
|
||||
continue
|
||||
|
||||
PST._set_p_hash_kind("ssdeep")
|
||||
|
||||
# Assignate the correct redis connexion
|
||||
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
|
||||
|
||||
# Creating the dicor name: dicoyyyymm
|
||||
filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year +
|
||||
PST.p_date.month)
|
||||
filedicopath_today = filedicopath
|
||||
|
||||
# Save I/O
|
||||
if time.time() - time_1 > save_dico_and_reload*60:
|
||||
flag_write_to_disk = True
|
||||
|
||||
if os.path.exists(filedicopath):
|
||||
if flag_reload_from_disk == True:
|
||||
flag_reload_from_disk = False
|
||||
print 'Reloading'
|
||||
with open(filedicopath, 'r') as fp:
|
||||
today_dico = json.load(fp)
|
||||
else:
|
||||
today_dico = {}
|
||||
with open(filedicopath, 'w') as fp:
|
||||
json.dump(today_dico, fp)
|
||||
|
||||
# For now, just use monthly dico
|
||||
dico_path_set.add(filedicopath)
|
||||
|
||||
# UNIQUE INDEX HASHS TABLE
|
||||
yearly_index = str(datetime.date.today().year)+'00'
|
||||
r_serv0 = dico_redis[yearly_index]
|
||||
r_serv0.incr("current_index")
|
||||
index = r_serv0.get("current_index")+str(PST.p_date)
|
||||
|
||||
# For each dico
|
||||
opened_dico = []
|
||||
for dico in dico_path_set:
|
||||
# Opening dico
|
||||
if dico == filedicopath_today:
|
||||
opened_dico.append([dico, today_dico])
|
||||
else:
|
||||
with open(dico, 'r') as fp:
|
||||
opened_dico.append([dico, json.load(fp)])
|
||||
|
||||
|
||||
#retrieve hash from paste
|
||||
paste_hash = PST._get_p_hash()
|
||||
|
||||
# Go throught the Database of the dico (of the month)
|
||||
threshold_dup = 99
|
||||
for dico_name, dico in opened_dico:
|
||||
for dico_key, dico_hash in dico.items():
|
||||
percent = ssdeep.compare(dico_hash, paste_hash)
|
||||
if percent > threshold_dup:
|
||||
db = dico_name[-6:]
|
||||
# Go throught the Database of the dico filter (month)
|
||||
r_serv_dico = dico_redis[db]
|
||||
|
||||
# index of paste
|
||||
index_current = r_serv_dico.get(dico_hash)
|
||||
paste_path = r_serv_dico.get(index_current)
|
||||
if paste_path != None:
|
||||
hash_dico[dico_hash] = (paste_path, percent)
|
||||
|
||||
#print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent)
|
||||
print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent)
|
||||
|
||||
# Add paste in DB to prevent its analyse twice
|
||||
# HASHTABLES PER MONTH (because of r_serv1 changing db)
|
||||
r_serv1.set(index, PST.p_path)
|
||||
r_serv1.sadd("INDEX", index)
|
||||
# Adding the hash in Redis
|
||||
r_serv1.set(paste_hash, index)
|
||||
r_serv1.sadd("HASHS", paste_hash)
|
||||
##################### Similarity found #######################
|
||||
|
||||
# if there is data in this dictionnary
|
||||
if len(hash_dico) != 0:
|
||||
for dico_hash, paste_tuple in hash_dico.items():
|
||||
paste_path, percent = paste_tuple
|
||||
dupl.append((paste_path, percent))
|
||||
|
||||
# Creating the object attribute and save it.
|
||||
to_print = 'Duplicate;{};{};{};'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name)
|
||||
if dupl != []:
|
||||
PST.__setattr__("p_duplicate", dupl)
|
||||
PST.save_attribute_redis("p_duplicate", dupl)
|
||||
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
|
||||
print '{}Detected {}'.format(to_print, len(dupl))
|
||||
|
||||
y = time.time()
|
||||
|
||||
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
|
||||
|
||||
|
||||
# Adding the hash in the dico of the month
|
||||
today_dico[index] = paste_hash
|
||||
|
||||
if flag_write_to_disk:
|
||||
time_1 = time.time()
|
||||
flag_write_to_disk = False
|
||||
flag_reload_from_disk = True
|
||||
print 'writing'
|
||||
with open(filedicopath, 'w') as fp:
|
||||
json.dump(today_dico, fp)
|
||||
except IOError:
|
||||
to_print = 'Duplicate;{};{};{};'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name)
|
||||
print "CRC Checksum Failed on :", PST.p_path
|
||||
publisher.error('{}CRC Checksum Failed'.format(to_print))
|
|
@ -16,6 +16,7 @@ import ConfigParser
|
|||
import os
|
||||
import zmq
|
||||
import time
|
||||
import datetime
|
||||
import json
|
||||
|
||||
|
||||
|
@ -132,7 +133,25 @@ class Process(object):
|
|||
in_set = self.subscriber_name + 'in'
|
||||
self.r_temp.hset('queues', self.subscriber_name,
|
||||
int(self.r_temp.scard(in_set)))
|
||||
return self.r_temp.spop(in_set)
|
||||
message = self.r_temp.spop(in_set)
|
||||
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
|
||||
dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
|
||||
|
||||
if message is None:
|
||||
return None
|
||||
|
||||
else:
|
||||
try:
|
||||
path = message.split(".")[-2].split("/")[-1]
|
||||
value = str(timestamp) + ", " + path
|
||||
self.r_temp.set("MODULE_"+self.subscriber_name, value)
|
||||
return message
|
||||
|
||||
except:
|
||||
path = "?"
|
||||
value = str(timestamp) + ", " + path
|
||||
self.r_temp.set("MODULE_"+self.subscriber_name, value)
|
||||
return message
|
||||
|
||||
def populate_set_out(self, msg, channel=None):
|
||||
# multiproc
|
||||
|
|
|
@ -114,31 +114,31 @@ function launching_scripts {
|
|||
|
||||
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
|
||||
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
|
||||
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
|
||||
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
|
||||
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
|
||||
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
|
||||
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
|
||||
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
|
||||
sleep 0.1
|
||||
|
@ -158,7 +158,9 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
|
||||
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x'
|
||||
|
||||
}
|
||||
|
||||
|
|
155
bin/ModuleInformation.py
Executable file
155
bin/ModuleInformation.py
Executable file
|
@ -0,0 +1,155 @@
|
|||
#!/usr/bin/env python2
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import time
|
||||
import datetime
|
||||
import redis
|
||||
import os
|
||||
import signal
|
||||
import argparse
|
||||
from subprocess import PIPE, Popen
|
||||
import ConfigParser
|
||||
import json
|
||||
from terminaltables import AsciiTable
|
||||
import textwrap
|
||||
|
||||
# CONFIG VARIABLES
|
||||
threshold_stucked_module = 60*60*1 #1 hour
|
||||
log_filename = "../logs/moduleInfo.log"
|
||||
command_search_pid = "ps a -o pid,cmd | grep {}"
|
||||
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
|
||||
|
||||
|
||||
def getPid(module):
|
||||
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
||||
for line in p.stdout:
|
||||
splittedLine = line.split()
|
||||
if 'python2' in splittedLine:
|
||||
return int(splittedLine[0])
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def kill_module(module):
|
||||
print ''
|
||||
print '-> trying to kill module:', module
|
||||
|
||||
pid = getPid(module)
|
||||
if pid is not None:
|
||||
os.kill(pid, signal.SIGUSR1)
|
||||
time.sleep(1)
|
||||
if getPid(module) is None:
|
||||
print module, 'has been killed'
|
||||
print 'restarting', module, '...'
|
||||
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
||||
|
||||
else:
|
||||
print 'killing failed, retrying...'
|
||||
time.sleep(3)
|
||||
os.kill(pid, signal.SIGUSR1)
|
||||
time.sleep(1)
|
||||
if getPid(module) is None:
|
||||
print module, 'has been killed'
|
||||
print 'restarting', module, '...'
|
||||
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
|
||||
else:
|
||||
print 'killing failed!'
|
||||
time.sleep(7)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.')
|
||||
parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
|
||||
parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||
if not os.path.exists(configfile):
|
||||
raise Exception('Unable to find the configuration file. \
|
||||
Did you set environment variables? \
|
||||
Or activate the virtualenv.')
|
||||
|
||||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(configfile)
|
||||
|
||||
# REDIS #
|
||||
server = redis.StrictRedis(
|
||||
host=cfg.get("Redis_Queues", "host"),
|
||||
port=cfg.getint("Redis_Queues", "port"),
|
||||
db=cfg.getint("Redis_Queues", "db"))
|
||||
|
||||
while True:
|
||||
|
||||
num = 0
|
||||
printarray1 = []
|
||||
printarray2 = []
|
||||
for queue, card in server.hgetall("queues").iteritems():
|
||||
key = "MODULE_" + queue
|
||||
value = server.get(key)
|
||||
if value is not None:
|
||||
timestamp, path = value.split(", ")
|
||||
if timestamp is not None and path is not None:
|
||||
num += 1
|
||||
startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
|
||||
processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
|
||||
|
||||
if int(card) > 0:
|
||||
if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module:
|
||||
log = open(log_filename, 'a')
|
||||
log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
|
||||
if args.autokill == 1:
|
||||
kill_module(queue)
|
||||
|
||||
printarray1.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
|
||||
|
||||
else:
|
||||
printarray2.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
|
||||
|
||||
printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
|
||||
printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
|
||||
printarray1.insert(0,["#", "Queue", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
|
||||
printarray2.insert(0,["#", "Queue", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
|
||||
|
||||
os.system('clear')
|
||||
t1 = AsciiTable(printarray1, title="Working queues")
|
||||
t1.column_max_width(1)
|
||||
if not t1.ok:
|
||||
longest_col = t1.column_widths.index(max(t1.column_widths))
|
||||
max_length_col = t1.column_max_width(longest_col)
|
||||
if max_length_col > 0:
|
||||
for i, content in enumerate(t1.table_data):
|
||||
if len(content[longest_col]) > max_length_col:
|
||||
temp = ''
|
||||
for l in content[longest_col].splitlines():
|
||||
if len(l) > max_length_col:
|
||||
temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
|
||||
else:
|
||||
temp += l + '\n'
|
||||
content[longest_col] = temp.strip()
|
||||
t1.table_data[i] = content
|
||||
|
||||
t2 = AsciiTable(printarray2, title="Idling queues")
|
||||
t2.column_max_width(1)
|
||||
if not t2.ok:
|
||||
longest_col = t2.column_widths.index(max(t2.column_widths))
|
||||
max_length_col = t2.column_max_width(longest_col)
|
||||
if max_length_col > 0:
|
||||
for i, content in enumerate(t2.table_data):
|
||||
if len(content[longest_col]) > max_length_col:
|
||||
temp = ''
|
||||
for l in content[longest_col].splitlines():
|
||||
if len(l) > max_length_col:
|
||||
temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
|
||||
else:
|
||||
temp += l + '\n'
|
||||
content[longest_col] = temp.strip()
|
||||
t2.table_data[i] = content
|
||||
|
||||
|
||||
print t1.table
|
||||
print '\n'
|
||||
print t2.table
|
||||
|
||||
time.sleep(args.refresh)
|
|
@ -77,12 +77,14 @@ def compute_progression(server, field_name, num_day, url_parsed):
|
|||
member_set = []
|
||||
for keyw in server.smembers(redis_progression_name_set):
|
||||
member_set.append((keyw, int(server.hget(redis_progression_name, keyw))))
|
||||
print member_set
|
||||
member_set.sort(key=lambda tup: tup[1])
|
||||
if member_set[0][1] < keyword_increase:
|
||||
print 'removing', member_set[0][0] + '('+str(member_set[0][1])+')', 'and adding', keyword, str(keyword_increase)
|
||||
#remove min from set and add the new one
|
||||
server.srem(redis_progression_name_set, member_set[0])
|
||||
server.srem(redis_progression_name_set, member_set[0][0])
|
||||
server.sadd(redis_progression_name_set, keyword)
|
||||
server.hdel(redis_progression_name, member_set[0][0])
|
||||
server.hset(redis_progression_name, keyword, keyword_increase)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -8,50 +8,52 @@ sleep 0.1
|
|||
|
||||
echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT
|
||||
|
||||
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
|
||||
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x'
|
||||
|
|
|
@ -34,7 +34,7 @@ subscribe = Redis_Global
|
|||
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve
|
||||
|
||||
[CreditCards]
|
||||
subscribe = Redis_CreditCard
|
||||
subscribe = Redis_CreditCards
|
||||
publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste
|
||||
|
||||
[Mail]
|
||||
|
|
62
doc/generate_graph_data.py
Executable file
62
doc/generate_graph_data.py
Executable file
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env python2
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
content = ""
|
||||
modules = {}
|
||||
all_modules = []
|
||||
curr_module = ""
|
||||
streamingPub = {}
|
||||
streamingSub = {}
|
||||
|
||||
with open('../bin/packages/modules.cfg', 'r') as f:
|
||||
for line in f:
|
||||
if line[0] != '#':
|
||||
if line[0] == '[':
|
||||
curr_name = line.replace('[','').replace(']','').replace('\n', '').replace(' ', '')
|
||||
all_modules.append(curr_name)
|
||||
modules[curr_name] = {'sub': [], 'pub': []}
|
||||
curr_module = curr_name
|
||||
elif curr_module != "": # searching for sub or pub
|
||||
if line.startswith("subscribe"):
|
||||
curr_subscribers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
|
||||
modules[curr_module]['sub'] = curr_subscribers
|
||||
for sub in curr_subscribers:
|
||||
streamingSub[sub] = curr_module
|
||||
|
||||
elif line.startswith("publish"):
|
||||
curr_publishers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
|
||||
modules[curr_module]['pub'] = curr_publishers
|
||||
for pub in curr_publishers:
|
||||
streamingPub[pub] = curr_module
|
||||
else:
|
||||
continue
|
||||
|
||||
output_set_graph = set()
|
||||
|
||||
for module in modules.keys():
|
||||
for stream_in in modules[module]['sub']:
|
||||
if stream_in not in streamingPub.keys():
|
||||
output_set_graph.add("\"" + stream_in + "\" [color=darkorange1] ;\n")
|
||||
output_set_graph.add("\"" + stream_in + "\"" + "->" + module + ";\n")
|
||||
else:
|
||||
output_set_graph.add("\"" + streamingPub[stream_in] + "\"" + "->" + module + ";\n")
|
||||
|
||||
for stream_out in modules[module]['pub']:
|
||||
if stream_out not in streamingSub.keys():
|
||||
output_set_graph.add("\"" + stream_out + "\" [color=darkorange1] ;\n")
|
||||
output_set_graph.add("\"" + stream_out + "\"" + "->" + module + ";\n")
|
||||
else:
|
||||
output_set_graph.add("\"" + module + "\"" + "->" + streamingSub[stream_out] + ";\n")
|
||||
|
||||
|
||||
output_text_graph = ""
|
||||
output_text_graph += "digraph unix {\n"\
|
||||
"graph [pad=\"0.5\"];\n"\
|
||||
"size=\"25,25\";\n"\
|
||||
"node [color=lightblue2, style=filled];\n"
|
||||
|
||||
for elem in output_set_graph:
|
||||
output_text_graph += elem
|
||||
|
||||
output_text_graph += "}"
|
||||
print output_text_graph
|
3
doc/generate_modules_data_flow_graph.sh
Executable file
3
doc/generate_modules_data_flow_graph.sh
Executable file
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
|
||||
|
||||
python generate_graph_data.py | dot -T png -o module-data-flow.png
|
BIN
doc/module-data-flow.png
Normal file
BIN
doc/module-data-flow.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 152 KiB |
Binary file not shown.
Before Width: | Height: | Size: 56 KiB |
|
@ -17,6 +17,9 @@ sudo apt-get install libadns1 libadns1-dev
|
|||
#Needed for redis-lvlDB
|
||||
sudo apt-get install libev-dev libgmp-dev
|
||||
|
||||
#Need for generate-data-flow graph
|
||||
sudo apt-get install graphviz
|
||||
|
||||
#needed for mathplotlib
|
||||
test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/
|
||||
sudo easy_install -U distribute
|
||||
|
@ -69,6 +72,7 @@ echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate
|
|||
|
||||
mkdir -p $AIL_HOME/{PASTES,Blooms,dumps}
|
||||
mkdir -p $AIL_HOME/LEVEL_DB_DATA/2016
|
||||
mkdir -p $AIL_HOME/LEVEL_DB_DATA/3016
|
||||
|
||||
pip install -U pip
|
||||
pip install -r pip_packages_requirement.txt
|
||||
|
|
|
@ -10,6 +10,7 @@ textblob
|
|||
numpy
|
||||
matplotlib
|
||||
networkx
|
||||
terminaltables
|
||||
|
||||
#Tokeniser
|
||||
nltk
|
||||
|
|
|
@ -81,8 +81,22 @@ def event_stream():
|
|||
|
||||
def get_queues(r):
|
||||
# We may want to put the llen in a pipeline to do only one query.
|
||||
return [(queue, int(card)) for queue, card in
|
||||
r.hgetall("queues").iteritems()]
|
||||
data = [(queue, int(card)) for queue, card in r.hgetall("queues").iteritems()]
|
||||
newData = []
|
||||
for queue, card in data:
|
||||
key = "MODULE_" + queue
|
||||
value = r.get(key)
|
||||
if value is not None:
|
||||
timestamp, path = value.split(", ")
|
||||
if timestamp is not None:
|
||||
startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
|
||||
processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
|
||||
seconds = int((datetime.datetime.now() - startTime_readable).total_seconds())
|
||||
newData.append( (queue, card, seconds) )
|
||||
else:
|
||||
newData.append( (queue, cards, 0) )
|
||||
|
||||
return newData
|
||||
|
||||
|
||||
def list_len(s):
|
||||
|
|
|
@ -221,11 +221,17 @@ function create_queue_table() {
|
|||
|
||||
for(i = 0; i < (glob_tabvar.row1).length;i++){
|
||||
var tr = document.createElement('TR')
|
||||
for(j = 0; j < (glob_tabvar.row1[i]).length; j++){
|
||||
for(j = 0; j < 2; j++){
|
||||
var td = document.createElement('TD')
|
||||
td.appendChild(document.createTextNode(glob_tabvar.row1[i][j]));
|
||||
tr.appendChild(td)
|
||||
}
|
||||
if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2)
|
||||
tr.className += " danger";
|
||||
else if (parseInt(glob_tabvar.row1[i][2]) > 60*1)
|
||||
tr.className += " warning";
|
||||
else
|
||||
tr.className += " success";
|
||||
tableBody.appendChild(tr);
|
||||
}
|
||||
Tablediv.appendChild(table);
|
||||
|
|
Loading…
Reference in a new issue