Merge pull request #71 from mokaddem/multiple-modif

Multiple modif v2
Raphaël Vinot 2016-08-25 14:32:01 +02:00 committed by GitHub
commit 034a558cba
23 changed files with 348 additions and 259 deletions

View file

@@ -66,7 +66,7 @@ if __name__ == "__main__":
publisher.warning('{}Checked {} valid number(s)'.format(
to_print, len(creditcard_set)))
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
p.populate_set_out(filename, 'Duplicate')
#send to Browse_warning_paste
p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste')
else:

View file

@@ -22,8 +22,8 @@ from pubsublogger import publisher
from packages import lib_words
import datetime
import calendar
from Helper import Process
import os
import ConfigParser
# Config Variables
Refresh_rate = 60*5 #sec
@@ -96,13 +96,19 @@ if __name__ == '__main__':
# Script is the default channel used for the modules.
publisher.channel = 'Script'
config_section = 'CurveManageTopSets'
p = Process(config_section)
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)
server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
host=cfg.get("Redis_Level_DB_TermFreq", "host"),
port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
publisher.info("Script Curve_manage_top_set started")
@@ -113,11 +119,6 @@ if __name__ == '__main__':
while True:
# Get one message from the input queue (module only works if linked with a queue)
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set()
continue
time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set()
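The hunk above replaces the Process helper with a direct read of config.cfg. Below is a minimal sketch of that bootstrap pattern, assuming the same AIL_BIN environment variable and section name as above; load_redis is a hypothetical helper, and getint() is used for port and db because redis-py expects integers.

import os
import ConfigParser  # Python 2, matching the rest of the repo

import redis

def load_redis(section):
    # Locate config.cfg the same way the hunk above does.
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file.')
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)
    # port and db must be integers, hence getint() rather than get().
    return redis.StrictRedis(host=cfg.get(section, "host"),
                             port=cfg.getint(section, "port"),
                             db=cfg.getint(section, "db"))

server_term = load_redis("Redis_Level_DB_TermFreq")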

View file

@@ -1,182 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The Duplicate module
====================
In short, this module checks pastes for duplicates.
Requirements:
-------------
"""
import redis
import os
import time
import datetime
import json
import ssdeep
from packages import Paste
from pubsublogger import publisher
from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Duplicates'
save_dico_and_reload = 1 #min
time_1 = time.time()
flag_reload_from_disk = True
flag_write_to_disk = False
p = Process(config_section)
# REDIS #
# DB OBJECT & HASHS ( DISK )
# FIXME increase flexibility
dico_redis = {}
for year in xrange(2013, datetime.date.today().year+1):
for month in xrange(0, 16):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month)
#print("dup: "+str(year)+str(month).zfill(2)+"\n")
# FUNCTIONS #
publisher.info("Script duplicate started")
dicopath = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "dicofilters"))
dico_path_set = set()
while True:
try:
hash_dico = {}
dupl = []
x = time.time()
message = p.get_from_set()
if message is not None:
path = message
PST = Paste.Paste(path)
else:
publisher.debug("Script Attribute is idling 10s")
time.sleep(10)
continue
PST._set_p_hash_kind("ssdeep")
# Assign the correct Redis connection
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
# Create the dico name: dicoyyyymm
filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year +
PST.p_date.month)
filedicopath_today = filedicopath
# Save I/O
if time.time() - time_1 > save_dico_and_reload*60:
flag_write_to_disk = True
if os.path.exists(filedicopath):
if flag_reload_from_disk == True:
flag_reload_from_disk = False
print 'Reloading'
with open(filedicopath, 'r') as fp:
today_dico = json.load(fp)
else:
today_dico = {}
with open(filedicopath, 'w') as fp:
json.dump(today_dico, fp)
# For now, just use monthly dico
dico_path_set.add(filedicopath)
# UNIQUE INDEX HASHS TABLE
yearly_index = str(datetime.date.today().year)+'00'
r_serv0 = dico_redis[yearly_index]
r_serv0.incr("current_index")
index = r_serv0.get("current_index")+str(PST.p_date)
# For each dico
opened_dico = []
for dico in dico_path_set:
# Opening dico
if dico == filedicopath_today:
opened_dico.append([dico, today_dico])
else:
with open(dico, 'r') as fp:
opened_dico.append([dico, json.load(fp)])
#retrieve hash from paste
paste_hash = PST._get_p_hash()
# Go through the database of the dico (of the month)
threshold_dup = 99
for dico_name, dico in opened_dico:
for dico_key, dico_hash in dico.items():
percent = ssdeep.compare(dico_hash, paste_hash)
if percent > threshold_dup:
db = dico_name[-6:]
# Go through the database of the dico filter (month)
r_serv_dico = dico_redis[db]
# index of paste
index_current = r_serv_dico.get(dico_hash)
paste_path = r_serv_dico.get(index_current)
if paste_path != None:
hash_dico[dico_hash] = (paste_path, percent)
#print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent)
print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent)
# Add paste in DB to prevent analysing it twice
# HASHTABLES PER MONTH (because of r_serv1 changing db)
r_serv1.set(index, PST.p_path)
r_serv1.sadd("INDEX", index)
# Adding the hash in Redis
r_serv1.set(paste_hash, index)
r_serv1.sadd("HASHS", paste_hash)
##################### Similarity found #######################
# if there is data in this dictionary
if len(hash_dico) != 0:
for dico_hash, paste_tuple in hash_dico.items():
paste_path, percent = paste_tuple
dupl.append((paste_path, percent))
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl != []:
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
print '{}Detected {}'.format(to_print, len(dupl))
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
# Adding the hash in the dico of the month
today_dico[index] = paste_hash
if flag_write_to_disk:
time_1 = time.time()
flag_write_to_disk = False
flag_reload_from_disk = True
print 'writing'
with open(filedicopath, 'w') as fp:
json.dump(today_dico, fp)
except IOError:
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
print "CRC Checksum Failed on :", PST.p_path
publisher.error('{}CRC Checksum Failed'.format(to_print))
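For context on the file removed above: its core technique is ssdeep fuzzy hashing, where two pastes count as duplicates once the comparison score exceeds a threshold (99 here). A minimal sketch against the same ssdeep binding the module imports; the sample strings are illustrative only.

import ssdeep

h1 = ssdeep.hash('the quick brown fox jumps over the lazy dog ' * 4)
h2 = ssdeep.hash('the quick brown fox jumps over the lazy cat ' * 4)
score = ssdeep.compare(h1, h2)  # similarity from 0 to 100
if score > 99:
    print 'near-exact duplicate (%d%%)' % score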

View file

@@ -16,6 +16,7 @@ import ConfigParser
import os
import zmq
import time
import datetime
import json
@@ -132,7 +133,25 @@ class Process(object):
in_set = self.subscriber_name + 'in'
self.r_temp.hset('queues', self.subscriber_name,
int(self.r_temp.scard(in_set)))
return self.r_temp.spop(in_set)
message = self.r_temp.spop(in_set)
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
if message is None:
return None
else:
try:
path = message.split(".")[-2].split("/")[-1]
value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name, value)
return message
except:
path = "?"
value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name, value)
return message
def populate_set_out(self, msg, channel=None):
# multiproc
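The added code leaves a heartbeat in Redis each time a module pops a message: MODULE_<name> holds "<unix timestamp>, <paste name>". A hedged sketch of reading that key back, as the new monitoring script later in this commit does; r_temp and module_name are assumed to match the writer's connection and subscriber name.

import datetime

def read_heartbeat(r_temp, module_name):
    # Value format written by get_from_set(): "<unix timestamp>, <paste name>"
    value = r_temp.get('MODULE_' + module_name)
    if value is None:
        return None
    timestamp, path = value.split(', ')
    started = datetime.datetime.fromtimestamp(int(timestamp))
    return started, path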

View file

@@ -114,31 +114,31 @@ function launching_scripts {
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
sleep 0.1
@@ -158,7 +158,9 @@ function launching_scripts {
sleep 0.1
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x'
}

bin/ModuleInformation.py Executable file (+155 lines)
View file

@@ -0,0 +1,155 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
import time
import datetime
import redis
import os
import signal
import argparse
from subprocess import PIPE, Popen
import ConfigParser
import json
from terminaltables import AsciiTable
import textwrap
# CONFIG VARIABLES
threshold_stucked_module = 60*60*1 #1 hour
log_filename = "../logs/moduleInfo.log"
command_search_pid = "ps a -o pid,cmd | grep {}"
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout:
splittedLine = line.split()
if 'python2' in splittedLine:
return int(splittedLine[0])
else:
return None
def kill_module(module):
print ''
print '-> trying to kill module:', module
pid = getPid(module)
if pid is not None:
os.kill(pid, signal.SIGUSR1)
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
else:
print 'killing failed, retrying...'
time.sleep(3)
os.kill(pid, signal.SIGUSR1)
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
else:
print 'killing failed!'
time.sleep(7)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Show info about running modules and log modules suspected to be stuck. Can be used to automatically kill and restart stuck ones.')
parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
args = parser.parse_args()
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)
# REDIS #
server = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
while True:
num = 0
printarray1 = []
printarray2 = []
for queue, card in server.hgetall("queues").iteritems():
key = "MODULE_" + queue
value = server.get(key)
if value is not None:
timestamp, path = value.split(", ")
if timestamp is not None and path is not None:
num += 1
startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
if int(card) > 0:
if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module:
log = open(log_filename, 'a')
log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
if args.autokill == 1:
kill_module(queue)
printarray1.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
else:
printarray2.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
printarray1.insert(0,["#", "Queue", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
printarray2.insert(0,["#", "Queue", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
os.system('clear')
t1 = AsciiTable(printarray1, title="Working queues")
t1.column_max_width(1)
if not t1.ok:
longest_col = t1.column_widths.index(max(t1.column_widths))
max_length_col = t1.column_max_width(longest_col)
if max_length_col > 0:
for i, content in enumerate(t1.table_data):
if len(content[longest_col]) > max_length_col:
temp = ''
for l in content[longest_col].splitlines():
if len(l) > max_length_col:
temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
else:
temp += l + '\n'
content[longest_col] = temp.strip()
t1.table_data[i] = content
t2 = AsciiTable(printarray2, title="Idling queues")
t2.column_max_width(1)
if not t2.ok:
longest_col = t2.column_widths.index(max(t2.column_widths))
max_length_col = t2.column_max_width(longest_col)
if max_length_col > 0:
for i, content in enumerate(t2.table_data):
if len(content[longest_col]) > max_length_col:
temp = ''
for l in content[longest_col].splitlines():
if len(l) > max_length_col:
temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
else:
temp += l + '\n'
content[longest_col] = temp.strip()
t2.table_data[i] = content
print t1.table
print '\n'
print t2.table
time.sleep(args.refresh)
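The monitoring loop above reduces to one predicate: a module is suspected stuck when its queue is non-empty and the current paste has been in flight longer than threshold_stucked_module. A minimal restatement of that check; is_stuck is a hypothetical helper with the same one-hour default.

def is_stuck(card, start_time, now, threshold=60 * 60):
    # card: queue length; start_time: datetime parsed from the heartbeat
    return int(card) > 0 and (now - start_time).total_seconds() > threshold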

View file

@@ -77,12 +77,14 @@ def compute_progression(server, field_name, num_day, url_parsed):
member_set = []
for keyw in server.smembers(redis_progression_name_set):
member_set.append((keyw, int(server.hget(redis_progression_name, keyw))))
print member_set
member_set.sort(key=lambda tup: tup[1])
if member_set[0][1] < keyword_increase:
print 'removing', member_set[0][0] + '('+str(member_set[0][1])+')', 'and adding', keyword, str(keyword_increase)
#remove min from set and add the new one
server.srem(redis_progression_name_set, member_set[0])
server.srem(redis_progression_name_set, member_set[0][0])
server.sadd(redis_progression_name_set, keyword)
server.hdel(redis_progression_name, member_set[0][0])
server.hset(redis_progression_name, keyword, keyword_increase)
if __name__ == '__main__':
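The fix above is subtle: member_set[0] is a (keyword, value) tuple, so the old srem() call stringified the tuple and never matched the stored member, and the companion hash entry was never deleted. A sketch of the intended capped top-set update, with hypothetical key names.

def update_top_set(server, set_key, hash_key, keyword, increase, max_size=10):
    # Keep at most max_size keywords, ranked by their increase value.
    if server.scard(set_key) < max_size:
        server.sadd(set_key, keyword)
        server.hset(hash_key, keyword, increase)
        return
    members = [(k, int(server.hget(hash_key, k))) for k in server.smembers(set_key)]
    members.sort(key=lambda tup: tup[1])
    if members[0][1] < increase:
        server.srem(set_key, members[0][0])  # the member itself, not the tuple
        server.hdel(hash_key, members[0][0])
        server.sadd(set_key, keyword)
        server.hset(hash_key, keyword, increase)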

View file

@@ -8,50 +8,52 @@ sleep 0.1
echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x'

View file

@@ -34,7 +34,7 @@ subscribe = Redis_Global
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve
[CreditCards]
subscribe = Redis_CreditCard
subscribe = Redis_CreditCards
publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste
[Mail]

doc/generate_graph_data.py Executable file (+62 lines)
View file

@@ -0,0 +1,62 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
content = ""
modules = {}
all_modules = []
curr_module = ""
streamingPub = {}
streamingSub = {}
with open('../bin/packages/modules.cfg', 'r') as f:
for line in f:
if line[0] != '#':
if line[0] == '[':
curr_name = line.replace('[','').replace(']','').replace('\n', '').replace(' ', '')
all_modules.append(curr_name)
modules[curr_name] = {'sub': [], 'pub': []}
curr_module = curr_name
elif curr_module != "": # searching for sub or pub
if line.startswith("subscribe"):
curr_subscribers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
modules[curr_module]['sub'] = curr_subscribers
for sub in curr_subscribers:
streamingSub[sub] = curr_module
elif line.startswith("publish"):
curr_publishers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
modules[curr_module]['pub'] = curr_publishers
for pub in curr_publishers:
streamingPub[pub] = curr_module
else:
continue
output_set_graph = set()
for module in modules.keys():
for stream_in in modules[module]['sub']:
if stream_in not in streamingPub.keys():
output_set_graph.add("\"" + stream_in + "\" [color=darkorange1] ;\n")
output_set_graph.add("\"" + stream_in + "\"" + "->" + module + ";\n")
else:
output_set_graph.add("\"" + streamingPub[stream_in] + "\"" + "->" + module + ";\n")
for stream_out in modules[module]['pub']:
if stream_out not in streamingSub.keys():
output_set_graph.add("\"" + stream_out + "\" [color=darkorange1] ;\n")
output_set_graph.add("\"" + stream_out + "\"" + "->" + module + ";\n")
else:
output_set_graph.add("\"" + module + "\"" + "->" + streamingSub[stream_out] + ";\n")
output_text_graph = ""
output_text_graph += "digraph unix {\n"\
"graph [pad=\"0.5\"];\n"\
"size=\"25,25\";\n"\
"node [color=lightblue2, style=filled];\n"
for elem in output_set_graph:
output_text_graph += elem
output_text_graph += "}"
print output_text_graph

View file

@@ -0,0 +1,3 @@
#!/bin/bash
python generate_graph_data.py | dot -T png -o module-data-flow.png
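The one-liner above requires Graphviz on the PATH (installed by the updated installing_deps.sh in this commit). An equivalent sketch in Python for when a shell pipe is inconvenient; paths are assumed relative to doc/.

import subprocess

# Generate the DOT source, then render it to PNG with Graphviz.
dot_src = subprocess.check_output(['python', 'generate_graph_data.py'])
proc = subprocess.Popen(['dot', '-Tpng', '-o', 'module-data-flow.png'],
                        stdin=subprocess.PIPE)
proc.communicate(dot_src)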

doc/module-data-flow.png Normal file

Binary file not shown (new image, 152 KiB).

Binary file not shown (previous image, 56 KiB).

View file

@@ -17,6 +17,9 @@ sudo apt-get install libadns1 libadns1-dev
#Needed for redis-lvlDB
sudo apt-get install libev-dev libgmp-dev
#Needed for the generate-data-flow graph
sudo apt-get install graphviz
#Needed for matplotlib
test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/
sudo easy_install -U distribute
@@ -69,6 +72,7 @@ echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate
mkdir -p $AIL_HOME/{PASTES,Blooms,dumps}
mkdir -p $AIL_HOME/LEVEL_DB_DATA/2016
mkdir -p $AIL_HOME/LEVEL_DB_DATA/3016
pip install -U pip
pip install -r pip_packages_requirement.txt

View file

@@ -10,6 +10,7 @@ textblob
numpy
matplotlib
networkx
terminaltables
#Tokeniser
nltk

View file

@@ -81,8 +81,22 @@ def event_stream():
def get_queues(r):
# We may want to put the llen in a pipeline to do only one query.
return [(queue, int(card)) for queue, card in
r.hgetall("queues").iteritems()]
data = [(queue, int(card)) for queue, card in r.hgetall("queues").iteritems()]
newData = []
for queue, card in data:
key = "MODULE_" + queue
value = r.get(key)
if value is not None:
timestamp, path = value.split(", ")
if timestamp is not None:
startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
seconds = int((datetime.datetime.now() - startTime_readable).total_seconds())
newData.append( (queue, card, seconds) )
else:
newData.append( (queue, card, 0) )
return newData
def list_len(s):

View file

@@ -221,11 +221,17 @@ function create_queue_table() {
for(i = 0; i < (glob_tabvar.row1).length;i++){
var tr = document.createElement('TR')
for(j = 0; j < (glob_tabvar.row1[i]).length; j++){
for(j = 0; j < 2; j++){
var td = document.createElement('TD')
td.appendChild(document.createTextNode(glob_tabvar.row1[i][j]));
tr.appendChild(td)
}
if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2) // over 2 min on one paste with a backlog
tr.className += " danger";
else if (parseInt(glob_tabvar.row1[i][2]) > 60*1) // over 1 min on one paste
tr.className += " warning";
else
tr.className += " success";
tableBody.appendChild(tr);
}
Tablediv.appendChild(table);