Merge branch 'master' of github.com:CIRCL/AIL-framework

This commit is contained in:
Alexandre Dulaunoy 2016-08-25 12:33:31 +00:00
commit 1ab08d4a05
30 changed files with 383 additions and 262 deletions

View file

@ -9,8 +9,34 @@ AIL framework - Framework for Analysis of Information Leaks
AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information.
![Dashboard](./doc/screenshots/DashboardAIL.png?raw=true "AIL framework dashboard")
![Trending](./doc/screenshots/WordtrendingAIL.png?raw=true "AIL framework wordtrending")
![Dashboard](./doc/screenshots/dashboard.png?raw=true "AIL framework dashboard")
Trending charts
---------------
![Trending-Web](./doc/screenshots/trending-web.png?raw=true "AIL framework webtrending")
![Trending-Modules](./doc/screenshots/trending-module.png?raw=true "AIL framework modulestrending")
Browsing
--------
![Browse-Pastes](./doc/screenshots/browse-important.png?raw=true "AIL framework browseImportantPastes")
Sentiment analysis
------------------
![Sentiment](./doc/screenshots/sentiment.png?raw=true "AIL framework sentimentanalysis")
Terms manager and occurrence
----------------------------
![Term-Manager](./doc/screenshots/terms-manager.png?raw=true "AIL framework termManager")
## Top terms
![Term-Top](./doc/screenshots/terms-top.png?raw=true "AIL framework termTop")
![Term-Plot](./doc/screenshots/terms-plot.png?raw=true "AIL framework termPlot")
AIL framework screencast: https://www.youtube.com/watch?v=9idfHCIMzBY
@ -26,6 +52,9 @@ Features
* Module for extracting Tor .onion addresses (to be further processed for analysis)
* Extracting and validating potential hostnames (e.g. to feed Passive DNS systems)
* A full-text indexer module to index unstructured information
* Modules and web statistics
* Global sentiment analysis for each provider based on the nltk vader module
* Terms tracking and occurrence
* Many more modules for extracting phone numbers, credentials and others
Installation
@ -48,6 +77,7 @@ linux based distributions, you can replace it with [installing_deps_archlinux.sh
There is also a [Travis file](.travis.yml) used for automating the installation that can be used to build and install AIL on other systems.
Starting AIL web interface
--------------------------
@ -94,6 +124,7 @@ Redis and LevelDB overview
* DB 0 - Cache hostname/dns
* Redis on TCP port 6380 - Redis Pub-Sub only
* Redis on TCP port 6381 - DB 0 - Queue and Paste content LRU cache
* Redis on TCP port 6382 - DB 1-4 - Trending, terms and sentiments
* LevelDB on TCP port <year> - Lines duplicate
LICENSE

View file

@ -66,7 +66,7 @@ if __name__ == "__main__":
publisher.warning('{}Checked {} valid number(s)'.format(
to_print, len(creditcard_set)))
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
p.populate_set_out(filename, 'Duplicate')
#send to Browse_warning_paste
p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste')
else:

View file

@ -22,8 +22,8 @@ from pubsublogger import publisher
from packages import lib_words
import datetime
import calendar
from Helper import Process
import os
import ConfigParser
# Config Variables
Refresh_rate = 60*5 #sec
@ -96,13 +96,19 @@ if __name__ == '__main__':
# Script is the default channel used for the modules.
publisher.channel = 'Script'
config_section = 'CurveManageTopSets'
p = Process(config_section)
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)
server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
host=cfg.get("Redis_Level_DB_TermFreq", "host"),
port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
publisher.info("Script Curve_manage_top_set started")
@ -113,11 +119,6 @@ if __name__ == '__main__':
while True:
# Get one message from the input queue (module only work if linked with a queue)
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set()
continue
time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set()

View file

@ -1,182 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The Duplicate module
====================
This huge module is, in short term, checking duplicates.
Requirements:
-------------
"""
import redis
import os
import time
import datetime
import json
import ssdeep
from packages import Paste
from pubsublogger import publisher
from Helper import Process
# Script entry point of the Duplicate module: reads paste paths from the
# module queue, compares each paste's ssdeep hash against the hashes kept in
# JSON "dico" files, records matches above a threshold as duplicates of the
# paste, then stores the paste's own hash in Redis and in the current dico.
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Duplicates'
# Minutes between two writes of the in-memory dico to disk.
save_dico_and_reload = 1 #min
time_1 = time.time()
# The dico is re-read from disk only after it has been written (see the
# flag handling at the end of the loop).
flag_reload_from_disk = True
flag_write_to_disk = False
p = Process(config_section)
# REDIS #
# DB OBJECT & HASHS ( DISK )
# FIXME increase flexibility
# One connection per (year, db) pair, keyed by the string 'yyyymm'; the
# year is used as the TCP port and the month number as the Redis db index.
dico_redis = {}
for year in xrange(2013, datetime.date.today().year+1):
# NOTE(review): db indexes 0..15 are created, not only 1..12; index 00 is
# used further down as the yearly index db.
for month in xrange(0, 16):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month)
#print("dup: "+str(year)+str(month).zfill(2)+"\n")
# FUNCTIONS #
publisher.info("Script duplicate started")
dicopath = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "dicofilters"))
# Paths of every dico file seen so far; each of them is compared against.
dico_path_set = set()
while True:
try:
hash_dico = {}
dupl = []
# Start time of this iteration, used for the "Processed in" log line.
x = time.time()
message = p.get_from_set()
if message is not None:
path = message
PST = Paste.Paste(path)
else:
# NOTE(review): the log text says "Attribute" although this is the
# Duplicate script.
publisher.debug("Script Attribute is idling 10s")
time.sleep(10)
continue
PST._set_p_hash_kind("ssdeep")
# Assignate the correct redis connexion
# NOTE(review): presumably p_date.year and p_date.month are strings so the
# concatenation yields a 'yyyymm' key -- confirm against packages.Paste.
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
# Creating the dicor name: dicoyyyymm
filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year +
PST.p_date.month)
filedicopath_today = filedicopath
# Save I/O
# Request a disk write once save_dico_and_reload minutes have elapsed.
if time.time() - time_1 > save_dico_and_reload*60:
flag_write_to_disk = True
if os.path.exists(filedicopath):
if flag_reload_from_disk == True:
flag_reload_from_disk = False
print 'Reloading'
with open(filedicopath, 'r') as fp:
today_dico = json.load(fp)
else:
# No dico file yet for this month: start empty and create the file.
today_dico = {}
with open(filedicopath, 'w') as fp:
json.dump(today_dico, fp)
# For now, just use monthly dico
dico_path_set.add(filedicopath)
# UNIQUE INDEX HASHS TABLE
# The counter "current_index" lives in db 00 of the current year.
yearly_index = str(datetime.date.today().year)+'00'
r_serv0 = dico_redis[yearly_index]
r_serv0.incr("current_index")
# Index of this paste: counter value followed by the paste date.
index = r_serv0.get("current_index")+str(PST.p_date)
# For each dico
opened_dico = []
for dico in dico_path_set:
# Opening dico
# The current month's dico is taken from memory, the others from disk.
if dico == filedicopath_today:
opened_dico.append([dico, today_dico])
else:
with open(dico, 'r') as fp:
opened_dico.append([dico, json.load(fp)])
#retrieve hash from paste
paste_hash = PST._get_p_hash()
# Go throught the Database of the dico (of the month)
# Only ssdeep scores strictly above this value count as duplicates.
threshold_dup = 99
for dico_name, dico in opened_dico:
for dico_key, dico_hash in dico.items():
percent = ssdeep.compare(dico_hash, paste_hash)
if percent > threshold_dup:
# The last six characters of the dico file name are its 'yyyymm' key.
db = dico_name[-6:]
# Go throught the Database of the dico filter (month)
r_serv_dico = dico_redis[db]
# index of paste
# Two lookups: hash -> index, then index -> paste path.
index_current = r_serv_dico.get(dico_hash)
paste_path = r_serv_dico.get(index_current)
if paste_path != None:
hash_dico[dico_hash] = (paste_path, percent)
#print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent)
# NOTE(review): the [44:] slices presumably strip a fixed-length path
# prefix -- confirm; they depend on where the pastes are stored.
print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent)
# Add paste in DB to prevent its analyse twice
# HASHTABLES PER MONTH (because of r_serv1 changing db)
r_serv1.set(index, PST.p_path)
r_serv1.sadd("INDEX", index)
# Adding the hash in Redis
r_serv1.set(paste_hash, index)
r_serv1.sadd("HASHS", paste_hash)
##################### Similarity found #######################
# if there is data in this dictionnary
if len(hash_dico) != 0:
for dico_hash, paste_tuple in hash_dico.items():
paste_path, percent = paste_tuple
dupl.append((paste_path, percent))
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl != []:
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
print '{}Detected {}'.format(to_print, len(dupl))
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
# Adding the hash in the dico of the month
today_dico[index] = paste_hash
if flag_write_to_disk:
# Restart the write timer and allow the next reload from disk.
time_1 = time.time()
flag_write_to_disk = False
flag_reload_from_disk = True
print 'writing'
with open(filedicopath, 'w') as fp:
json.dump(today_dico, fp)
# NOTE(review): this handler reads PST; if the IOError is raised before PST
# is first assigned, the handler itself would fail -- confirm the try scope.
except IOError:
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
print "CRC Checksum Failed on :", PST.p_path
publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -16,6 +16,7 @@ import ConfigParser
import os
import zmq
import time
import datetime
import json
@ -132,7 +133,25 @@ class Process(object):
in_set = self.subscriber_name + 'in'
self.r_temp.hset('queues', self.subscriber_name,
int(self.r_temp.scard(in_set)))
return self.r_temp.spop(in_set)
message = self.r_temp.spop(in_set)
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
if message is None:
return None
else:
try:
path = message.split(".")[-2].split("/")[-1]
value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name, value)
return message
except:
path = "?"
value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name, value)
return message
def populate_set_out(self, msg, channel=None):
# multiproc

View file

@ -114,31 +114,31 @@ function launching_scripts {
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
sleep 0.1
@ -158,7 +158,9 @@ function launching_scripts {
sleep 0.1
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x'
}

155
bin/ModuleInformation.py Executable file
View file

@ -0,0 +1,155 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
import time
import datetime
import redis
import os
import signal
import argparse
from subprocess import PIPE, Popen
import ConfigParser
import json
from terminaltables import AsciiTable
import textwrap
# CONFIG VARIABLES
threshold_stucked_module = 60*60*1 #1 hour
log_filename = "../logs/moduleInfo.log"
command_search_pid = "ps a -o pid,cmd | grep {}"
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
def getPid(module):
    """Return the PID of the python2 process running ``<module>.py``.

    The process list is obtained through the shell command
    ``command_search_pid`` (``ps`` piped into ``grep``).  Every output line
    is inspected; the first one whose fields contain ``python2`` gives the
    PID (its first field).

    Returns None when no such line exists.
    """
    p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
    # communicate() drains the pipe, waits for the shell to exit and closes
    # the descriptors, so repeated calls neither leak pipes nor leave zombies.
    output, _ = p.communicate()
    for line in output.splitlines():
        splittedLine = line.split()
        if 'python2' in splittedLine:
            return int(splittedLine[0])
    # Only give up after *all* lines were checked: lines that merely mention
    # the script name (wrappers, the grep itself) may come first.
    return None
def kill_module(module):
print ''
print '-> trying to kill module:', module
pid = getPid(module)
if pid is not None:
os.kill(pid, signal.SIGUSR1)
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
else:
print 'killing failed, retrying...'
time.sleep(3)
os.kill(pid, signal.SIGUSR1)
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
else:
print 'killing failed!'
time.sleep(7)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.')
parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
args = parser.parse_args()
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)
# REDIS #
server = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
while True:
num = 0
printarray1 = []
printarray2 = []
for queue, card in server.hgetall("queues").iteritems():
key = "MODULE_" + queue
value = server.get(key)
if value is not None:
timestamp, path = value.split(", ")
if timestamp is not None and path is not None:
num += 1
startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
if int(card) > 0:
if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module:
log = open(log_filename, 'a')
log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
if args.autokill == 1:
kill_module(queue)
printarray1.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
else:
printarray2.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
printarray1.insert(0,["#", "Queue", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
printarray2.insert(0,["#", "Queue", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
os.system('clear')
t1 = AsciiTable(printarray1, title="Working queues")
t1.column_max_width(1)
if not t1.ok:
longest_col = t1.column_widths.index(max(t1.column_widths))
max_length_col = t1.column_max_width(longest_col)
if max_length_col > 0:
for i, content in enumerate(t1.table_data):
if len(content[longest_col]) > max_length_col:
temp = ''
for l in content[longest_col].splitlines():
if len(l) > max_length_col:
temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
else:
temp += l + '\n'
content[longest_col] = temp.strip()
t1.table_data[i] = content
t2 = AsciiTable(printarray2, title="Idling queues")
t2.column_max_width(1)
if not t2.ok:
longest_col = t2.column_widths.index(max(t2.column_widths))
max_length_col = t2.column_max_width(longest_col)
if max_length_col > 0:
for i, content in enumerate(t2.table_data):
if len(content[longest_col]) > max_length_col:
temp = ''
for l in content[longest_col].splitlines():
if len(l) > max_length_col:
temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
else:
temp += l + '\n'
content[longest_col] = temp.strip()
t2.table_data[i] = content
print t1.table
print '\n'
print t2.table
time.sleep(args.refresh)

View file

@ -77,12 +77,14 @@ def compute_progression(server, field_name, num_day, url_parsed):
member_set = []
for keyw in server.smembers(redis_progression_name_set):
member_set.append((keyw, int(server.hget(redis_progression_name, keyw))))
print member_set
member_set.sort(key=lambda tup: tup[1])
if member_set[0][1] < keyword_increase:
print 'removing', member_set[0][0] + '('+str(member_set[0][1])+')', 'and adding', keyword, str(keyword_increase)
#remove min from set and add the new one
server.srem(redis_progression_name_set, member_set[0])
server.srem(redis_progression_name_set, member_set[0][0])
server.sadd(redis_progression_name_set, keyword)
server.hdel(redis_progression_name, member_set[0][0])
server.hset(redis_progression_name, keyword, keyword_increase)
if __name__ == '__main__':

View file

@ -8,50 +8,52 @@ sleep 0.1
echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
sleep 0.1
# ModuleInformation.py declares -k/--autokill as a required option;
# 0 keeps auto-kill disabled, as in the launching_scripts function.
screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x'

View file

@ -34,7 +34,7 @@ subscribe = Redis_Global
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve
[CreditCards]
subscribe = Redis_CreditCard
subscribe = Redis_CreditCards
publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste
[Mail]

62
doc/generate_graph_data.py Executable file
View file

@ -0,0 +1,62 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
content = ""
modules = {}
all_modules = []
curr_module = ""
streamingPub = {}
streamingSub = {}
with open('../bin/packages/modules.cfg', 'r') as f:
for line in f:
if line[0] != '#':
if line[0] == '[':
curr_name = line.replace('[','').replace(']','').replace('\n', '').replace(' ', '')
all_modules.append(curr_name)
modules[curr_name] = {'sub': [], 'pub': []}
curr_module = curr_name
elif curr_module != "": # searching for sub or pub
if line.startswith("subscribe"):
curr_subscribers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
modules[curr_module]['sub'] = curr_subscribers
for sub in curr_subscribers:
streamingSub[sub] = curr_module
elif line.startswith("publish"):
curr_publishers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
modules[curr_module]['pub'] = curr_publishers
for pub in curr_publishers:
streamingPub[pub] = curr_module
else:
continue
output_set_graph = set()
for module in modules.keys():
for stream_in in modules[module]['sub']:
if stream_in not in streamingPub.keys():
output_set_graph.add("\"" + stream_in + "\" [color=darkorange1] ;\n")
output_set_graph.add("\"" + stream_in + "\"" + "->" + module + ";\n")
else:
output_set_graph.add("\"" + streamingPub[stream_in] + "\"" + "->" + module + ";\n")
for stream_out in modules[module]['pub']:
if stream_out not in streamingSub.keys():
output_set_graph.add("\"" + stream_out + "\" [color=darkorange1] ;\n")
output_set_graph.add("\"" + stream_out + "\"" + "->" + module + ";\n")
else:
output_set_graph.add("\"" + module + "\"" + "->" + streamingSub[stream_out] + ";\n")
output_text_graph = ""
output_text_graph += "digraph unix {\n"\
"graph [pad=\"0.5\"];\n"\
"size=\"25,25\";\n"\
"node [color=lightblue2, style=filled];\n"
for elem in output_set_graph:
output_text_graph += elem
output_text_graph += "}"
print output_text_graph

View file

@ -0,0 +1,3 @@
#!/bin/bash
# Renders the module data-flow graph: the dot description printed by
# generate_graph_data.py is converted to module-data-flow.png.

# Stop on the first error, including a failure of the generator inside the
# pipeline (otherwise dot would still write an image from empty input).
set -e -o pipefail

# generate_graph_data.py opens its input through a relative path, so run
# from the directory that contains this script.
cd "$(dirname "$0")"

python generate_graph_data.py | dot -T png -o module-data-flow.png

BIN
doc/module-data-flow.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 126 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 190 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

View file

@ -17,6 +17,9 @@ sudo apt-get install libadns1 libadns1-dev
#Needed for redis-lvlDB
sudo apt-get install libev-dev libgmp-dev
#Needed to generate the module data-flow graph
sudo apt-get install graphviz
#needed for matplotlib
test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/
sudo easy_install -U distribute
@ -69,6 +72,7 @@ echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate
mkdir -p $AIL_HOME/{PASTES,Blooms,dumps}
mkdir -p $AIL_HOME/LEVEL_DB_DATA/2016
mkdir -p $AIL_HOME/LEVEL_DB_DATA/3016
pip install -U pip
pip install -r pip_packages_requirement.txt
@ -83,5 +87,6 @@ pushd tlsh/py_ext
python setup.py build
python setup.py install
# Download the necessary NLTK corpora
# Download the necessary NLTK corpora and sentiment vader
HOME=$(pwd) python -m textblob.download_corpora
python -m nltk.downloader vader_lexicon

View file

@ -10,6 +10,7 @@ textblob
numpy
matplotlib
networkx
terminaltables
#Tokeniser
nltk

View file

@ -81,8 +81,22 @@ def event_stream():
def get_queues(r):
    """Return one ``(queue, card, seconds)`` tuple per module queue.

    ``queue`` and ``card`` come from the Redis hash "queues" (``card`` is
    converted to int).  ``seconds`` is the time elapsed since the timestamp
    stored under the key ``MODULE_<queue>`` (value format
    ``"<timestamp>, <path>"``); it is 0 when that key does not exist.

    r -- Redis connection providing ``hgetall`` and ``get``.
    """
    # We may want to put the llen in a pipeline to do only one query.
    data = [(queue, int(card)) for queue, card in r.hgetall("queues").iteritems()]
    newData = []
    for queue, card in data:
        value = r.get("MODULE_" + queue)
        if value is None:
            # No activity recorded for this queue yet.
            newData.append((queue, card, 0))
            continue
        # Split once only: the path part may itself contain ", ".
        timestamp, _path = value.split(", ", 1)
        startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
        seconds = int((datetime.datetime.now() - startTime_readable).total_seconds())
        newData.append((queue, card, seconds))
    return newData
def list_len(s):

View file

@ -221,11 +221,17 @@ function create_queue_table() {
for(i = 0; i < (glob_tabvar.row1).length;i++){
var tr = document.createElement('TR')
for(j = 0; j < (glob_tabvar.row1[i]).length; j++){
for(j = 0; j < 2; j++){
var td = document.createElement('TD')
td.appendChild(document.createTextNode(glob_tabvar.row1[i][j]));
tr.appendChild(td)
}
if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2)
tr.className += " danger";
else if (parseInt(glob_tabvar.row1[i][2]) > 60*1)
tr.className += " warning";
else
tr.className += " success";
tableBody.appendChild(tr);
}
Tablediv.appendChild(table);