python 3 backend upgrade

This commit is contained in:
Terrtia 2018-04-16 14:50:04 +02:00
parent 3395b16873
commit 9c82dd90ec
54 changed files with 445 additions and 370 deletions

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -33,7 +33,7 @@ if __name__ == "__main__":
PST = Paste.Paste(message)
else:
publisher.debug("Script Attribute is idling 1s")
print 'sleeping'
print('sleeping')
time.sleep(1)
continue
@ -45,6 +45,6 @@ if __name__ == "__main__":
# FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path)
except IOError:
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
The ZMQ_PubSub_Categ Module
@ -73,7 +73,7 @@ if __name__ == "__main__":
bname = os.path.basename(filename)
tmp_dict[bname] = []
with open(os.path.join(args.d, filename), 'r') as f:
patterns = [r'%s' % re.escape(s.strip()) for s in f]
patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
prec_filename = None
@ -82,18 +82,25 @@ if __name__ == "__main__":
filename = p.get_from_set()
if filename is None:
publisher.debug("Script Categ is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
continue
paste = Paste.Paste(filename)
content = paste.get_p_content()
print('-----------------------------------------------------')
print(filename)
print(content)
print('-----------------------------------------------------')
for categ, pattern in tmp_dict.items():
found = set(re.findall(pattern, content))
if len(found) >= matchingThreshold:
msg = '{} {}'.format(paste.p_path, len(found))
print msg, categ
#msg = " ".join( [paste.p_path, bytes(len(found))] )
print(msg, categ)
p.populate_set_out(msg, categ)
publisher.info(

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -61,26 +61,34 @@ if __name__ == "__main__":
minTopPassList = p.config.getint("Credential", "minTopPassList")
regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
#regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
while True:
message = p.get_from_set()
if message is None:
publisher.debug("Script Credential is Idling 10s")
print('sleeping 10s')
time.sleep(10)
time.sleep(1)
continue
filepath, count = message.split()
filepath, count = message.split(' ')
if count < minTopPassList:
#if count < minTopPassList:
# Less than 5 matches from the top password list, false positive.
print("false positive:", count)
continue
#print("false positive:", count)
#continue
paste = Paste.Paste(filepath)
content = paste.get_p_content()
creds = set(re.findall(regex_cred, content))
print(len(creds))
print(creds)
print(content)
print('-----')
publisher.warning('to_print')
if len(creds) == 0:
continue
@ -89,7 +97,7 @@ if __name__ == "__main__":
message = 'Checked {} credentials found.'.format(len(creds))
if sites_set:
message += ' Related websites: {}'.format(', '.join(sites_set))
message += ' Related websites: {}'.format( (', '.join(sites_set)) )
to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path)
@ -102,7 +110,8 @@ if __name__ == "__main__":
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
#Send to alertHandler
p.populate_set_out('credential;{}'.format(filepath), 'alertHandler')
msg = 'credential;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Put in form, count occurences, then send to moduleStats
creds_sites = {}
@ -122,9 +131,10 @@ if __name__ == "__main__":
else:
creds_sites[domain] = 1
for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
print 'credential;{};{};{}'.format(num, site, paste.p_date)
p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')
for site, num in creds_sites.items(): # Send for each different site to moduleStats
mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
print(mssg)
p.populate_set_out(mssg, 'ModuleStats')
if sites_set:
print("=======> Probably on : {}".format(', '.join(sites_set)))
@ -158,4 +168,3 @@ if __name__ == "__main__":
for partCred in splitedCred:
if len(partCred) > minimumLengthThreshold:
server_cred.sadd(partCred, uniq_num_cred)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -18,6 +18,7 @@ from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
import re
import sys
from Helper import Process
@ -58,13 +59,14 @@ if __name__ == "__main__":
content = paste.get_p_content()
all_cards = re.findall(regex, content)
if len(all_cards) > 0:
print 'All matching', all_cards
print('All matching', all_cards)
creditcard_set = set([])
for card in all_cards:
clean_card = re.sub('[^0-9]', '', card)
clean_card = clean_card
if lib_refine.is_luhn_valid(clean_card):
print clean_card, 'is valid'
print(clean_card, 'is valid')
creditcard_set.add(clean_card)
paste.__setattr__(channel, creditcard_set)
@ -76,13 +78,15 @@ if __name__ == "__main__":
if (len(creditcard_set) > 0):
publisher.warning('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path))
print('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path))
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
#send to Browse_warning_paste
p.populate_set_out('creditcard;{}'.format(filename), 'alertHandler')
msg = 'creditcard;{}'.format(filename)
p.populate_set_out(msg, 'alertHandler')
else:
publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path))
else:
publisher.debug("Script creditcard is idling 1m")
time.sleep(10)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
@ -53,7 +53,7 @@ def check_if_tracked_term(term, path):
#add_paste to tracked_word_set
set_name = "tracked_" + term
server_term.sadd(set_name, path)
print term, 'addded', set_name, '->', path
print(term, 'addded', set_name, '->', path)
p.populate_set_out("New Term added", 'CurveManageTopSets')
# Send a notification only when the member is in the set
@ -149,15 +149,16 @@ if __name__ == "__main__":
if generate_new_graph:
generate_new_graph = False
print 'Building graph'
print('Building graph')
today = datetime.date.today()
year = today.year
month = today.month
lib_words.create_curve_with_word_file(r_serv1, csv_path,
wordfile_path, year,
month)
publisher.debug("Script Curve is Idling")
print "sleeping"
print("sleeping")
time.sleep(10)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -16,7 +16,7 @@ from packages import lib_words
import datetime
import calendar
import os
import ConfigParser
import configparser
# Config Variables
Refresh_rate = 60*5 #sec
@ -68,26 +68,26 @@ def manage_top_set():
# convert dico into sorted array
array_month = []
for w, v in dico.iteritems():
for w, v in dico.items():
array_month.append((w, v))
array_month.sort(key=lambda tup: -tup[1])
array_month = array_month[0:20]
array_week = []
for w, v in dico_week.iteritems():
for w, v in dico_week.items():
array_week.append((w, v))
array_week.sort(key=lambda tup: -tup[1])
array_week = array_week[0:20]
# convert dico_per_paste into sorted array
array_month_per_paste = []
for w, v in dico_per_paste.iteritems():
for w, v in dico_per_paste.items():
array_month_per_paste.append((w, v))
array_month_per_paste.sort(key=lambda tup: -tup[1])
array_month_per_paste = array_month_per_paste[0:20]
array_week_per_paste = []
for w, v in dico_week_per_paste.iteritems():
for w, v in dico_week_per_paste.items():
array_week_per_paste.append((w, v))
array_week_per_paste.sort(key=lambda tup: -tup[1])
array_week_per_paste = array_week_per_paste[0:20]
@ -114,7 +114,7 @@ def manage_top_set():
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
value = str(timestamp) + ", " + "-"
r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
print "refreshed module"
print("refreshed module")
@ -131,7 +131,7 @@ if __name__ == '__main__':
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
@ -162,4 +162,3 @@ if __name__ == '__main__':
# Get one message from the input queue (module only work if linked with a queue)
time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
The CVE Module
@ -32,7 +32,8 @@ def search_cve(message):
publisher.warning('{} contains CVEs'.format(paste.p_name))
#send to Browse_warning_paste
p.populate_set_out('cve;{}'.format(filepath), 'alertHandler')
msg = 'cve;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
@ -63,4 +64,3 @@ if __name__ == '__main__':
# Do something with the message from the queue
search_cve(message)

View file

@ -1,18 +1,18 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import argparse
import redis
from pubsublogger import publisher
from packages.lib_words import create_dirfile
import ConfigParser
import configparser
def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg')
parser = argparse.ArgumentParser(

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -44,6 +44,7 @@ def main():
continue
paste = PST.get_p_content()
mimetype = PST._get_p_encoding()
if mimetype == "text/plain":
c.text(rawtext=paste)
c.potentialdomain()
@ -59,7 +60,7 @@ def main():
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
except IOError:
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
from pubsublogger import publisher
@ -23,7 +23,7 @@ if __name__ == "__main__":
if message is not None:
f = open(dump_file, 'a')
while message is not None:
print message
print(message)
date = datetime.datetime.now()
if message is not None:
f.write(date.isoformat() + ' ' + message + '\n')

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -49,8 +49,8 @@ if __name__ == "__main__":
# REDIS #
dico_redis = {}
date_today = datetime.today()
for year in xrange(2013, date_today.year+1):
for month in xrange(0, 13):
for year in range(2013, date_today.year+1):
for month in range(0, 13):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month)
@ -90,7 +90,7 @@ if __name__ == "__main__":
# Get the date of the range
date_range = date_today - timedelta(days = maximum_month_range*30.4166666)
num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month)
for diff_month in xrange(0, num_of_month+1):
for diff_month in range(0, num_of_month+1):
curr_date_range = date_today - timedelta(days = diff_month*30.4166666)
to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2)
dico_range_list.append(to_append)
@ -102,7 +102,7 @@ if __name__ == "__main__":
yearly_index = str(date_today.year)+'00'
r_serv0 = dico_redis[yearly_index]
r_serv0.incr("current_index")
index = r_serv0.get("current_index")+str(PST.p_date)
index = (r_serv0.get("current_index")).decode('utf8') + str(PST.p_date)
# Open selected dico range
opened_dico = []
@ -114,8 +114,10 @@ if __name__ == "__main__":
# Go throught the Database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico:
for hash_type, paste_hash in paste_hashes.iteritems():
for hash_type, paste_hash in paste_hashes.items():
for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
dico_hash = dico_hash.decode('utf8')
try:
if hash_type == 'ssdeep':
percent = 100-ssdeep.compare(dico_hash, paste_hash)
@ -130,15 +132,18 @@ if __name__ == "__main__":
# index of paste
index_current = r_serv_dico.get(dico_hash)
index_current = index_current.decode('utf8')
paste_path = r_serv_dico.get(index_current)
paste_path = paste_path.decode('utf8')
paste_date = r_serv_dico.get(index_current+'_date')
paste_date = paste_date.decode('utf8')
paste_date = paste_date if paste_date != None else "No date available"
if paste_path != None:
hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)
except Exception,e:
print str(e)
print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent))
except Exception:
print(str(e))
#print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash
# Add paste in DB after checking to prevent its analysis twice
@ -147,7 +152,7 @@ if __name__ == "__main__":
r_serv1.set(index+'_date', PST._get_p_date())
r_serv1.sadd("INDEX", index)
# Adding hashes in Redis
for hash_type, paste_hash in paste_hashes.iteritems():
for hash_type, paste_hash in paste_hashes.items():
r_serv1.set(paste_hash, index)
r_serv1.sadd("HASHS_"+hash_type, paste_hash)
@ -166,7 +171,7 @@ if __name__ == "__main__":
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
print '{}Detected {}'.format(to_print, len(dupl))
print('{}Detected {}'.format(to_print, len(dupl)))
y = time.time()
@ -176,5 +181,5 @@ if __name__ == "__main__":
except IOError:
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -33,8 +33,8 @@ if __name__ == "__main__":
# DB OBJECT & HASHS ( DISK )
# FIXME increase flexibility
dico_redis = {}
for year in xrange(2013, 2017):
for month in xrange(0, 16):
for year in range(2013, 2017):
for month in range(0, 16):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month)
@ -147,7 +147,7 @@ if __name__ == "__main__":
if percentage >= 50:
dupl.append((paste, percentage))
else:
print 'percentage: ' + str(percentage)
print('percentage: ' + str(percentage))
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
@ -156,11 +156,11 @@ if __name__ == "__main__":
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
print '{}Detected {}'.format(to_print, len(dupl))
print('{}Detected {}'.format(to_print, len(dupl)))
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
except IOError:
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -67,7 +67,7 @@ if __name__ == '__main__':
continue
# Creating the full filepath
filename = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "pastes"), paste.decode('utf8'))
p.config.get("Directories", "pastes"), paste)
#print(filename)
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
@ -77,5 +77,5 @@ if __name__ == '__main__':
f.write(base64.standard_b64decode(gzip64encoded))
print(filename)
p.populate_set_out(filename.encode('utf8'))
p.populate_set_out(filename)
processed_paste+=1

View file

@ -58,7 +58,7 @@ class PubSub(object):
new_sub = context.socket(zmq.SUB)
new_sub.connect(address)
# bytes64 encode bytes to ascii only bytes
new_sub.setsockopt(zmq.SUBSCRIBE, channel.encode('ascii'))
new_sub.setsockopt_string(zmq.SUBSCRIBE, channel)
self.subscribers.append(new_sub)
def setup_publish(self, conn_name):
@ -78,15 +78,15 @@ class PubSub(object):
self.publishers['ZMQ'].append((p, channel))
def publish(self, message):
m = json.loads(message.decode('ascii'))
m = json.loads(message)
channel_message = m.get('channel')
for p, channel in self.publishers['Redis']:
if channel_message is None or channel_message == channel:
p.publish(channel, ( m['message']).encode('ascii') )
p.publish(channel, ( m['message']) )
for p, channel in self.publishers['ZMQ']:
if channel_message is None or channel_message == channel:
mess = ( m['message'] ).encode('ascii')
p.send(b' '.join( [channel, mess] ) )
p.send('{} {}'.format(channel, m['message']))
#p.send(b' '.join( [channel, mess] ) )
def subscribe(self):
@ -99,7 +99,8 @@ class PubSub(object):
for sub in self.subscribers:
try:
msg = sub.recv(zmq.NOBLOCK)
yield msg.split(b" ", 1)[1]
msg = msg.decode('utf8')
yield msg.split(" ", 1)[1]
except zmq.error.Again as e:
time.sleep(0.2)
pass
@ -150,6 +151,12 @@ class Process(object):
self.r_temp.hset('queues', self.subscriber_name,
int(self.r_temp.scard(in_set)))
message = self.r_temp.spop(in_set)
try:
message = message.decode('utf8')
except AttributeError:
pass
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
@ -158,12 +165,12 @@ class Process(object):
else:
#try:
if b'.gz' in message:
path = message.split(b".")[-2].split(b"/")[-1]
if '.gz' in message:
path = message.split(".")[-2].split("/")[-1]
#find start of path with AIL_HOME
index_s = (message.decode('ascii')).find(os.environ['AIL_HOME'])
index_s = message.find(os.environ['AIL_HOME'])
#Stop when .gz
index_e = message.find(b".gz")+3
index_e = message.find(".gz")+3
if(index_s == -1):
complete_path = message[0:index_e]
else:
@ -173,7 +180,7 @@ class Process(object):
path = "-"
complete_path = "?"
value = str(timestamp) + ", " + path.decode('ascii')
value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path)
self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
@ -190,13 +197,12 @@ class Process(object):
def populate_set_out(self, msg, channel=None):
# multiproc
msg = msg.decode('ascii')
msg = {'message': msg}
if channel is not None:
msg.update({'channel': channel})
# bytes64 encode bytes to ascii only bytes
j = (json.dumps(msg)).encode('ascii')
j = json.dumps(msg)
self.r_temp.sadd(self.subscriber_name + 'out', j)
def publish(self):
@ -209,6 +215,12 @@ class Process(object):
self.pubsub.setup_publish(name)
while True:
message = self.r_temp.spop(self.subscriber_name + 'out')
try:
message = message.decode('utf8')
except AttributeError:
pass
if message is None:
time.sleep(1)
continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -107,10 +107,11 @@ if __name__ == "__main__":
continue
docpath = message.split(" ", -1)[-1]
paste = PST.get_p_content()
print "Indexing - "+indexname+" :", docpath
print("Indexing - " + indexname + " :", docpath)
if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message
#avoid calculating the index's size at each message
if( time.time() - last_refresh > TIME_WAIT):
last_refresh = time.time()
if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000):
timestamp = int(time.time())
@ -128,11 +129,11 @@ if __name__ == "__main__":
if indexertype == "whoosh":
indexwriter = ix.writer()
indexwriter.update_document(
title=unicode(docpath, errors='ignore'),
path=unicode(docpath, errors='ignore'),
content=unicode(paste, errors='ignore'))
title=docpath,
path=docpath,
content=paste)
indexwriter.commit()
except IOError:
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name))

View file

@ -15,46 +15,49 @@ RSA private key, certificate messages
import time
from pubsublogger import publisher
from bin.packages import Paste
from bin.Helper import Process
#from bin.packages import Paste
#from bin.Helper import Process
from packages import Paste
from Helper import Process
def search_key(paste):
content = paste.get_p_content()
find = False
if b'-----BEGIN PGP MESSAGE-----' in content:
if '-----BEGIN PGP MESSAGE-----' in content:
publisher.warning('{} has a PGP enc message'.format(paste.p_name))
find = True
if b'-----BEGIN CERTIFICATE-----' in content:
if '-----BEGIN CERTIFICATE-----' in content:
publisher.warning('{} has a certificate message'.format(paste.p_name))
find = True
if b'-----BEGIN RSA PRIVATE KEY-----' in content:
if '-----BEGIN RSA PRIVATE KEY-----' in content:
publisher.warning('{} has a RSA private key message'.format(paste.p_name))
find = True
if b'-----BEGIN PRIVATE KEY-----' in content:
if '-----BEGIN PRIVATE KEY-----' in content:
publisher.warning('{} has a private key message'.format(paste.p_name))
find = True
if b'-----BEGIN ENCRYPTED PRIVATE KEY-----' in content:
if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content:
publisher.warning('{} has an encrypted private key message'.format(paste.p_name))
find = True
if b'-----BEGIN OPENSSH PRIVATE KEY-----' in content:
if '-----BEGIN OPENSSH PRIVATE KEY-----' in content:
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
find = True
if b'-----BEGIN DSA PRIVATE KEY-----' in content:
if '-----BEGIN DSA PRIVATE KEY-----' in content:
publisher.warning('{} has a dsa private key message'.format(paste.p_name))
find = True
if b'-----BEGIN EC PRIVATE KEY-----' in content:
if '-----BEGIN EC PRIVATE KEY-----' in content:
publisher.warning('{} has an ec private key message'.format(paste.p_name))
find = True
if b'-----BEGIN PGP PRIVATE KEY BLOCK-----' in content:
if '-----BEGIN PGP PRIVATE KEY BLOCK-----' in content:
publisher.warning('{} has a pgp private key block message'.format(paste.p_name))
find = True
@ -63,7 +66,9 @@ def search_key(paste):
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
#send to Browse_warning_paste
p.populate_set_out('keys;{}'.format(message), 'alertHandler')
msg = ('keys;{}'.format(message))
print(message)
p.populate_set_out( msg, 'alertHandler')
if __name__ == '__main__':

View file

@ -130,8 +130,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Lines" bash -c 'python3 Lines.py; read x'
sleep 0.1
#screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
#sleep 0.1
screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Categ" bash -c 'python3 Categ.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Tokenize" bash -c 'python3 Tokenize.py; read x'
@ -142,8 +142,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
#screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x'
#sleep 0.1
screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Curve" bash -c './Curve.py; read x'
@ -168,8 +168,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
sleep 0.1
#screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
#sleep 0.1
screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "alertHandler" bash -c './alertHandler.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -61,7 +61,7 @@ if __name__ == '__main__':
while True:
try:
message = p.get_from_set()
print message
print(message)
if message is not None:
PST = Paste.Paste(message)
else:
@ -81,4 +81,4 @@ if __name__ == '__main__':
else:
p.populate_set_out( PST.p_path , 'LinesLong')
except IOError:
print "CRC Checksum Error on : ", PST.p_path
print("CRC Checksum Error on : ", PST.p_path)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -78,14 +78,14 @@ if __name__ == "__main__":
publisher.info(to_print)
#Send to ModuleStats
for mail in MX_values[1]:
print 'mail;{};{};{}'.format(1, mail, PST.p_date)
print('mail;{};{};{}'.format(1, mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats')
prec_filename = filename
else:
publisher.debug("Script Mails is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
message = p.get_from_set()

View file

@ -91,7 +91,7 @@ if __name__ == '__main__':
complete_paste, gzip64encoded = splitted
try:
feeder_name = ( complete_paste.replace(b"archive/",b"") ).split(b"/")[0]
feeder_name = ( complete_paste.replace("archive/","") ).split("/")[0]
# TODO take real name ?
paste_name = complete_paste
@ -109,9 +109,10 @@ if __name__ == '__main__':
processed_paste_per_feeder[feeder_name] = 1
duplicated_paste_per_feeder[feeder_name] = 0
relay_message = b" ".join( [paste_name, gzip64encoded] )
relay_message = "{0} {1}".format(paste_name, gzip64encoded)
#relay_message = b" ".join( [paste_name, gzip64encoded] )
digest = hashlib.sha1(gzip64encoded).hexdigest()
digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest()
# Avoid any duplicate coming from any sources
if operation_mode == 1:

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
'''
@ -20,7 +20,7 @@ import os
import signal
import argparse
from subprocess import PIPE, Popen
import ConfigParser
import configparser
import json
from terminaltables import AsciiTable
import textwrap
@ -51,7 +51,7 @@ last_refresh = 0
def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout:
print line
print(line)
splittedLine = line.split()
if 'python2' in splittedLine:
return int(splittedLine[0])
@ -76,7 +76,7 @@ def cleanRedis():
flag_pid_valid = True
if not flag_pid_valid:
print flag_pid_valid, 'cleaning', pid, 'in', k
print(flag_pid_valid, 'cleaning', pid, 'in', k)
server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
@ -85,11 +85,11 @@ def cleanRedis():
def kill_module(module, pid):
print ''
print '-> trying to kill module:', module
print('')
print('-> trying to kill module:', module)
if pid is None:
print 'pid was None'
print('pid was None')
printarrayGlob.insert(1, [0, module, pid, "PID was None"])
printarrayGlob.pop()
pid = getPid(module)
@ -102,15 +102,15 @@ def kill_module(module, pid):
try:
os.kill(pid, signal.SIGUSR1)
except OSError:
print pid, 'already killed'
print(pid, 'already killed')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"])
printarrayGlob.pop()
return
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
print(module, 'has been killed')
print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -119,7 +119,7 @@ def kill_module(module, pid):
printarrayGlob.pop()
else:
print 'killing failed, retrying...'
print('killing failed, retrying...')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."])
printarrayGlob.pop()
@ -128,8 +128,8 @@ def kill_module(module, pid):
os.kill(pid, signal.SIGUSR1)
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
print(module, 'has been killed')
print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -137,12 +137,12 @@ def kill_module(module, pid):
printarrayGlob.pop()
printarrayGlob.pop()
else:
print 'killing failed!'
print('killing failed!')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"])
printarrayGlob.pop()
else:
print 'Module does not exist'
print('Module does not exist')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"])
printarrayGlob.pop()
@ -192,7 +192,7 @@ if __name__ == "__main__":
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# REDIS #
@ -227,7 +227,7 @@ if __name__ == "__main__":
printarray1 = []
printarray2 = []
printarray3 = []
for queue, card in server.hgetall("queues").iteritems():
for queue, card in server.hgetall("queues").items():
all_queue.add(queue)
key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue
@ -337,15 +337,15 @@ if __name__ == "__main__":
legend = AsciiTable(legend_array, title="Legend")
legend.column_max_width(1)
print legend.table
print '\n'
print t1.table
print '\n'
print t2.table
print '\n'
print t3.table
print '\n'
print t4.table
print(legend.table)
print('\n')
print(t1.table)
print('\n')
print(t2.table)
print('\n')
print(t3.table)
print('\n')
print(t4.table9)
if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
lastTime = datetime.datetime.now()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
This module makes statistics for some modules and providers
@ -52,10 +52,10 @@ def compute_most_posted(server, message):
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < keyword_total_sum:
#remove min from set and add the new one
print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server.zrem(redis_progression_name_set, member_set[0][0])
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
print redis_progression_name_set
print(redis_progression_name_set)
def compute_provider_info(server_trend, server_pasteName, path):
@ -94,7 +94,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs
if float(member_set[0][1]) < new_avg:
#remove min from set and add the new one
print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(redis_sum_size_set, member_set[0][0])
server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
@ -110,7 +110,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < num_paste:
#remove min from set and add the new one
print 'Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(member_set[0][0])
server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
@ -149,7 +149,7 @@ if __name__ == '__main__':
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
print('sleeping')
time.sleep(20)
continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
@ -10,7 +10,7 @@ from asciimatics.event import Event
from asciimatics.event import KeyboardEvent, MouseEvent
import sys, os
import time, datetime
import argparse, ConfigParser
import argparse, configparser
import json
import redis
import psutil
@ -497,7 +497,7 @@ MANAGE MODULES AND GET INFOS
def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout:
print line
print(line)
splittedLine = line.split()
if 'python3' in splittedLine:
return int(splittedLine[0])
@ -511,21 +511,21 @@ def clearRedisModuleInfo():
def cleanRedis():
for k in server.keys("MODULE_TYPE_*"):
moduleName = k[12:].split('_')[0]
moduleName = (k[12:].decode('utf8')).split('_')[0]
for pid in server.smembers(k):
flag_pid_valid = False
proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
try:
for line in proc.stdout:
splittedLine = line.split()
if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine:
if ('python3' in splittedLine or 'python' in splittedLine) and "./"+moduleName + ".py" in splittedLine:
flag_pid_valid = True
if not flag_pid_valid:
#print flag_pid_valid, 'cleaning', pid, 'in', k
server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0))
log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + (k).decode('utf8')], 0))
#Error due to resize, interrupted sys call
except IOError as e:
@ -601,7 +601,9 @@ def fetchQueueData():
printarray_running = []
printarray_idle = []
printarray_notrunning = []
for queue, card in server.hgetall("queues").iteritems():
for queue, card in iter(server.hgetall("queues").items()):
queue = queue.decode('utf8')
card = card.decode('utf8')
all_queue.add(queue)
key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue
@ -715,6 +717,11 @@ def format_string(tab, padding_row):
text=""
for ite, elem in enumerate(the_array):
try:
elem = elem.decode('utf8')
except AttributeError:
pass
if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5
@ -761,7 +768,7 @@ def demo(screen):
if time.time() - time_cooldown > args.refresh:
cleanRedis()
for key, val in fetchQueueData().iteritems(): #fetch data and put it into the tables
for key, val in iter(fetchQueueData().items()): #fetch data and put it into the tables
TABLES[key] = val
TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"])
@ -790,7 +797,7 @@ if __name__ == "__main__":
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# REDIS #

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import ConfigParser
import configparser
import os
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
"""
@ -28,7 +28,7 @@ def sendEmailNotification(recipient, term):
Did you set environment variables? \
Or activate the virtualenv?')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
sender = cfg.get("Notifications", "sender"),
@ -76,8 +76,5 @@ def sendEmailNotification(recipient, term):
smtp_server.quit()
except Exception as e:
print str(e)
print(str(e))
# raise e

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
The ZMQ_Sub_Onion Module
@ -37,7 +37,7 @@ from Helper import Process
def fetch(p, r_cache, urls, domains, path):
failed = []
downloaded = []
print len(urls), 'Urls to fetch.'
print(len(urls), 'Urls to fetch.')
for url, domain in zip(urls, domains):
if r_cache.exists(url) or url in failed:
continue
@ -73,9 +73,9 @@ def fetch(p, r_cache, urls, domains, path):
r_cache.setbit(url, 0, 0)
r_cache.expire(url, 3600)
failed.append(url)
print 'Failed at downloading', url
print process.stdout.read()
print 'Failed:', len(failed), 'Downloaded:', len(downloaded)
print('Failed at downloading', url)
print(process.stdout.read())
print('Failed:', len(failed), 'Downloaded:', len(downloaded))
if __name__ == "__main__":
@ -109,7 +109,7 @@ if __name__ == "__main__":
while True:
if message is not None:
print message
print(message)
filename, score = message.split()
# "For each new paste"
@ -152,6 +152,6 @@ if __name__ == "__main__":
prec_filename = filename
else:
publisher.debug("Script url is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -30,10 +30,11 @@ def search_phone(message):
# if the list is greater than 4, we consider the Paste may contain a list of phone numbers
if len(results) > 4:
print results
print(results)
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
#send to Browse_warning_paste
p.populate_set_out('phone;{}'.format(message), 'alertHandler')
msg = 'phone;{}'.format(message)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
stats = {}

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
from pubsublogger import publisher

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import redis
import argparse
import ConfigParser
import configparser
import time
import os
from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg')
# SCRIPT PARSER #
@ -49,7 +49,7 @@ def main():
row.sort()
table.add_rows(row, header=False)
os.system('clear')
print table.draw()
print(table.draw())
if __name__ == "__main__":

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
This Module is used for term frequency.
@ -76,6 +76,9 @@ if __name__ == "__main__":
dico_regex, dico_regexname_to_redis = refresh_dicos()
print('dico got refreshed')
print(dico_regex)
print(dico_regexname_to_redis)
filename = message
temp = filename.split('/')
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
@ -115,6 +118,6 @@ if __name__ == "__main__":
else:
publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping"
print("sleeping")
time.sleep(5)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import time
from packages import Paste
@ -29,7 +29,7 @@ if __name__ == "__main__":
filepath = p.get_from_set()
if filepath is None:
publisher.debug("Script Release is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
continue
@ -40,6 +40,7 @@ if __name__ == "__main__":
continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
print(to_print)
if len(releases) > 30:
publisher.warning(to_print)
else:

View file

@ -1,9 +1,9 @@
#!/usr/bin/python2.7
#!/usr/bin/python3.5
# -*-coding:UTF-8 -*
import redis
import argparse
import ConfigParser
import configparser
from datetime import datetime
from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg')
# SCRIPT PARSER #

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -13,7 +13,7 @@ It test different possibility to makes some sqlInjection.
import time
import string
import urllib2
import urllib.request
import re
from pubsublogger import publisher
from Helper import Process
@ -74,8 +74,8 @@ def analyse(url, path):
if (result_path > 0) or (result_query > 0):
paste = Paste.Paste(path)
if (result_path > 1) or (result_query > 1):
print "Detected SQL in URL: "
print urllib2.unquote(url)
print("Detected SQL in URL: ")
print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
publisher.warning(to_print)
#Send to duplicate
@ -83,8 +83,8 @@ def analyse(url, path):
#send to Browse_warning_paste
p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler')
else:
print "Potential SQL injection:"
print urllib2.unquote(url)
print("Potential SQL injection:")
print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path)
publisher.info(to_print)
@ -92,7 +92,7 @@ def analyse(url, path):
# Try to detect if the url passed might be an sql injection by appliying the regex
# defined above on it.
def is_sql_injection(url_parsed):
line = urllib2.unquote(url_parsed)
line = urllib.request.unquote(url_parsed)
line = line.upper()
result = []
result_suspect = []
@ -114,10 +114,10 @@ def is_sql_injection(url_parsed):
result_suspect.append(line[temp_res:temp_res+len(word)])
if len(result)>0:
print result
print(result)
return 2
elif len(result_suspect)>0:
print result_suspect
print(result_suspect)
return 1
else:
return 0

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
Sentiment analyser module.
@ -33,7 +33,7 @@ size_threshold = 250
line_max_length_threshold = 1000
import os
import ConfigParser
import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
@ -41,7 +41,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
@ -69,7 +69,7 @@ def Analyse(message, server):
combined_datetime = datetime.datetime.combine(the_date, the_time)
timestamp = calendar.timegm(combined_datetime.timetuple())
sentences = tokenize.sent_tokenize(p_content.decode('utf-8', 'ignore'))
sentences = tokenize.sent_tokenize(p_content)
if len(sentences) > 0:
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
@ -109,11 +109,11 @@ def Analyse(message, server):
provider_timestamp = provider + '_' + str(timestamp)
server.incr('UniqID')
UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines'
print(provider_timestamp, '->', UniqID.decode('utf8'), 'dropped', num_line_removed, 'lines')
server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score)
else:
print 'Dropped:', p_MimeType
print('Dropped:', p_MimeType)
def isJSON(content):
@ -121,7 +121,7 @@ def isJSON(content):
json.loads(content)
return True
except Exception,e:
except Exception:
return False
import signal
@ -170,4 +170,3 @@ if __name__ == '__main__':
continue
else:
signal.alarm(0)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
This Module is used for term frequency.
@ -126,6 +126,6 @@ if __name__ == "__main__":
else:
publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping"
print("sleeping")
time.sleep(5)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
The ZMQ_Feed_Q Module
@ -21,7 +21,7 @@ Requirements
"""
import redis
import ConfigParser
import configparser
import os
configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg')
@ -31,7 +31,7 @@ def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# REDIS

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import time
from packages import Paste
@ -29,7 +29,7 @@ if __name__ == "__main__":
languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
regex = '|'.join(languages)
print regex
print(regex)
while True:
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
The Tokenize Module
@ -50,7 +50,7 @@ if __name__ == "__main__":
while True:
message = p.get_from_set()
print message
print(message)
if message is not None:
paste = Paste.Paste(message)
signal.alarm(5)
@ -67,4 +67,4 @@ if __name__ == "__main__":
else:
publisher.debug("Tokeniser is idling 10s")
time.sleep(10)
print "sleepin"
print("Sleeping")

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import ConfigParser
from ConfigParser import ConfigParser as cfgP
import configparser
from configparser import ConfigParser as cfgP
import os
from collections import OrderedDict
import sys
@ -20,9 +20,9 @@ def main():
Or activate the virtualenv.')
configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
cfgSample = ConfigParser.ConfigParser()
cfgSample = configparser.ConfigParser()
cfgSample.read(configfileSample)
sections = cfgP.sections(cfg)
@ -109,4 +109,3 @@ if __name__ == "__main__":
sys.exit()
else:
sys.exit(1)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -100,7 +100,7 @@ if __name__ == "__main__":
publisher.debug('{} Published'.format(url))
if f1 == "onion":
print domain
print(domain)
hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
try:
@ -121,16 +121,16 @@ if __name__ == "__main__":
# EU is not an official ISO 3166 code (but used by RIPE
# IP allocation)
if cc is not None and cc != "EU":
print hostl, asn, cc, \
pycountry.countries.get(alpha_2=cc).name
print(hostl, asn, cc, \
pycountry.countries.get(alpha_2=cc).name)
if cc == cc_critical:
to_print = 'Url;{};{};{};Detected {} {}'.format(
PST.p_source, PST.p_date, PST.p_name,
hostl, cc)
#publisher.warning(to_print)
print to_print
print(to_print)
else:
print hostl, asn, cc
print(hostl, asn, cc)
A_values = lib_refine.checking_A_record(r_serv2,
domains_list)
@ -146,7 +146,7 @@ if __name__ == "__main__":
else:
publisher.debug("Script url is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -33,7 +33,7 @@ def analyse(server, field_name, date, url_parsed):
if field_name == "domain": #save domain in a set for the monthly plot
domain_set_name = "domain_set_" + date[0:6]
server.sadd(domain_set_name, field)
print "added in " + domain_set_name +": "+ field
print("added in " + domain_set_name +": "+ field)
def get_date_range(num_day):
curr_date = datetime.date.today()
@ -145,24 +145,25 @@ if __name__ == '__main__':
year = today.year
month = today.month
print 'Building protocol graph'
print('Building protocol graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
protocolsfile_path, year,
month)
print 'Building tld graph'
print('Building tld graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld,
tldsfile_path, year,
month)
print 'Building domain graph'
print('Building domain graph')
lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
"domain", year,
month)
print 'end building'
print('end building')
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
print('sleeping')
time.sleep(5*60)
continue
@ -173,9 +174,13 @@ if __name__ == '__main__':
faup.decode(url)
url_parsed = faup.get()
analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis
analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis
analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis
# Scheme analysis
analyse(r_serv_trend, 'scheme', date, url_parsed)
# Tld analysis
analyse(r_serv_trend, 'tld', date, url_parsed)
# Domain analysis
analyse(r_serv_trend, 'domain', date, url_parsed)
compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)

View file

@ -62,8 +62,9 @@ if __name__ == "__main__":
while True:
message = p.get_from_set()
if message is not None:
message = message.decode('utf8') #decode because of python3
#decode because of python3
module_name, p_path = message.split(';')
print("new alert : {}".format(module_name))
#PST = Paste.Paste(p_path)
else:
publisher.debug("Script Attribute is idling 10s")

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
@ -27,10 +27,9 @@ if __name__ == "__main__":
config_section = ['Curve']
for queue in config_section:
print 'dropping: ' + queue
print('dropping: ' + queue)
p = Process(queue)
while True:
message = p.get_from_set()
if message is None:
break

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
@ -25,7 +25,7 @@ import time
import redis
import base64
import os
import ConfigParser
import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
@ -33,7 +33,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
if cfg.has_option("ZMQ_Global", "bind"):
@ -59,6 +59,7 @@ while True:
time.sleep(base_sleeptime + sleep_inc)
topic = 101
paste = r.lpop("pastes")
print(paste)
if paste is None:
continue
socket.send("%d %s" % (topic, paste))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
@ -19,14 +19,15 @@ socket.connect ("tcp://crf.circl.lu:%s" % port)
# 101 Name of the pastes only
# 102 Full pastes in raw base64(gz)
topicfilter = "102"
topicfilter = b"102"
socket.setsockopt(zmq.SUBSCRIBE, topicfilter)
print('b0')
while True:
message = socket.recv()
print('b1')
print (message)
if topicfilter == "102":
if topicfilter == b"102":
topic, paste, messagedata = message.split()
print paste, messagedata
print(paste, messagedata)
else:
print (message)

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*- coding: utf-8 -*-
import zmq
import base64
import StringIO
from io import StringIO
import gzip
import argparse
import os
@ -31,8 +31,7 @@ import mimetypes
'
'''
import StringIO
import gzip
def is_hierachy_valid(path):
var = path.split('/')
try:
@ -72,7 +71,12 @@ if __name__ == "__main__":
wanted_path = wanted_path.split('/')
wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):])
messagedata = open(complete_path).read()
with gzip.open(complete_path, 'rb') as f:
messagedata = f.read()
#print(type(complete_path))
#file = open(complete_path)
#messagedata = file.read()
#if paste do not have a 'date hierarchy' ignore it
if not is_hierachy_valid(complete_path):
@ -90,5 +94,8 @@ if __name__ == "__main__":
print(args.name+'>'+wanted_path)
path_to_send = args.name + '>' + wanted_path
socket.send('{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata)))
#s = b'{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))
# use bytes object
s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
socket.send(s)
time.sleep(args.seconds)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
@ -10,7 +10,7 @@
#
# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be
import ConfigParser
import configparser
import argparse
import gzip
import os
@ -23,7 +23,7 @@ def readdoc(path=None):
return f.read()
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# Indexer configuration - index dir and schema setup
@ -51,7 +51,7 @@ ix = index.open_dir(indexpath)
from whoosh.qparser import QueryParser
if args.n:
print ix.doc_count_all()
print(ix.doc_count_all())
exit(0)
if args.l:

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import ConfigParser
import configparser
import os
import subprocess
import time
@ -23,21 +23,21 @@ if __name__ == '__main__':
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
config.read(configfile)
modules = config.sections()
pids = {}
for module in modules:
pin = subprocess.Popen(["python", './QueueIn.py', '-c', module])
pout = subprocess.Popen(["python", './QueueOut.py', '-c', module])
pin = subprocess.Popen(["python3", './QueueIn.py', '-c', module])
pout = subprocess.Popen(["python3", './QueueOut.py', '-c', module])
pids[module] = (pin, pout)
is_running = True
try:
while is_running:
time.sleep(5)
is_running = False
for module, p in pids.iteritems():
for module, p in pids.items():
pin, pout = p
if pin is None:
# already dead
@ -57,7 +57,7 @@ if __name__ == '__main__':
is_running = True
pids[module] = (pin, pout)
except KeyboardInterrupt:
for module, p in pids.iteritems():
for module, p in pids.items():
pin, pout = p
if pin is not None:
pin.kill()

View file

@ -1,4 +1,4 @@
#!/usr/bin/python2.7
#!/usr/bin/python3.5
"""
The ``Paste Class``
@ -24,15 +24,8 @@ import operator
import string
import re
import json
try: # dirty to support python3
import ConfigParser
except:
import configparser
ConfigParser = configparser
try: # dirty to support python3
import cStringIO
except:
from io import StringIO as cStringIO
from io import StringIO
import sys
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
from Date import Date
@ -71,7 +64,7 @@ class Paste(object):
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
self.cache = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
@ -85,11 +78,15 @@ class Paste(object):
self.p_path = p_path
self.p_name = os.path.basename(self.p_path)
self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2)
self.p_mime = magic.from_buffer("test", mime=True)
self.p_mime = magic.from_buffer(self.get_p_content(), mime=True)
# Assuming that the paste will alway be in a day folder which is itself
# in a month folder which is itself in a year folder.
# /year/month/day/paste.gz
# TODO use bytes ?
var = self.p_path.split('/')
self.p_date = Date(var[-4], var[-3], var[-2])
self.p_source = var[-5]
@ -117,17 +114,25 @@ class Paste(object):
paste = self.cache.get(self.p_path)
if paste is None:
try:
#print('----------------------------------------------------------------')
#print(self.p_name)
#print('----------------------------------------------------------------')
with gzip.open(self.p_path, 'rb') as f:
paste = f.read()
self.cache.set(self.p_path, paste)
self.cache.expire(self.p_path, 300)
except:
return ''
pass
return paste
paste = b''
return paste.decode('utf8')
def get_p_content_as_file(self):
return cStringIO.StringIO(self.get_p_content())
try:
message = StringIO( (self.get_p_content()).decode('utf8') )
except AttributeError:
message = StringIO( (self.get_p_content()) )
return message
def get_p_content_with_removed_lines(self, threshold):
num_line_removed = 0
@ -137,6 +142,7 @@ class Paste(object):
line_id = 0
for line_id, line in enumerate(f):
length = len(line)
if length < line_length_threshold:
string_content += line
else:
@ -202,8 +208,8 @@ class Paste(object):
.. seealso:: _set_p_hash_kind("md5")
"""
for hash_name, the_hash in self.p_hash_kind.iteritems():
self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content())
for hash_name, the_hash in self.p_hash_kind.items():
self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode('utf8'))
return self.p_hash
def _get_p_language(self):
@ -342,7 +348,7 @@ class Paste(object):
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True)
blob = TextBlob(clean(self.get_p_content()), tokenizer=tokenizer)
blob = TextBlob(clean( (self.get_p_content()) ), tokenizer=tokenizer)
for word in blob.tokens:
if word in words.keys():
@ -351,7 +357,7 @@ class Paste(object):
num = 0
words[word] = num + 1
if sort:
var = sorted(words.iteritems(), key=operator.itemgetter(1), reverse=True)
var = sorted(words.items(), key=operator.itemgetter(1), reverse=True)
else:
var = words

View file

@ -76,7 +76,7 @@ def checking_MX_record(r_serv, adress_set):
r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
except Exception as e:
print e
print(e)
publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
return (num, WalidMX)
@ -125,7 +125,7 @@ def checking_A_record(r_serv, domains_set):
publisher.debug('The Label is too long')
except Exception as e:
print e
print(e)
publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
return (num, WalidA)

View file

@ -82,16 +82,16 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
"""
threshold = 50
first_day = date(year, month, 01)
first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1])
words = []
with open(feederfilename, 'rb') as f:
with open(feederfilename, 'r') as f:
# words of the files
words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ])
headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f:
with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(headers)
@ -103,11 +103,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
# from the 1srt day to the last of the list
for word in words:
value = r_serv.hget(word, curdate)
if value is None:
row.append(0)
else:
# if the word have a value for the day
# FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold
value = r_serv.hget(word, curdate)
value = int(value.decode('utf8'))
if value >= threshold:
row.append(value)
writer.writerow(row)
@ -127,14 +130,14 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
"""
first_day = date(year, month, 01)
first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1])
redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
words = list(server.smembers(redis_set_name))
headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f:
with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(headers)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
'''
@ -48,7 +48,7 @@ if __name__ == '__main__':
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
print "queue empty"
print("queue empty")
time.sleep(1)
continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
"""
Template for new modules

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3.5
# -*-coding:UTF-8 -*
import socks
@ -63,5 +63,5 @@ if __name__ == "__main__":
t, path = tempfile.mkstemp()
with open(path, 'w') as f:
f.write(to_write)
print path
print(path)
exit(0)