python 3 backend upgrade

This commit is contained in:
Terrtia 2018-04-16 14:50:04 +02:00
parent 3395b16873
commit 9c82dd90ec
54 changed files with 445 additions and 370 deletions

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -33,7 +33,7 @@ if __name__ == "__main__":
PST = Paste.Paste(message) PST = Paste.Paste(message)
else: else:
publisher.debug("Script Attribute is idling 1s") publisher.debug("Script Attribute is idling 1s")
print 'sleeping' print('sleeping')
time.sleep(1) time.sleep(1)
continue continue
@ -45,6 +45,6 @@ if __name__ == "__main__":
# FIXME Not used. # FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path) PST.store.sadd("Pastes_Objects", PST.p_path)
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_PubSub_Categ Module The ZMQ_PubSub_Categ Module
@ -73,7 +73,7 @@ if __name__ == "__main__":
bname = os.path.basename(filename) bname = os.path.basename(filename)
tmp_dict[bname] = [] tmp_dict[bname] = []
with open(os.path.join(args.d, filename), 'r') as f: with open(os.path.join(args.d, filename), 'r') as f:
patterns = [r'%s' % re.escape(s.strip()) for s in f] patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
prec_filename = None prec_filename = None
@ -82,18 +82,25 @@ if __name__ == "__main__":
filename = p.get_from_set() filename = p.get_from_set()
if filename is None: if filename is None:
publisher.debug("Script Categ is Idling 10s") publisher.debug("Script Categ is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
continue continue
paste = Paste.Paste(filename) paste = Paste.Paste(filename)
content = paste.get_p_content() content = paste.get_p_content()
print('-----------------------------------------------------')
print(filename)
print(content)
print('-----------------------------------------------------')
for categ, pattern in tmp_dict.items(): for categ, pattern in tmp_dict.items():
found = set(re.findall(pattern, content)) found = set(re.findall(pattern, content))
if len(found) >= matchingThreshold: if len(found) >= matchingThreshold:
msg = '{} {}'.format(paste.p_path, len(found)) msg = '{} {}'.format(paste.p_path, len(found))
print msg, categ #msg = " ".join( [paste.p_path, bytes(len(found))] )
print(msg, categ)
p.populate_set_out(msg, categ) p.populate_set_out(msg, categ)
publisher.info( publisher.info(

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -61,26 +61,34 @@ if __name__ == "__main__":
minTopPassList = p.config.getint("Credential", "minTopPassList") minTopPassList = p.config.getint("Credential", "minTopPassList")
regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
while True: while True:
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
publisher.debug("Script Credential is Idling 10s") publisher.debug("Script Credential is Idling 10s")
print('sleeping 10s') print('sleeping 10s')
time.sleep(10) time.sleep(1)
continue continue
filepath, count = message.split() filepath, count = message.split(' ')
if count < minTopPassList: #if count < minTopPassList:
# Less than 5 matches from the top password list, false positive. # Less than 5 matches from the top password list, false positive.
print("false positive:", count) #print("false positive:", count)
continue #continue
paste = Paste.Paste(filepath) paste = Paste.Paste(filepath)
content = paste.get_p_content() content = paste.get_p_content()
creds = set(re.findall(regex_cred, content)) creds = set(re.findall(regex_cred, content))
print(len(creds))
print(creds)
print(content)
print('-----')
publisher.warning('to_print')
if len(creds) == 0: if len(creds) == 0:
continue continue
@ -89,7 +97,7 @@ if __name__ == "__main__":
message = 'Checked {} credentials found.'.format(len(creds)) message = 'Checked {} credentials found.'.format(len(creds))
if sites_set: if sites_set:
message += ' Related websites: {}'.format(', '.join(sites_set)) message += ' Related websites: {}'.format( (', '.join(sites_set)) )
to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path) to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path)
@ -97,12 +105,13 @@ if __name__ == "__main__":
#num of creds above tresh, publish an alert #num of creds above tresh, publish an alert
if len(creds) > criticalNumberToAlert: if len(creds) > criticalNumberToAlert:
print("========> Found more than 10 credentials in this file : {}".format(filepath)) print("========> Found more than 10 credentials in this file : {}".format( filepath ))
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate
p.populate_set_out(filepath, 'Duplicate') p.populate_set_out(filepath, 'Duplicate')
#Send to alertHandler #Send to alertHandler
p.populate_set_out('credential;{}'.format(filepath), 'alertHandler') msg = 'credential;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Put in form, count occurences, then send to moduleStats #Put in form, count occurences, then send to moduleStats
creds_sites = {} creds_sites = {}
@ -122,9 +131,10 @@ if __name__ == "__main__":
else: else:
creds_sites[domain] = 1 creds_sites[domain] = 1
for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats for site, num in creds_sites.items(): # Send for each different site to moduleStats
print 'credential;{};{};{}'.format(num, site, paste.p_date) mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') print(mssg)
p.populate_set_out(mssg, 'ModuleStats')
if sites_set: if sites_set:
print("=======> Probably on : {}".format(', '.join(sites_set))) print("=======> Probably on : {}".format(', '.join(sites_set)))
@ -158,4 +168,3 @@ if __name__ == "__main__":
for partCred in splitedCred: for partCred in splitedCred:
if len(partCred) > minimumLengthThreshold: if len(partCred) > minimumLengthThreshold:
server_cred.sadd(partCred, uniq_num_cred) server_cred.sadd(partCred, uniq_num_cred)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -18,6 +18,7 @@ from packages import Paste
from packages import lib_refine from packages import lib_refine
from pubsublogger import publisher from pubsublogger import publisher
import re import re
import sys
from Helper import Process from Helper import Process
@ -58,13 +59,14 @@ if __name__ == "__main__":
content = paste.get_p_content() content = paste.get_p_content()
all_cards = re.findall(regex, content) all_cards = re.findall(regex, content)
if len(all_cards) > 0: if len(all_cards) > 0:
print 'All matching', all_cards print('All matching', all_cards)
creditcard_set = set([]) creditcard_set = set([])
for card in all_cards: for card in all_cards:
clean_card = re.sub('[^0-9]', '', card) clean_card = re.sub('[^0-9]', '', card)
clean_card = clean_card
if lib_refine.is_luhn_valid(clean_card): if lib_refine.is_luhn_valid(clean_card):
print clean_card, 'is valid' print(clean_card, 'is valid')
creditcard_set.add(clean_card) creditcard_set.add(clean_card)
paste.__setattr__(channel, creditcard_set) paste.__setattr__(channel, creditcard_set)
@ -76,13 +78,15 @@ if __name__ == "__main__":
if (len(creditcard_set) > 0): if (len(creditcard_set) > 0):
publisher.warning('{}Checked {} valid number(s);{}'.format( publisher.warning('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path)) to_print, len(creditcard_set), paste.p_path))
print('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path))
#Send to duplicate #Send to duplicate
p.populate_set_out(filename, 'Duplicate') p.populate_set_out(filename, 'Duplicate')
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('creditcard;{}'.format(filename), 'alertHandler') msg = 'creditcard;{}'.format(filename)
p.populate_set_out(msg, 'alertHandler')
else: else:
publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path)) publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path))
else: else:
publisher.debug("Script creditcard is idling 1m") publisher.debug("Script creditcard is idling 1m")
time.sleep(10) time.sleep(10)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module. This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
@ -53,7 +53,7 @@ def check_if_tracked_term(term, path):
#add_paste to tracked_word_set #add_paste to tracked_word_set
set_name = "tracked_" + term set_name = "tracked_" + term
server_term.sadd(set_name, path) server_term.sadd(set_name, path)
print term, 'addded', set_name, '->', path print(term, 'addded', set_name, '->', path)
p.populate_set_out("New Term added", 'CurveManageTopSets') p.populate_set_out("New Term added", 'CurveManageTopSets')
# Send a notification only when the member is in the set # Send a notification only when the member is in the set
@ -149,15 +149,16 @@ if __name__ == "__main__":
if generate_new_graph: if generate_new_graph:
generate_new_graph = False generate_new_graph = False
print 'Building graph' print('Building graph')
today = datetime.date.today() today = datetime.date.today()
year = today.year year = today.year
month = today.month month = today.month
lib_words.create_curve_with_word_file(r_serv1, csv_path, lib_words.create_curve_with_word_file(r_serv1, csv_path,
wordfile_path, year, wordfile_path, year,
month) month)
publisher.debug("Script Curve is Idling") publisher.debug("Script Curve is Idling")
print "sleeping" print("sleeping")
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -16,7 +16,7 @@ from packages import lib_words
import datetime import datetime
import calendar import calendar
import os import os
import ConfigParser import configparser
# Config Variables # Config Variables
Refresh_rate = 60*5 #sec Refresh_rate = 60*5 #sec
@ -68,26 +68,26 @@ def manage_top_set():
# convert dico into sorted array # convert dico into sorted array
array_month = [] array_month = []
for w, v in dico.iteritems(): for w, v in dico.items():
array_month.append((w, v)) array_month.append((w, v))
array_month.sort(key=lambda tup: -tup[1]) array_month.sort(key=lambda tup: -tup[1])
array_month = array_month[0:20] array_month = array_month[0:20]
array_week = [] array_week = []
for w, v in dico_week.iteritems(): for w, v in dico_week.items():
array_week.append((w, v)) array_week.append((w, v))
array_week.sort(key=lambda tup: -tup[1]) array_week.sort(key=lambda tup: -tup[1])
array_week = array_week[0:20] array_week = array_week[0:20]
# convert dico_per_paste into sorted array # convert dico_per_paste into sorted array
array_month_per_paste = [] array_month_per_paste = []
for w, v in dico_per_paste.iteritems(): for w, v in dico_per_paste.items():
array_month_per_paste.append((w, v)) array_month_per_paste.append((w, v))
array_month_per_paste.sort(key=lambda tup: -tup[1]) array_month_per_paste.sort(key=lambda tup: -tup[1])
array_month_per_paste = array_month_per_paste[0:20] array_month_per_paste = array_month_per_paste[0:20]
array_week_per_paste = [] array_week_per_paste = []
for w, v in dico_week_per_paste.iteritems(): for w, v in dico_week_per_paste.items():
array_week_per_paste.append((w, v)) array_week_per_paste.append((w, v))
array_week_per_paste.sort(key=lambda tup: -tup[1]) array_week_per_paste.sort(key=lambda tup: -tup[1])
array_week_per_paste = array_week_per_paste[0:20] array_week_per_paste = array_week_per_paste[0:20]
@ -114,7 +114,7 @@ def manage_top_set():
timestamp = int(time.mktime(datetime.datetime.now().timetuple())) timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
value = str(timestamp) + ", " + "-" value = str(timestamp) + ", " + "-"
r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
print "refreshed module" print("refreshed module")
@ -131,7 +131,7 @@ if __name__ == '__main__':
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
@ -162,4 +162,3 @@ if __name__ == '__main__':
# Get one message from the input queue (module only work if linked with a queue) # Get one message from the input queue (module only work if linked with a queue)
time.sleep(Refresh_rate) # sleep a long time then manage the set time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set() manage_top_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The CVE Module The CVE Module
@ -32,7 +32,8 @@ def search_cve(message):
publisher.warning('{} contains CVEs'.format(paste.p_name)) publisher.warning('{} contains CVEs'.format(paste.p_name))
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('cve;{}'.format(filepath), 'alertHandler') msg = 'cve;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate #Send to duplicate
p.populate_set_out(filepath, 'Duplicate') p.populate_set_out(filepath, 'Duplicate')
@ -63,4 +64,3 @@ if __name__ == '__main__':
# Do something with the message from the queue # Do something with the message from the queue
search_cve(message) search_cve(message)

View file

@ -1,18 +1,18 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import argparse import argparse
import redis import redis
from pubsublogger import publisher from pubsublogger import publisher
from packages.lib_words import create_dirfile from packages.lib_words import create_dirfile
import ConfigParser import configparser
def main(): def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg') cfg.read('./packages/config.cfg')
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -44,6 +44,7 @@ def main():
continue continue
paste = PST.get_p_content() paste = PST.get_p_content()
mimetype = PST._get_p_encoding() mimetype = PST._get_p_encoding()
if mimetype == "text/plain": if mimetype == "text/plain":
c.text(rawtext=paste) c.text(rawtext=paste)
c.potentialdomain() c.potentialdomain()
@ -59,7 +60,7 @@ def main():
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
from pubsublogger import publisher from pubsublogger import publisher
@ -23,7 +23,7 @@ if __name__ == "__main__":
if message is not None: if message is not None:
f = open(dump_file, 'a') f = open(dump_file, 'a')
while message is not None: while message is not None:
print message print(message)
date = datetime.datetime.now() date = datetime.datetime.now()
if message is not None: if message is not None:
f.write(date.isoformat() + ' ' + message + '\n') f.write(date.isoformat() + ' ' + message + '\n')

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -49,8 +49,8 @@ if __name__ == "__main__":
# REDIS # # REDIS #
dico_redis = {} dico_redis = {}
date_today = datetime.today() date_today = datetime.today()
for year in xrange(2013, date_today.year+1): for year in range(2013, date_today.year+1):
for month in xrange(0, 13): for month in range(0, 13):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year, host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month) db=month)
@ -90,7 +90,7 @@ if __name__ == "__main__":
# Get the date of the range # Get the date of the range
date_range = date_today - timedelta(days = maximum_month_range*30.4166666) date_range = date_today - timedelta(days = maximum_month_range*30.4166666)
num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month) num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month)
for diff_month in xrange(0, num_of_month+1): for diff_month in range(0, num_of_month+1):
curr_date_range = date_today - timedelta(days = diff_month*30.4166666) curr_date_range = date_today - timedelta(days = diff_month*30.4166666)
to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2) to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2)
dico_range_list.append(to_append) dico_range_list.append(to_append)
@ -102,7 +102,7 @@ if __name__ == "__main__":
yearly_index = str(date_today.year)+'00' yearly_index = str(date_today.year)+'00'
r_serv0 = dico_redis[yearly_index] r_serv0 = dico_redis[yearly_index]
r_serv0.incr("current_index") r_serv0.incr("current_index")
index = r_serv0.get("current_index")+str(PST.p_date) index = (r_serv0.get("current_index")).decode('utf8') + str(PST.p_date)
# Open selected dico range # Open selected dico range
opened_dico = [] opened_dico = []
@ -114,8 +114,10 @@ if __name__ == "__main__":
# Go throught the Database of the dico (of the month) # Go throught the Database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico: for curr_dico_name, curr_dico_redis in opened_dico:
for hash_type, paste_hash in paste_hashes.iteritems(): for hash_type, paste_hash in paste_hashes.items():
for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type): for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
dico_hash = dico_hash.decode('utf8')
try: try:
if hash_type == 'ssdeep': if hash_type == 'ssdeep':
percent = 100-ssdeep.compare(dico_hash, paste_hash) percent = 100-ssdeep.compare(dico_hash, paste_hash)
@ -130,15 +132,18 @@ if __name__ == "__main__":
# index of paste # index of paste
index_current = r_serv_dico.get(dico_hash) index_current = r_serv_dico.get(dico_hash)
index_current = index_current.decode('utf8')
paste_path = r_serv_dico.get(index_current) paste_path = r_serv_dico.get(index_current)
paste_path = paste_path.decode('utf8')
paste_date = r_serv_dico.get(index_current+'_date') paste_date = r_serv_dico.get(index_current+'_date')
paste_date = paste_date.decode('utf8')
paste_date = paste_date if paste_date != None else "No date available" paste_date = paste_date if paste_date != None else "No date available"
if paste_path != None: if paste_path != None:
hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent))
except Exception,e: except Exception:
print str(e) print(str(e))
#print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash #print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash
# Add paste in DB after checking to prevent its analysis twice # Add paste in DB after checking to prevent its analysis twice
@ -147,7 +152,7 @@ if __name__ == "__main__":
r_serv1.set(index+'_date', PST._get_p_date()) r_serv1.set(index+'_date', PST._get_p_date())
r_serv1.sadd("INDEX", index) r_serv1.sadd("INDEX", index)
# Adding hashes in Redis # Adding hashes in Redis
for hash_type, paste_hash in paste_hashes.iteritems(): for hash_type, paste_hash in paste_hashes.items():
r_serv1.set(paste_hash, index) r_serv1.set(paste_hash, index)
r_serv1.sadd("HASHS_"+hash_type, paste_hash) r_serv1.sadd("HASHS_"+hash_type, paste_hash)
@ -166,7 +171,7 @@ if __name__ == "__main__":
PST.__setattr__("p_duplicate", dupl) PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path)) publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
print '{}Detected {}'.format(to_print, len(dupl)) print('{}Detected {}'.format(to_print, len(dupl)))
y = time.time() y = time.time()
@ -176,5 +181,5 @@ if __name__ == "__main__":
except IOError: except IOError:
to_print = 'Duplicate;{};{};{};'.format( to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name) PST.p_source, PST.p_date, PST.p_name)
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('{}CRC Checksum Failed'.format(to_print)) publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -33,8 +33,8 @@ if __name__ == "__main__":
# DB OBJECT & HASHS ( DISK ) # DB OBJECT & HASHS ( DISK )
# FIXME increase flexibility # FIXME increase flexibility
dico_redis = {} dico_redis = {}
for year in xrange(2013, 2017): for year in range(2013, 2017):
for month in xrange(0, 16): for month in range(0, 16):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year, host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month) db=month)
@ -147,7 +147,7 @@ if __name__ == "__main__":
if percentage >= 50: if percentage >= 50:
dupl.append((paste, percentage)) dupl.append((paste, percentage))
else: else:
print 'percentage: ' + str(percentage) print('percentage: ' + str(percentage))
# Creating the object attribute and save it. # Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format( to_print = 'Duplicate;{};{};{};'.format(
@ -156,11 +156,11 @@ if __name__ == "__main__":
PST.__setattr__("p_duplicate", dupl) PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {}'.format(to_print, len(dupl))) publisher.info('{}Detected {}'.format(to_print, len(dupl)))
print '{}Detected {}'.format(to_print, len(dupl)) print('{}Detected {}'.format(to_print, len(dupl)))
y = time.time() y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('{}CRC Checksum Failed'.format(to_print)) publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -67,7 +67,7 @@ if __name__ == '__main__':
continue continue
# Creating the full filepath # Creating the full filepath
filename = os.path.join(os.environ['AIL_HOME'], filename = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "pastes"), paste.decode('utf8')) p.config.get("Directories", "pastes"), paste)
#print(filename) #print(filename)
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
@ -77,5 +77,5 @@ if __name__ == '__main__':
f.write(base64.standard_b64decode(gzip64encoded)) f.write(base64.standard_b64decode(gzip64encoded))
print(filename) print(filename)
p.populate_set_out(filename.encode('utf8')) p.populate_set_out(filename)
processed_paste+=1 processed_paste+=1

View file

@ -58,7 +58,7 @@ class PubSub(object):
new_sub = context.socket(zmq.SUB) new_sub = context.socket(zmq.SUB)
new_sub.connect(address) new_sub.connect(address)
# bytes64 encode bytes to ascii only bytes # bytes64 encode bytes to ascii only bytes
new_sub.setsockopt(zmq.SUBSCRIBE, channel.encode('ascii')) new_sub.setsockopt_string(zmq.SUBSCRIBE, channel)
self.subscribers.append(new_sub) self.subscribers.append(new_sub)
def setup_publish(self, conn_name): def setup_publish(self, conn_name):
@ -78,15 +78,15 @@ class PubSub(object):
self.publishers['ZMQ'].append((p, channel)) self.publishers['ZMQ'].append((p, channel))
def publish(self, message): def publish(self, message):
m = json.loads(message.decode('ascii')) m = json.loads(message)
channel_message = m.get('channel') channel_message = m.get('channel')
for p, channel in self.publishers['Redis']: for p, channel in self.publishers['Redis']:
if channel_message is None or channel_message == channel: if channel_message is None or channel_message == channel:
p.publish(channel, ( m['message']).encode('ascii') ) p.publish(channel, ( m['message']) )
for p, channel in self.publishers['ZMQ']: for p, channel in self.publishers['ZMQ']:
if channel_message is None or channel_message == channel: if channel_message is None or channel_message == channel:
mess = ( m['message'] ).encode('ascii') p.send('{} {}'.format(channel, m['message']))
p.send(b' '.join( [channel, mess] ) ) #p.send(b' '.join( [channel, mess] ) )
def subscribe(self): def subscribe(self):
@ -99,7 +99,8 @@ class PubSub(object):
for sub in self.subscribers: for sub in self.subscribers:
try: try:
msg = sub.recv(zmq.NOBLOCK) msg = sub.recv(zmq.NOBLOCK)
yield msg.split(b" ", 1)[1] msg = msg.decode('utf8')
yield msg.split(" ", 1)[1]
except zmq.error.Again as e: except zmq.error.Again as e:
time.sleep(0.2) time.sleep(0.2)
pass pass
@ -150,6 +151,12 @@ class Process(object):
self.r_temp.hset('queues', self.subscriber_name, self.r_temp.hset('queues', self.subscriber_name,
int(self.r_temp.scard(in_set))) int(self.r_temp.scard(in_set)))
message = self.r_temp.spop(in_set) message = self.r_temp.spop(in_set)
try:
message = message.decode('utf8')
except AttributeError:
pass
timestamp = int(time.mktime(datetime.datetime.now().timetuple())) timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes') dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
@ -158,12 +165,12 @@ class Process(object):
else: else:
#try: #try:
if b'.gz' in message: if '.gz' in message:
path = message.split(b".")[-2].split(b"/")[-1] path = message.split(".")[-2].split("/")[-1]
#find start of path with AIL_HOME #find start of path with AIL_HOME
index_s = (message.decode('ascii')).find(os.environ['AIL_HOME']) index_s = message.find(os.environ['AIL_HOME'])
#Stop when .gz #Stop when .gz
index_e = message.find(b".gz")+3 index_e = message.find(".gz")+3
if(index_s == -1): if(index_s == -1):
complete_path = message[0:index_e] complete_path = message[0:index_e]
else: else:
@ -173,7 +180,7 @@ class Process(object):
path = "-" path = "-"
complete_path = "?" complete_path = "?"
value = str(timestamp) + ", " + path.decode('ascii') value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path)
self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
@ -190,13 +197,12 @@ class Process(object):
def populate_set_out(self, msg, channel=None): def populate_set_out(self, msg, channel=None):
# multiproc # multiproc
msg = msg.decode('ascii')
msg = {'message': msg} msg = {'message': msg}
if channel is not None: if channel is not None:
msg.update({'channel': channel}) msg.update({'channel': channel})
# bytes64 encode bytes to ascii only bytes # bytes64 encode bytes to ascii only bytes
j = (json.dumps(msg)).encode('ascii') j = json.dumps(msg)
self.r_temp.sadd(self.subscriber_name + 'out', j) self.r_temp.sadd(self.subscriber_name + 'out', j)
def publish(self): def publish(self):
@ -209,6 +215,12 @@ class Process(object):
self.pubsub.setup_publish(name) self.pubsub.setup_publish(name)
while True: while True:
message = self.r_temp.spop(self.subscriber_name + 'out') message = self.r_temp.spop(self.subscriber_name + 'out')
try:
message = message.decode('utf8')
except AttributeError:
pass
if message is None: if message is None:
time.sleep(1) time.sleep(1)
continue continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -107,10 +107,11 @@ if __name__ == "__main__":
continue continue
docpath = message.split(" ", -1)[-1] docpath = message.split(" ", -1)[-1]
paste = PST.get_p_content() paste = PST.get_p_content()
print "Indexing - "+indexname+" :", docpath print("Indexing - " + indexname + " :", docpath)
if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message #avoid calculating the index's size at each message
if( time.time() - last_refresh > TIME_WAIT):
last_refresh = time.time() last_refresh = time.time()
if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000): if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000):
timestamp = int(time.time()) timestamp = int(time.time())
@ -128,11 +129,11 @@ if __name__ == "__main__":
if indexertype == "whoosh": if indexertype == "whoosh":
indexwriter = ix.writer() indexwriter = ix.writer()
indexwriter.update_document( indexwriter.update_document(
title=unicode(docpath, errors='ignore'), title=docpath,
path=unicode(docpath, errors='ignore'), path=docpath,
content=unicode(paste, errors='ignore')) content=paste)
indexwriter.commit() indexwriter.commit()
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))

View file

@ -15,46 +15,49 @@ RSA private key, certificate messages
import time import time
from pubsublogger import publisher from pubsublogger import publisher
from bin.packages import Paste #from bin.packages import Paste
from bin.Helper import Process #from bin.Helper import Process
from packages import Paste
from Helper import Process
def search_key(paste): def search_key(paste):
content = paste.get_p_content() content = paste.get_p_content()
find = False find = False
if b'-----BEGIN PGP MESSAGE-----' in content: if '-----BEGIN PGP MESSAGE-----' in content:
publisher.warning('{} has a PGP enc message'.format(paste.p_name)) publisher.warning('{} has a PGP enc message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN CERTIFICATE-----' in content: if '-----BEGIN CERTIFICATE-----' in content:
publisher.warning('{} has a certificate message'.format(paste.p_name)) publisher.warning('{} has a certificate message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN RSA PRIVATE KEY-----' in content: if '-----BEGIN RSA PRIVATE KEY-----' in content:
publisher.warning('{} has a RSA private key message'.format(paste.p_name)) publisher.warning('{} has a RSA private key message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN PRIVATE KEY-----' in content: if '-----BEGIN PRIVATE KEY-----' in content:
publisher.warning('{} has a private key message'.format(paste.p_name)) publisher.warning('{} has a private key message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN ENCRYPTED PRIVATE KEY-----' in content: if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content:
publisher.warning('{} has an encrypted private key message'.format(paste.p_name)) publisher.warning('{} has an encrypted private key message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN OPENSSH PRIVATE KEY-----' in content: if '-----BEGIN OPENSSH PRIVATE KEY-----' in content:
publisher.warning('{} has an openssh private key message'.format(paste.p_name)) publisher.warning('{} has an openssh private key message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN DSA PRIVATE KEY-----' in content: if '-----BEGIN DSA PRIVATE KEY-----' in content:
publisher.warning('{} has a dsa private key message'.format(paste.p_name)) publisher.warning('{} has a dsa private key message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN EC PRIVATE KEY-----' in content: if '-----BEGIN EC PRIVATE KEY-----' in content:
publisher.warning('{} has an ec private key message'.format(paste.p_name)) publisher.warning('{} has an ec private key message'.format(paste.p_name))
find = True find = True
if b'-----BEGIN PGP PRIVATE KEY BLOCK-----' in content: if '-----BEGIN PGP PRIVATE KEY BLOCK-----' in content:
publisher.warning('{} has a pgp private key block message'.format(paste.p_name)) publisher.warning('{} has a pgp private key block message'.format(paste.p_name))
find = True find = True
@ -63,7 +66,9 @@ def search_key(paste):
#Send to duplicate #Send to duplicate
p.populate_set_out(message, 'Duplicate') p.populate_set_out(message, 'Duplicate')
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('keys;{}'.format(message), 'alertHandler') msg = ('keys;{}'.format(message))
print(message)
p.populate_set_out( msg, 'alertHandler')
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -130,8 +130,8 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Lines" bash -c 'python3 Lines.py; read x' screen -S "Script_AIL" -X screen -t "Lines" bash -c 'python3 Lines.py; read x'
sleep 0.1 sleep 0.1
#screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
#sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Categ" bash -c 'python3 Categ.py; read x' screen -S "Script_AIL" -X screen -t "Categ" bash -c 'python3 Categ.py; read x'
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Tokenize" bash -c 'python3 Tokenize.py; read x' screen -S "Script_AIL" -X screen -t "Tokenize" bash -c 'python3 Tokenize.py; read x'
@ -142,8 +142,8 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x' screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1 sleep 0.1
#screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x' screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x'
#sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x' screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x'
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Curve" bash -c './Curve.py; read x' screen -S "Script_AIL" -X screen -t "Curve" bash -c './Curve.py; read x'
@ -168,8 +168,8 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
sleep 0.1 sleep 0.1
#screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
#sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "alertHandler" bash -c './alertHandler.py; read x' screen -S "Script_AIL" -X screen -t "alertHandler" bash -c './alertHandler.py; read x'
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -61,7 +61,7 @@ if __name__ == '__main__':
while True: while True:
try: try:
message = p.get_from_set() message = p.get_from_set()
print message print(message)
if message is not None: if message is not None:
PST = Paste.Paste(message) PST = Paste.Paste(message)
else: else:
@ -77,8 +77,8 @@ if __name__ == '__main__':
# FIXME Not used. # FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path) PST.store.sadd("Pastes_Objects", PST.p_path)
if lines_infos[1] < args.max: if lines_infos[1] < args.max:
p.populate_set_out(PST.p_path, 'LinesShort') p.populate_set_out( PST.p_path , 'LinesShort')
else: else:
p.populate_set_out(PST.p_path, 'LinesLong') p.populate_set_out( PST.p_path , 'LinesLong')
except IOError: except IOError:
print "CRC Checksum Error on : ", PST.p_path print("CRC Checksum Error on : ", PST.p_path)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -78,14 +78,14 @@ if __name__ == "__main__":
publisher.info(to_print) publisher.info(to_print)
#Send to ModuleStats #Send to ModuleStats
for mail in MX_values[1]: for mail in MX_values[1]:
print 'mail;{};{};{}'.format(1, mail, PST.p_date) print('mail;{};{};{}'.format(1, mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats')
prec_filename = filename prec_filename = filename
else: else:
publisher.debug("Script Mails is Idling 10s") publisher.debug("Script Mails is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()

View file

@ -91,7 +91,7 @@ if __name__ == '__main__':
complete_paste, gzip64encoded = splitted complete_paste, gzip64encoded = splitted
try: try:
feeder_name = ( complete_paste.replace(b"archive/",b"") ).split(b"/")[0] feeder_name = ( complete_paste.replace("archive/","") ).split("/")[0]
# TODO take real name ? # TODO take real name ?
paste_name = complete_paste paste_name = complete_paste
@ -109,9 +109,10 @@ if __name__ == '__main__':
processed_paste_per_feeder[feeder_name] = 1 processed_paste_per_feeder[feeder_name] = 1
duplicated_paste_per_feeder[feeder_name] = 0 duplicated_paste_per_feeder[feeder_name] = 0
relay_message = b" ".join( [paste_name, gzip64encoded] ) relay_message = "{0} {1}".format(paste_name, gzip64encoded)
#relay_message = b" ".join( [paste_name, gzip64encoded] )
digest = hashlib.sha1(gzip64encoded).hexdigest() digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest()
# Avoid any duplicate coming from any sources # Avoid any duplicate coming from any sources
if operation_mode == 1: if operation_mode == 1:

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -20,7 +20,7 @@ import os
import signal import signal
import argparse import argparse
from subprocess import PIPE, Popen from subprocess import PIPE, Popen
import ConfigParser import configparser
import json import json
from terminaltables import AsciiTable from terminaltables import AsciiTable
import textwrap import textwrap
@ -51,7 +51,7 @@ last_refresh = 0
def getPid(module): def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout: for line in p.stdout:
print line print(line)
splittedLine = line.split() splittedLine = line.split()
if 'python2' in splittedLine: if 'python2' in splittedLine:
return int(splittedLine[0]) return int(splittedLine[0])
@ -76,7 +76,7 @@ def cleanRedis():
flag_pid_valid = True flag_pid_valid = True
if not flag_pid_valid: if not flag_pid_valid:
print flag_pid_valid, 'cleaning', pid, 'in', k print(flag_pid_valid, 'cleaning', pid, 'in', k)
server.srem(k, pid) server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k]) printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
@ -85,11 +85,11 @@ def cleanRedis():
def kill_module(module, pid): def kill_module(module, pid):
print '' print('')
print '-> trying to kill module:', module print('-> trying to kill module:', module)
if pid is None: if pid is None:
print 'pid was None' print('pid was None')
printarrayGlob.insert(1, [0, module, pid, "PID was None"]) printarrayGlob.insert(1, [0, module, pid, "PID was None"])
printarrayGlob.pop() printarrayGlob.pop()
pid = getPid(module) pid = getPid(module)
@ -102,15 +102,15 @@ def kill_module(module, pid):
try: try:
os.kill(pid, signal.SIGUSR1) os.kill(pid, signal.SIGUSR1)
except OSError: except OSError:
print pid, 'already killed' print(pid, 'already killed')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"]) printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"])
printarrayGlob.pop() printarrayGlob.pop()
return return
time.sleep(1) time.sleep(1)
if getPid(module) is None: if getPid(module) is None:
print module, 'has been killed' print(module, 'has been killed')
print 'restarting', module, '...' print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -119,7 +119,7 @@ def kill_module(module, pid):
printarrayGlob.pop() printarrayGlob.pop()
else: else:
print 'killing failed, retrying...' print('killing failed, retrying...')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."]) printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."])
printarrayGlob.pop() printarrayGlob.pop()
@ -128,8 +128,8 @@ def kill_module(module, pid):
os.kill(pid, signal.SIGUSR1) os.kill(pid, signal.SIGUSR1)
time.sleep(1) time.sleep(1)
if getPid(module) is None: if getPid(module) is None:
print module, 'has been killed' print(module, 'has been killed')
print 'restarting', module, '...' print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -137,12 +137,12 @@ def kill_module(module, pid):
printarrayGlob.pop() printarrayGlob.pop()
printarrayGlob.pop() printarrayGlob.pop()
else: else:
print 'killing failed!' print('killing failed!')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"])
printarrayGlob.pop() printarrayGlob.pop()
else: else:
print 'Module does not exist' print('Module does not exist')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"])
printarrayGlob.pop() printarrayGlob.pop()
@ -192,7 +192,7 @@ if __name__ == "__main__":
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# REDIS # # REDIS #
@ -227,7 +227,7 @@ if __name__ == "__main__":
printarray1 = [] printarray1 = []
printarray2 = [] printarray2 = []
printarray3 = [] printarray3 = []
for queue, card in server.hgetall("queues").iteritems(): for queue, card in server.hgetall("queues").items():
all_queue.add(queue) all_queue.add(queue)
key = "MODULE_" + queue + "_" key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue keySet = "MODULE_TYPE_" + queue
@ -337,15 +337,15 @@ if __name__ == "__main__":
legend = AsciiTable(legend_array, title="Legend") legend = AsciiTable(legend_array, title="Legend")
legend.column_max_width(1) legend.column_max_width(1)
print legend.table print(legend.table)
print '\n' print('\n')
print t1.table print(t1.table)
print '\n' print('\n')
print t2.table print(t2.table)
print '\n' print('\n')
print t3.table print(t3.table)
print '\n' print('\n')
print t4.table print(t4.table)
if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
lastTime = datetime.datetime.now() lastTime = datetime.datetime.now()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This module makes statistics for some modules and providers This module makes statistics for some modules and providers
@ -52,10 +52,10 @@ def compute_most_posted(server, message):
# Member set is a list of (value, score) pairs # Member set is a list of (value, score) pairs
if int(member_set[0][1]) < keyword_total_sum: if int(member_set[0][1]) < keyword_total_sum:
#remove min from set and add the new one #remove min from set and add the new one
print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server.zrem(redis_progression_name_set, member_set[0][0]) server.zrem(redis_progression_name_set, member_set[0][0])
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword) server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
print redis_progression_name_set print(redis_progression_name_set)
def compute_provider_info(server_trend, server_pasteName, path): def compute_provider_info(server_trend, server_pasteName, path):
@ -94,7 +94,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs # Member set is a list of (value, score) pairs
if float(member_set[0][1]) < new_avg: if float(member_set[0][1]) < new_avg:
#remove min from set and add the new one #remove min from set and add the new one
print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(redis_sum_size_set, member_set[0][0]) server_trend.zrem(redis_sum_size_set, member_set[0][0])
server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider) server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
server_trend.zrem(redis_avg_size_name_set, member_set[0][0]) server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
@ -110,7 +110,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs # Member set is a list of (value, score) pairs
if int(member_set[0][1]) < num_paste: if int(member_set[0][1]) < num_paste:
#remove min from set and add the new one #remove min from set and add the new one
print 'Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(member_set[0][0]) server_trend.zrem(member_set[0][0])
server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider) server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
@ -149,7 +149,7 @@ if __name__ == '__main__':
if message is None: if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping' print('sleeping')
time.sleep(20) time.sleep(20)
continue continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \ from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
@ -10,7 +10,7 @@ from asciimatics.event import Event
from asciimatics.event import KeyboardEvent, MouseEvent from asciimatics.event import KeyboardEvent, MouseEvent
import sys, os import sys, os
import time, datetime import time, datetime
import argparse, ConfigParser import argparse, configparser
import json import json
import redis import redis
import psutil import psutil
@ -497,7 +497,7 @@ MANAGE MODULES AND GET INFOS
def getPid(module): def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout: for line in p.stdout:
print line print(line)
splittedLine = line.split() splittedLine = line.split()
if 'python2' in splittedLine: if 'python2' in splittedLine:
return int(splittedLine[0]) return int(splittedLine[0])
@ -511,21 +511,21 @@ def clearRedisModuleInfo():
def cleanRedis(): def cleanRedis():
for k in server.keys("MODULE_TYPE_*"): for k in server.keys("MODULE_TYPE_*"):
moduleName = k[12:].split('_')[0] moduleName = (k[12:].decode('utf8')).split('_')[0]
for pid in server.smembers(k): for pid in server.smembers(k):
flag_pid_valid = False flag_pid_valid = False
proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
try: try:
for line in proc.stdout: for line in proc.stdout:
splittedLine = line.split() splittedLine = line.split()
if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine: if ('python3' in splittedLine or 'python' in splittedLine) and "./"+moduleName + ".py" in splittedLine:
flag_pid_valid = True flag_pid_valid = True
if not flag_pid_valid: if not flag_pid_valid:
#print flag_pid_valid, 'cleaning', pid, 'in', k #print flag_pid_valid, 'cleaning', pid, 'in', k
server.srem(k, pid) server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0)) log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + (k).decode('utf8')], 0))
#Error due to resize, interrupted sys call #Error due to resize, interrupted sys call
except IOError as e: except IOError as e:
@ -601,7 +601,9 @@ def fetchQueueData():
printarray_running = [] printarray_running = []
printarray_idle = [] printarray_idle = []
printarray_notrunning = [] printarray_notrunning = []
for queue, card in server.hgetall("queues").iteritems(): for queue, card in iter(server.hgetall("queues").items()):
queue = queue.decode('utf8')
card = card.decode('utf8')
all_queue.add(queue) all_queue.add(queue)
key = "MODULE_" + queue + "_" key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue keySet = "MODULE_TYPE_" + queue
@ -715,6 +717,11 @@ def format_string(tab, padding_row):
text="" text=""
for ite, elem in enumerate(the_array): for ite, elem in enumerate(the_array):
try:
elem = elem.decode('utf8')
except AttributeError:
pass
if len(elem) > padding_row[ite]: if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:] text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5 padd_off = " "*5
@ -761,7 +768,7 @@ def demo(screen):
if time.time() - time_cooldown > args.refresh: if time.time() - time_cooldown > args.refresh:
cleanRedis() cleanRedis()
for key, val in fetchQueueData().iteritems(): #fetch data and put it into the tables for key, val in iter(fetchQueueData().items()): #fetch data and put it into the tables
TABLES[key] = val TABLES[key] = val
TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"]) TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"])
@ -790,7 +797,7 @@ if __name__ == "__main__":
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# REDIS # # REDIS #

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import ConfigParser import configparser
import os import os
import smtplib import smtplib
from email.MIMEMultipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
""" """
@ -28,7 +28,7 @@ def sendEmailNotification(recipient, term):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv?') Or activate the virtualenv?')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
sender = cfg.get("Notifications", "sender"), sender = cfg.get("Notifications", "sender"),
@ -76,8 +76,5 @@ def sendEmailNotification(recipient, term):
smtp_server.quit() smtp_server.quit()
except Exception as e: except Exception as e:
print str(e) print(str(e))
# raise e # raise e

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_Sub_Onion Module The ZMQ_Sub_Onion Module
@ -37,7 +37,7 @@ from Helper import Process
def fetch(p, r_cache, urls, domains, path): def fetch(p, r_cache, urls, domains, path):
failed = [] failed = []
downloaded = [] downloaded = []
print len(urls), 'Urls to fetch.' print(len(urls), 'Urls to fetch.')
for url, domain in zip(urls, domains): for url, domain in zip(urls, domains):
if r_cache.exists(url) or url in failed: if r_cache.exists(url) or url in failed:
continue continue
@ -73,9 +73,9 @@ def fetch(p, r_cache, urls, domains, path):
r_cache.setbit(url, 0, 0) r_cache.setbit(url, 0, 0)
r_cache.expire(url, 3600) r_cache.expire(url, 3600)
failed.append(url) failed.append(url)
print 'Failed at downloading', url print('Failed at downloading', url)
print process.stdout.read() print(process.stdout.read())
print 'Failed:', len(failed), 'Downloaded:', len(downloaded) print('Failed:', len(failed), 'Downloaded:', len(downloaded))
if __name__ == "__main__": if __name__ == "__main__":
@ -109,7 +109,7 @@ if __name__ == "__main__":
while True: while True:
if message is not None: if message is not None:
print message print(message)
filename, score = message.split() filename, score = message.split()
# "For each new paste" # "For each new paste"
@ -152,6 +152,6 @@ if __name__ == "__main__":
prec_filename = filename prec_filename = filename
else: else:
publisher.debug("Script url is Idling 10s") publisher.debug("Script url is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -30,10 +30,11 @@ def search_phone(message):
# if the list is greater than 4, we consider the Paste may contain a list of phone numbers # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
if len(results) > 4: if len(results) > 4:
print results print(results)
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('phone;{}'.format(message), 'alertHandler') msg = 'phone;{}'.format(message)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate #Send to duplicate
p.populate_set_out(message, 'Duplicate') p.populate_set_out(message, 'Duplicate')
stats = {} stats = {}

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
from pubsublogger import publisher from pubsublogger import publisher

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis import redis
import argparse import argparse
import ConfigParser import configparser
import time import time
import os import os
from pubsublogger import publisher from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg') cfg.read('./packages/config.cfg')
# SCRIPT PARSER # # SCRIPT PARSER #
@ -49,7 +49,7 @@ def main():
row.sort() row.sort()
table.add_rows(row, header=False) table.add_rows(row, header=False)
os.system('clear') os.system('clear')
print table.draw() print(table.draw())
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This Module is used for term frequency. This Module is used for term frequency.
@ -76,6 +76,9 @@ if __name__ == "__main__":
dico_regex, dico_regexname_to_redis = refresh_dicos() dico_regex, dico_regexname_to_redis = refresh_dicos()
print('dico got refreshed') print('dico got refreshed')
print(dico_regex)
print(dico_regexname_to_redis)
filename = message filename = message
temp = filename.split('/') temp = filename.split('/')
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
@ -115,6 +118,6 @@ if __name__ == "__main__":
else: else:
publisher.debug("Script RegexForTermsFrequency is Idling") publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping" print("sleeping")
time.sleep(5) time.sleep(5)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import time import time
from packages import Paste from packages import Paste
@ -29,7 +29,7 @@ if __name__ == "__main__":
filepath = p.get_from_set() filepath = p.get_from_set()
if filepath is None: if filepath is None:
publisher.debug("Script Release is Idling 10s") publisher.debug("Script Release is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
continue continue
@ -40,6 +40,7 @@ if __name__ == "__main__":
continue continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
print(to_print)
if len(releases) > 30: if len(releases) > 30:
publisher.warning(to_print) publisher.warning(to_print)
else: else:

View file

@ -1,9 +1,9 @@
#!/usr/bin/python2.7 #!/usr/bin/python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis import redis
import argparse import argparse
import ConfigParser import configparser
from datetime import datetime from datetime import datetime
from pubsublogger import publisher from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg') cfg.read('./packages/config.cfg')
# SCRIPT PARSER # # SCRIPT PARSER #

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -13,7 +13,7 @@ It test different possibility to makes some sqlInjection.
import time import time
import string import string
import urllib2 import urllib.request
import re import re
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
@ -74,8 +74,8 @@ def analyse(url, path):
if (result_path > 0) or (result_query > 0): if (result_path > 0) or (result_query > 0):
paste = Paste.Paste(path) paste = Paste.Paste(path)
if (result_path > 1) or (result_query > 1): if (result_path > 1) or (result_query > 1):
print "Detected SQL in URL: " print("Detected SQL in URL: ")
print urllib2.unquote(url) print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate
@ -83,8 +83,8 @@ def analyse(url, path):
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler') p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler')
else: else:
print "Potential SQL injection:" print("Potential SQL injection:")
print urllib2.unquote(url) print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path)
publisher.info(to_print) publisher.info(to_print)
@ -92,7 +92,7 @@ def analyse(url, path):
# Try to detect if the url passed might be an sql injection by applying the regex # Try to detect if the url passed might be an sql injection by applying the regex
# defined above on it. # defined above on it.
def is_sql_injection(url_parsed): def is_sql_injection(url_parsed):
line = urllib2.unquote(url_parsed) line = urllib.request.unquote(url_parsed)
line = string.upper(line) line = string.upper(line)
result = [] result = []
result_suspect = [] result_suspect = []
@ -114,10 +114,10 @@ def is_sql_injection(url_parsed):
result_suspect.append(line[temp_res:temp_res+len(word)]) result_suspect.append(line[temp_res:temp_res+len(word)])
if len(result)>0: if len(result)>0:
print result print(result)
return 2 return 2
elif len(result_suspect)>0: elif len(result_suspect)>0:
print result_suspect print(result_suspect)
return 1 return 1
else: else:
return 0 return 0

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
Sentiment analyser module. Sentiment analyser module.
@ -33,7 +33,7 @@ size_threshold = 250
line_max_length_threshold = 1000 line_max_length_threshold = 1000
import os import os
import ConfigParser import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile): if not os.path.exists(configfile):
@ -41,7 +41,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
@ -69,7 +69,7 @@ def Analyse(message, server):
combined_datetime = datetime.datetime.combine(the_date, the_time) combined_datetime = datetime.datetime.combine(the_date, the_time)
timestamp = calendar.timegm(combined_datetime.timetuple()) timestamp = calendar.timegm(combined_datetime.timetuple())
sentences = tokenize.sent_tokenize(p_content.decode('utf-8', 'ignore')) sentences = tokenize.sent_tokenize(p_content)
if len(sentences) > 0: if len(sentences) > 0:
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
@ -109,11 +109,11 @@ def Analyse(message, server):
provider_timestamp = provider + '_' + str(timestamp) provider_timestamp = provider + '_' + str(timestamp)
server.incr('UniqID') server.incr('UniqID')
UniqID = server.get('UniqID') UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines' print(provider_timestamp, '->', UniqID.decode('utf8'), 'dropped', num_line_removed, 'lines')
server.sadd(provider_timestamp, UniqID) server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score) server.set(UniqID, avg_score)
else: else:
print 'Dropped:', p_MimeType print('Dropped:', p_MimeType)
def isJSON(content): def isJSON(content):
@ -121,7 +121,7 @@ def isJSON(content):
json.loads(content) json.loads(content)
return True return True
except Exception,e: except Exception:
return False return False
import signal import signal
@ -170,4 +170,3 @@ if __name__ == '__main__':
continue continue
else: else:
signal.alarm(0) signal.alarm(0)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This Module is used for term frequency. This Module is used for term frequency.
@ -126,6 +126,6 @@ if __name__ == "__main__":
else: else:
publisher.debug("Script RegexForTermsFrequency is Idling") publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping" print("sleeping")
time.sleep(5) time.sleep(5)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_Feed_Q Module The ZMQ_Feed_Q Module
@ -21,7 +21,7 @@ Requirements
""" """
import redis import redis
import ConfigParser import configparser
import os import os
configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg')
@ -31,7 +31,7 @@ def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# REDIS # REDIS

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import time import time
from packages import Paste from packages import Paste
@ -29,7 +29,7 @@ if __name__ == "__main__":
languages = [shell, c, php, bash, python, javascript, bash, ruby, adr] languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
regex = '|'.join(languages) regex = '|'.join(languages)
print regex print(regex)
while True: while True:
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The Tokenize Module The Tokenize Module
@ -50,7 +50,7 @@ if __name__ == "__main__":
while True: while True:
message = p.get_from_set() message = p.get_from_set()
print message print(message)
if message is not None: if message is not None:
paste = Paste.Paste(message) paste = Paste.Paste(message)
signal.alarm(5) signal.alarm(5)
@ -67,4 +67,4 @@ if __name__ == "__main__":
else: else:
publisher.debug("Tokeniser is idling 10s") publisher.debug("Tokeniser is idling 10s")
time.sleep(10) time.sleep(10)
print "sleepin" print("Sleeping")

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import ConfigParser import configparser
from ConfigParser import ConfigParser as cfgP from configparser import ConfigParser as cfgP
import os import os
from collections import OrderedDict from collections import OrderedDict
import sys import sys
@ -20,9 +20,9 @@ def main():
Or activate the virtualenv.') Or activate the virtualenv.')
configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample') configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
cfgSample = ConfigParser.ConfigParser() cfgSample = configparser.ConfigParser()
cfgSample.read(configfileSample) cfgSample.read(configfileSample)
sections = cfgP.sections(cfg) sections = cfgP.sections(cfg)
@ -109,4 +109,3 @@ if __name__ == "__main__":
sys.exit() sys.exit()
else: else:
sys.exit(1) sys.exit(1)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -100,7 +100,7 @@ if __name__ == "__main__":
publisher.debug('{} Published'.format(url)) publisher.debug('{} Published'.format(url))
if f1 == "onion": if f1 == "onion":
print domain print(domain)
hostl = unicode(avoidNone(subdomain)+avoidNone(domain)) hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
try: try:
@ -121,16 +121,16 @@ if __name__ == "__main__":
# EU is not an official ISO 3166 code (but used by RIPE # EU is not an official ISO 3166 code (but used by RIPE
# IP allocation) # IP allocation)
if cc is not None and cc != "EU": if cc is not None and cc != "EU":
print hostl, asn, cc, \ print(hostl, asn, cc, \
pycountry.countries.get(alpha_2=cc).name pycountry.countries.get(alpha_2=cc).name)
if cc == cc_critical: if cc == cc_critical:
to_print = 'Url;{};{};{};Detected {} {}'.format( to_print = 'Url;{};{};{};Detected {} {}'.format(
PST.p_source, PST.p_date, PST.p_name, PST.p_source, PST.p_date, PST.p_name,
hostl, cc) hostl, cc)
#publisher.warning(to_print) #publisher.warning(to_print)
print to_print print(to_print)
else: else:
print hostl, asn, cc print(hostl, asn, cc)
A_values = lib_refine.checking_A_record(r_serv2, A_values = lib_refine.checking_A_record(r_serv2,
domains_list) domains_list)
@ -146,7 +146,7 @@ if __name__ == "__main__":
else: else:
publisher.debug("Script url is Idling 10s") publisher.debug("Script url is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -33,7 +33,7 @@ def analyse(server, field_name, date, url_parsed):
if field_name == "domain": #save domain in a set for the monthly plot if field_name == "domain": #save domain in a set for the monthly plot
domain_set_name = "domain_set_" + date[0:6] domain_set_name = "domain_set_" + date[0:6]
server.sadd(domain_set_name, field) server.sadd(domain_set_name, field)
print "added in " + domain_set_name +": "+ field print("added in " + domain_set_name +": "+ field)
def get_date_range(num_day): def get_date_range(num_day):
curr_date = datetime.date.today() curr_date = datetime.date.today()
@ -145,24 +145,25 @@ if __name__ == '__main__':
year = today.year year = today.year
month = today.month month = today.month
print 'Building protocol graph' print('Building protocol graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto, lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
protocolsfile_path, year, protocolsfile_path, year,
month) month)
print 'Building tld graph' print('Building tld graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld, lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld,
tldsfile_path, year, tldsfile_path, year,
month) month)
print 'Building domain graph' print('Building domain graph')
lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain, lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
"domain", year, "domain", year,
month) month)
print 'end building' print('end building')
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping' print('sleeping')
time.sleep(5*60) time.sleep(5*60)
continue continue
@ -173,9 +174,13 @@ if __name__ == '__main__':
faup.decode(url) faup.decode(url)
url_parsed = faup.get() url_parsed = faup.get()
analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis # Scheme analysis
analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis analyse(r_serv_trend, 'scheme', date, url_parsed)
analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis # Tld analysis
analyse(r_serv_trend, 'tld', date, url_parsed)
# Domain analysis
analyse(r_serv_trend, 'domain', date, url_parsed)
compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)

View file

@ -62,8 +62,9 @@ if __name__ == "__main__":
while True: while True:
message = p.get_from_set() message = p.get_from_set()
if message is not None: if message is not None:
message = message.decode('utf8') #decode because of python3 #decode because of python3
module_name, p_path = message.split(';') module_name, p_path = message.split(';')
print("new alert : {}".format(module_name))
#PST = Paste.Paste(p_path) #PST = Paste.Paste(p_path)
else: else:
publisher.debug("Script Attribute is idling 10s") publisher.debug("Script Attribute is idling 10s")

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -27,10 +27,9 @@ if __name__ == "__main__":
config_section = ['Curve'] config_section = ['Curve']
for queue in config_section: for queue in config_section:
print 'dropping: ' + queue print('dropping: ' + queue)
p = Process(queue) p = Process(queue)
while True: while True:
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
break break

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# This file is part of AIL framework - Analysis Information Leak framework # This file is part of AIL framework - Analysis Information Leak framework
@ -25,7 +25,7 @@ import time
import redis import redis
import base64 import base64
import os import os
import ConfigParser import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile): if not os.path.exists(configfile):
@ -33,7 +33,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
if cfg.has_option("ZMQ_Global", "bind"): if cfg.has_option("ZMQ_Global", "bind"):
@ -59,6 +59,7 @@ while True:
time.sleep(base_sleeptime + sleep_inc) time.sleep(base_sleeptime + sleep_inc)
topic = 101 topic = 101
paste = r.lpop("pastes") paste = r.lpop("pastes")
print(paste)
if paste is None: if paste is None:
continue continue
socket.send("%d %s" % (topic, paste)) socket.send("%d %s" % (topic, paste))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# This file is part of AIL framework - Analysis Information Leak framework # This file is part of AIL framework - Analysis Information Leak framework
@ -19,14 +19,15 @@ socket.connect ("tcp://crf.circl.lu:%s" % port)
# 101 Name of the pastes only # 101 Name of the pastes only
# 102 Full pastes in raw base64(gz) # 102 Full pastes in raw base64(gz)
topicfilter = "102" topicfilter = b"102"
socket.setsockopt(zmq.SUBSCRIBE, topicfilter) socket.setsockopt(zmq.SUBSCRIBE, topicfilter)
print('b0')
while True: while True:
message = socket.recv() message = socket.recv()
print('b1')
print (message) print (message)
if topicfilter == "102": if topicfilter == b"102":
topic, paste, messagedata = message.split() topic, paste, messagedata = message.split()
print paste, messagedata print(paste, messagedata)
else: else:
print (message) print (message)

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import zmq import zmq
import base64 import base64
import StringIO from io import StringIO
import gzip import gzip
import argparse import argparse
import os import os
@ -31,8 +31,7 @@ import mimetypes
' '
''' '''
import StringIO
import gzip
def is_hierachy_valid(path): def is_hierachy_valid(path):
var = path.split('/') var = path.split('/')
try: try:
@ -72,7 +71,12 @@ if __name__ == "__main__":
wanted_path = wanted_path.split('/') wanted_path = wanted_path.split('/')
wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):]) wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):])
messagedata = open(complete_path).read() with gzip.open(complete_path, 'rb') as f:
messagedata = f.read()
#print(type(complete_path))
#file = open(complete_path)
#messagedata = file.read()
#if paste does not have a 'date hierarchy' ignore it #if paste does not have a 'date hierarchy' ignore it
if not is_hierachy_valid(complete_path): if not is_hierachy_valid(complete_path):
@ -90,5 +94,8 @@ if __name__ == "__main__":
print(args.name+'>'+wanted_path) print(args.name+'>'+wanted_path)
path_to_send = args.name + '>' + wanted_path path_to_send = args.name + '>' + wanted_path
socket.send('{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))) #s = b'{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))
# use bytes object
s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
socket.send(s)
time.sleep(args.seconds) time.sleep(args.seconds)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# This file is part of AIL framework - Analysis Information Leak framework # This file is part of AIL framework - Analysis Information Leak framework
@ -10,7 +10,7 @@
# #
# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be # Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be
import ConfigParser import configparser
import argparse import argparse
import gzip import gzip
import os import os
@ -23,7 +23,7 @@ def readdoc(path=None):
return f.read() return f.read()
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# Indexer configuration - index dir and schema setup # Indexer configuration - index dir and schema setup
@ -51,7 +51,7 @@ ix = index.open_dir(indexpath)
from whoosh.qparser import QueryParser from whoosh.qparser import QueryParser
if args.n: if args.n:
print ix.doc_count_all() print(ix.doc_count_all())
exit(0) exit(0)
if args.l: if args.l:

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import ConfigParser import configparser
import os import os
import subprocess import subprocess
import time import time
@ -23,21 +23,21 @@ if __name__ == '__main__':
raise Exception('Unable to find the configuration file. \ raise Exception('Unable to find the configuration file. \
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
config = ConfigParser.ConfigParser() config = configparser.ConfigParser()
config.read(configfile) config.read(configfile)
modules = config.sections() modules = config.sections()
pids = {} pids = {}
for module in modules: for module in modules:
pin = subprocess.Popen(["python", './QueueIn.py', '-c', module]) pin = subprocess.Popen(["python3", './QueueIn.py', '-c', module])
pout = subprocess.Popen(["python", './QueueOut.py', '-c', module]) pout = subprocess.Popen(["python3", './QueueOut.py', '-c', module])
pids[module] = (pin, pout) pids[module] = (pin, pout)
is_running = True is_running = True
try: try:
while is_running: while is_running:
time.sleep(5) time.sleep(5)
is_running = False is_running = False
for module, p in pids.iteritems(): for module, p in pids.items():
pin, pout = p pin, pout = p
if pin is None: if pin is None:
# already dead # already dead
@ -57,7 +57,7 @@ if __name__ == '__main__':
is_running = True is_running = True
pids[module] = (pin, pout) pids[module] = (pin, pout)
except KeyboardInterrupt: except KeyboardInterrupt:
for module, p in pids.iteritems(): for module, p in pids.items():
pin, pout = p pin, pout = p
if pin is not None: if pin is not None:
pin.kill() pin.kill()

View file

@ -1,4 +1,4 @@
#!/usr/bin/python2.7 #!/usr/bin/python3.5
""" """
The ``Paste Class`` The ``Paste Class``
@ -24,15 +24,8 @@ import operator
import string import string
import re import re
import json import json
try: # dirty to support python3 import configparser
import ConfigParser from io import StringIO
except:
import configparser
ConfigParser = configparser
try: # dirty to support python3
import cStringIO
except:
from io import StringIO as cStringIO
import sys import sys
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
from Date import Date from Date import Date
@ -71,7 +64,7 @@ class Paste(object):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
self.cache = redis.StrictRedis( self.cache = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"), host=cfg.get("Redis_Queues", "host"),
@ -85,11 +78,15 @@ class Paste(object):
self.p_path = p_path self.p_path = p_path
self.p_name = os.path.basename(self.p_path) self.p_name = os.path.basename(self.p_path)
self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2) self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2)
self.p_mime = magic.from_buffer("test", mime=True)
self.p_mime = magic.from_buffer(self.get_p_content(), mime=True) self.p_mime = magic.from_buffer(self.get_p_content(), mime=True)
# Assuming that the paste will always be in a day folder which is itself # Assuming that the paste will always be in a day folder which is itself
# in a month folder which is itself in a year folder. # in a month folder which is itself in a year folder.
# /year/month/day/paste.gz # /year/month/day/paste.gz
# TODO use bytes ?
var = self.p_path.split('/') var = self.p_path.split('/')
self.p_date = Date(var[-4], var[-3], var[-2]) self.p_date = Date(var[-4], var[-3], var[-2])
self.p_source = var[-5] self.p_source = var[-5]
@ -117,17 +114,25 @@ class Paste(object):
paste = self.cache.get(self.p_path) paste = self.cache.get(self.p_path)
if paste is None: if paste is None:
try: try:
#print('----------------------------------------------------------------')
#print(self.p_name)
#print('----------------------------------------------------------------')
with gzip.open(self.p_path, 'rb') as f: with gzip.open(self.p_path, 'rb') as f:
paste = f.read() paste = f.read()
self.cache.set(self.p_path, paste) self.cache.set(self.p_path, paste)
self.cache.expire(self.p_path, 300) self.cache.expire(self.p_path, 300)
except: except:
return '' paste = b''
pass
return paste return paste.decode('utf8')
def get_p_content_as_file(self): def get_p_content_as_file(self):
return cStringIO.StringIO(self.get_p_content()) try:
message = StringIO( (self.get_p_content()).decode('utf8') )
except AttributeError:
message = StringIO( (self.get_p_content()) )
return message
def get_p_content_with_removed_lines(self, threshold): def get_p_content_with_removed_lines(self, threshold):
num_line_removed = 0 num_line_removed = 0
@ -137,6 +142,7 @@ class Paste(object):
line_id = 0 line_id = 0
for line_id, line in enumerate(f): for line_id, line in enumerate(f):
length = len(line) length = len(line)
if length < line_length_threshold: if length < line_length_threshold:
string_content += line string_content += line
else: else:
@ -202,8 +208,8 @@ class Paste(object):
.. seealso:: _set_p_hash_kind("md5") .. seealso:: _set_p_hash_kind("md5")
""" """
for hash_name, the_hash in self.p_hash_kind.iteritems(): for hash_name, the_hash in self.p_hash_kind.items():
self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content()) self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode('utf8'))
return self.p_hash return self.p_hash
def _get_p_language(self): def _get_p_language(self):
@ -342,7 +348,7 @@ class Paste(object):
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True) gaps=True, discard_empty=True)
blob = TextBlob(clean(self.get_p_content()), tokenizer=tokenizer) blob = TextBlob(clean( (self.get_p_content()) ), tokenizer=tokenizer)
for word in blob.tokens: for word in blob.tokens:
if word in words.keys(): if word in words.keys():
@ -351,7 +357,7 @@ class Paste(object):
num = 0 num = 0
words[word] = num + 1 words[word] = num + 1
if sort: if sort:
var = sorted(words.iteritems(), key=operator.itemgetter(1), reverse=True) var = sorted(words.items(), key=operator.itemgetter(1), reverse=True)
else: else:
var = words var = words

View file

@ -76,7 +76,7 @@ def checking_MX_record(r_serv, adress_set):
r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
except Exception as e: except Exception as e:
print e print(e)
publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
return (num, WalidMX) return (num, WalidMX)
@ -125,7 +125,7 @@ def checking_A_record(r_serv, domains_set):
publisher.debug('The Label is too long') publisher.debug('The Label is too long')
except Exception as e: except Exception as e:
print e print(e)
publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score)) publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
return (num, WalidA) return (num, WalidA)

View file

@ -82,16 +82,16 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
""" """
threshold = 50 threshold = 50
first_day = date(year, month, 01) first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1]) last_day = date(year, month, calendar.monthrange(year, month)[1])
words = [] words = []
with open(feederfilename, 'rb') as f: with open(feederfilename, 'r') as f:
# words of the files # words of the files
words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ]) words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ])
headers = ['Date'] + words headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f: with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f) writer = csv.writer(f)
writer.writerow(headers) writer.writerow(headers)
@ -103,11 +103,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
# from the 1st day to the last of the list # from the 1st day to the last of the list
for word in words: for word in words:
value = r_serv.hget(word, curdate) value = r_serv.hget(word, curdate)
if value is None: if value is None:
row.append(0) row.append(0)
else: else:
# if the word has a value for the day # if the word has a value for the day
# FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold # FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold
value = r_serv.hget(word, curdate)
value = int(value.decode('utf8'))
if value >= threshold: if value >= threshold:
row.append(value) row.append(value)
writer.writerow(row) writer.writerow(row)
@ -127,14 +130,14 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
""" """
first_day = date(year, month, 01) first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1]) last_day = date(year, month, calendar.monthrange(year, month)[1])
redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2) redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
words = list(server.smembers(redis_set_name)) words = list(server.smembers(redis_set_name))
headers = ['Date'] + words headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f: with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f) writer = csv.writer(f)
writer.writerow(headers) writer.writerow(headers)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -48,7 +48,7 @@ if __name__ == '__main__':
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
print "queue empty" print("queue empty")
time.sleep(1) time.sleep(1)
continue continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
Template for new modules Template for new modules

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3.5
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import socks import socks
@ -63,5 +63,5 @@ if __name__ == "__main__":
t, path = tempfile.mkstemp() t, path = tempfile.mkstemp()
with open(path, 'w') as f: with open(path, 'w') as f:
f.write(to_write) f.write(to_write)
print path print(path)
exit(0) exit(0)