Big cleanup, pep8

This commit is contained in:
Raphaël Vinot 2014-08-14 17:55:18 +02:00
parent 715adb4546
commit 078c8ea836
25 changed files with 394 additions and 408 deletions

View file

@ -1,14 +1,13 @@
#!/usr/bin/python2.7 #!/usr/bin/python2.7
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, argparse, zmq, ConfigParser, time, os import redis
import argparse
import ConfigParser
from pubsublogger import publisher from pubsublogger import publisher
from packages import ZMQ_PubSub
import texttable
import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from pylab import *
def main(): def main():
"""Main Function""" """Main Function"""
@ -19,14 +18,10 @@ def main():
# SCRIPT PARSER # # SCRIPT PARSER #
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description = '''This script is a part of the Analysis Information description='''This script is a part of the Analysis Information Leak framework.''',
Leak framework.''',
epilog='''''') epilog='''''')
parser.add_argument('-f', parser.add_argument('-f', type=str, metavar="filename", default="figure",
type = str,
metavar = "filename",
default = "figure",
help='The absolute path name of the "figure.png"', help='The absolute path name of the "figure.png"',
action='store') action='store')
@ -74,7 +69,6 @@ def main():
total_list.sort(reverse=True) total_list.sort(reverse=True)
plt.plot(codepad_list, 'b', label='Codepad.org') plt.plot(codepad_list, 'b', label='Codepad.org')
plt.plot(pastebin_list, 'g', label='Pastebin.org') plt.plot(pastebin_list, 'g', label='Pastebin.org')
plt.plot(pastie_list, 'y', label='Pastie.org') plt.plot(pastie_list, 'y', label='Pastie.org')

View file

@ -17,12 +17,14 @@ Requirements
*Should register to the Publisher "ZMQ_PubSub_Tokenize" *Should register to the Publisher "ZMQ_PubSub_Tokenize"
""" """
import redis, zmq, ConfigParser import redis
import ConfigParser
from pubsublogger import publisher from pubsublogger import publisher
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -44,12 +46,12 @@ def main():
subscriber_name = "categ" subscriber_name = "categ"
subscriber_config_section = "PubSub_Words" subscriber_config_section = "PubSub_Words"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format(channel)) publisher.info("""Suscribed to channel {0}""".format(channel))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Categ_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Categ_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Categ_Q") r_serv.srem("SHUTDOWN_FLAGS", "Categ_Q")

View file

@ -26,13 +26,17 @@ Requirements
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly. *Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
""" """
import redis, argparse, zmq, ConfigParser, time import redis
from packages import Paste as P import argparse
import ConfigParser
import time
from packages import Paste
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -42,13 +46,10 @@ def main():
# SCRIPT PARSER # # SCRIPT PARSER #
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description = '''This script is a part of the Analysis Information description='''This script is a part of the Analysis Information Leak framework.''',
Leak framework.''',
epilog='''''') epilog='''''')
parser.add_argument('-max', parser.add_argument('-max', type=int, default=500,
type = int,
default = 500,
help='The limit between "short lines" and "long lines" (500)', help='The limit between "short lines" and "long lines" (500)',
action='store') action='store')
@ -65,8 +66,6 @@ def main():
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"))
p_serv = r_serv.pipeline(False)
# LOGGING # # LOGGING #
publisher.channel = "Script" publisher.channel = "Script"
@ -80,24 +79,25 @@ def main():
publisher_config_section = "PubSub_Longlines" publisher_config_section = "PubSub_Longlines"
publisher_name = "publine" publisher_name = "publine"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
channel_0 = cfg.get("PubSub_Longlines", "channel_0") channel_0 = cfg.get("PubSub_Longlines", "channel_0")
channel_1 = cfg.get("PubSub_Longlines", "channel_1") channel_1 = cfg.get("PubSub_Longlines", "channel_1")
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Lines script Subscribed to channel {0} and Start to publish tmp_string = "Lines script Subscribed to channel {} and Start to publish on channel {}, {}"
on channel {1}, {2}""".format(cfg.get("PubSub_Global", "channel"), publisher.info(tmp_string.format(
cfg.get("PubSub_Global", "channel"),
cfg.get("PubSub_Longlines", "channel_0"), cfg.get("PubSub_Longlines", "channel_0"),
cfg.get("PubSub_Longlines", "channel_1"))) cfg.get("PubSub_Longlines", "channel_1")))
while True: while True:
try: try:
message = Sub.get_msg_from_queue(r_serv1) message = sub.get_msg_from_queue(r_serv1)
if message != None: if message is not None:
PST = P.Paste(message.split(" ",-1)[-1]) PST = Paste.Paste(message.split(" ", -1)[-1])
else: else:
if r_serv1.sismember("SHUTDOWN_FLAGS", "Lines"): if r_serv1.sismember("SHUTDOWN_FLAGS", "Lines"):
r_serv1.srem("SHUTDOWN_FLAGS", "Lines") r_serv1.srem("SHUTDOWN_FLAGS", "Lines")
@ -119,7 +119,7 @@ def main():
else: else:
msg = channel_1+" "+PST.p_path msg = channel_1+" "+PST.p_path
Pub.send_message(msg) pub.send_message(msg)
except IOError: except IOError:
print "CRC Checksum Error on : ", PST.p_path print "CRC Checksum Error on : ", PST.p_path
pass pass

View file

@ -18,12 +18,14 @@ Requirements
""" """
import redis, zmq, ConfigParser import redis
import ConfigParser
from pubsublogger import publisher from pubsublogger import publisher
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -44,13 +46,13 @@ def main():
channel = cfg.get("PubSub_Global", "channel") channel = cfg.get("PubSub_Global", "channel")
subscriber_name = "line" subscriber_name = "line"
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Global", channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format(channel)) publisher.info("""Suscribed to channel {0}""".format(channel))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Lines_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Lines_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Lines_Q") r_serv.srem("SHUTDOWN_FLAGS", "Lines_Q")

View file

@ -21,13 +21,16 @@ Requirements
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
""" """
import redis, zmq, ConfigParser, time import redis
from packages import Paste as P import ConfigParser
import time
from packages import Paste
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -53,8 +56,8 @@ def main():
publisher_config_section = "PubSub_Words" publisher_config_section = "PubSub_Words"
publisher_name = "pubtokenize" publisher_name = "pubtokenize"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
channel_0 = cfg.get("PubSub_Words", "channel_0") channel_0 = cfg.get("PubSub_Words", "channel_0")
@ -62,10 +65,10 @@ def main():
publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1"))) publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))
while True: while True:
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
print message print message
if message != None: if message is not None:
PST = P.Paste(message.split(" ",-1)[-1]) PST = Paste.Paste(message.split(" ", -1)[-1])
else: else:
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"): if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize") r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
@ -80,7 +83,7 @@ def main():
for word, score in PST._get_top_words().items(): for word, score in PST._get_top_words().items():
if len(word) >= 4: if len(word) >= 4:
msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score) msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
Pub.send_message(msg) pub.send_message(msg)
print msg print msg
else: else:
pass pass

View file

@ -17,12 +17,14 @@ Requirements
*Should register to the Publisher "ZMQ_PubSub_Line" channel 1 *Should register to the Publisher "ZMQ_PubSub_Line" channel 1
""" """
import redis, zmq, ConfigParser import redis
import ConfigParser
from pubsublogger import publisher from pubsublogger import publisher
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -44,13 +46,13 @@ def main():
subscriber_name = "tokenize" subscriber_name = "tokenize"
subscriber_config_section = "PubSub_Longlines" subscriber_config_section = "PubSub_Longlines"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format(channel)) publisher.info("""Suscribed to channel {0}""".format(channel))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize_Q") r_serv.srem("SHUTDOWN_FLAGS", "Tokenize_Q")

View file

@ -26,13 +26,16 @@ Requirements
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly. *Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
""" """
import redis, zmq, ConfigParser, time import redis
from packages import Paste as P import ConfigParser
import time
from packages import Paste
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -51,8 +54,6 @@ def main():
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"))
p_serv = r_serv.pipeline(False)
# LOGGING # # LOGGING #
publisher.channel = "Script" publisher.channel = "Script"
@ -62,17 +63,17 @@ def main():
subscriber_name = "attributes" subscriber_name = "attributes"
subscriber_config_section = "PubSub_Global" subscriber_config_section = "PubSub_Global"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""ZMQ Attribute is Running""") publisher.info("""ZMQ Attribute is Running""")
while True: while True:
try: try:
message = Sub.get_msg_from_queue(r_serv1) message = sub.get_msg_from_queue(r_serv1)
if message != None: if message is not None:
PST = P.Paste(message.split(" ",-1)[-1]) PST = Paste.Paste(message.split(" ", -1)[-1])
else: else:
if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"): if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"):
r_serv1.srem("SHUTDOWN_FLAGS", "Attributes") r_serv1.srem("SHUTDOWN_FLAGS", "Attributes")

View file

@ -18,12 +18,14 @@ Requirements
""" """
import redis, zmq, ConfigParser import redis
import ConfigParser
from pubsublogger import publisher from pubsublogger import publisher
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -44,13 +46,13 @@ def main():
channel = cfg.get("PubSub_Global", "channel") channel = cfg.get("PubSub_Global", "channel")
subscriber_name = "attributes" subscriber_name = "attributes"
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format(channel)) publisher.info("""Suscribed to channel {0}""".format(channel))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Attributes_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Attributes_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Attributes_Q") r_serv.srem("SHUTDOWN_FLAGS", "Attributes_Q")

View file

@ -1,7 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, zmq, ConfigParser, json, pprint, time import redis
from packages import Paste as P import ConfigParser
import pprint
import time
from packages import Paste
from packages import lib_refine from packages import lib_refine
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
@ -9,6 +12,7 @@ from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -27,51 +31,49 @@ def main():
port=cfg.getint("Redis_Data_Merging", "port"), port=cfg.getint("Redis_Data_Merging", "port"),
db=cfg.getint("Redis_Data_Merging", "db")) db=cfg.getint("Redis_Data_Merging", "db"))
p_serv = r_serv.pipeline(False)
# LOGGING # # LOGGING #
publisher.channel = "Script" publisher.channel = "Script"
# ZMQ # # ZMQ #
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards") sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
# FUNCTIONS # # FUNCTIONS #
publisher.info("Creditcard script subscribed to channel creditcard_categ") publisher.info("Creditcard script subscribed to channel creditcard_categ")
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
prec_filename = None prec_filename = None
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?" creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}" # mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}" # visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}" # discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}"
jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}" # jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}"
amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}" # amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}"
chinaUP_regex = "62[0-5]\d{13,16}" # chinaUP_regex = "62[0-5]\d{13,16}"
maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}" # maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}"
while True: while True:
if message != None: if message is not None:
channel, filename, word, score = message.split() channel, filename, word, score = message.split()
if prec_filename == None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
Creditcard_set = set([]) creditcard_set = set([])
PST = P.Paste(filename) PST = Paste.Paste(filename)
for x in PST.get_regex(creditcard_regex): for x in PST.get_regex(creditcard_regex):
if lib_refine.is_luhn_valid(x): if lib_refine.is_luhn_valid(x):
Creditcard_set.add(x) creditcard_set.add(x)
PST.__setattr__(channel, creditcard_set)
PST.save_attribute_redis(r_serv1, channel, creditcard_set)
PST.__setattr__(channel, Creditcard_set) pprint.pprint(creditcard_set)
PST.save_attribute_redis(r_serv1, channel, Creditcard_set) to_print = 'CreditCard;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
if (len(creditcard_set) > 0):
pprint.pprint(Creditcard_set) publisher.critical('{}Checked {} valid number(s)'.format(to_print, len(creditcard_set)))
if (len(Creditcard_set) > 0):
publisher.critical('{0};{1};{2};{3};{4}'.format("CreditCard", PST.p_source, PST.p_date, PST.p_name,"Checked " + str(len(Creditcard_set))+" valid number(s)" ))
else: else:
publisher.info('{0};{1};{2};{3};{4}'.format("CreditCard", PST.p_source, PST.p_date, PST.p_name, "CreditCard related" )) publisher.info('{}CreditCard related'.format(to_print))
prec_filename = filename prec_filename = filename
@ -84,7 +86,7 @@ def main():
publisher.debug("Script creditcard is idling 1m") publisher.debug("Script creditcard is idling 1m")
time.sleep(60) time.sleep(60)
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,12 +1,14 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, zmq, ConfigParser import redis
import ConfigParser
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -24,13 +26,13 @@ def main():
publisher.channel = "Queuing" publisher.channel = "Queuing"
# ZMQ # # ZMQ #
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards") sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format("creditcard_categ")) publisher.info("""Suscribed to channel {0}""".format("creditcard_categ"))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Creditcards_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Creditcards_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Creditcards_Q") r_serv.srem("SHUTDOWN_FLAGS", "Creditcards_Q")

View file

@ -21,7 +21,9 @@ Requirements
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
""" """
import redis, argparse, zmq, ConfigParser, time import redis
import ConfigParser
import time
from packages import Paste as P from packages import Paste as P
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
@ -29,6 +31,7 @@ from packages import lib_words
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -36,20 +39,6 @@ def main():
cfg = ConfigParser.ConfigParser() cfg = ConfigParser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# SCRIPT PARSER #
parser = argparse.ArgumentParser(
description = '''This script is a part of the Analysis Information
Leak framework.''',
epilog = '''''')
parser.add_argument('-l',
type = str,
default = "../files/list_categ_files",
help = 'Path to the list_categ_files (../files/list_categ_files)',
action = 'store')
args = parser.parse_args()
# REDIS # # REDIS #
r_serv = redis.StrictRedis( r_serv = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"), host=cfg.get("Redis_Queues", "host"),
@ -69,7 +58,7 @@ def main():
subscriber_name = "curve" subscriber_name = "curve"
subscriber_config_section = "PubSub_Words" subscriber_config_section = "PubSub_Words"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script Curve subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0"))) publisher.info("Script Curve subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0")))
@ -78,20 +67,19 @@ def main():
csv_path = cfg.get("Directories", "wordtrending_csv") csv_path = cfg.get("Directories", "wordtrending_csv")
wordfile_path = cfg.get("Directories", "wordsfile") wordfile_path = cfg.get("Directories", "wordsfile")
paste_words = [] message = sub.get_msg_from_queue(r_serv)
message = Sub.get_msg_from_queue(r_serv)
prec_filename = None prec_filename = None
while True: while True:
if message != None: if message is not None:
channel, filename, word, score = message.split() channel, filename, word, score = message.split()
if prec_filename == None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
PST = P.Paste(filename) PST = P.Paste(filename)
lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, int(PST.p_date.year), int(PST.p_date.month)) lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, int(PST.p_date.year), int(PST.p_date.month))
prec_filename = filename prec_filename = filename
prev_score = r_serv1.hget(word.lower(), PST.p_date) prev_score = r_serv1.hget(word.lower(), PST.p_date)
print prev_score print prev_score
if prev_score != None: if prev_score is not None:
r_serv1.hset(word.lower(), PST.p_date, int(prev_score) + int(score)) r_serv1.hset(word.lower(), PST.p_date, int(prev_score) + int(score))
else: else:
r_serv1.hset(word.lower(), PST.p_date, score) r_serv1.hset(word.lower(), PST.p_date, score)
@ -107,7 +95,7 @@ def main():
print "sleepin" print "sleepin"
time.sleep(1) time.sleep(1)
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -17,12 +17,14 @@ Requirements
*Should register to the Publisher "ZMQ_PubSub_Tokenize" *Should register to the Publisher "ZMQ_PubSub_Tokenize"
""" """
import redis, zmq, ConfigParser import redis
import ConfigParser
from pubsublogger import publisher from pubsublogger import publisher
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -44,12 +46,12 @@ def main():
subscriber_name = "curve" subscriber_name = "curve"
subscriber_config_section = "PubSub_Words" subscriber_config_section = "PubSub_Words"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format(channel)) publisher.info("""Suscribed to channel {0}""".format(channel))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q") r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q")

View file

@ -12,15 +12,18 @@ Requirements:
""" """
import redis, zmq, ConfigParser, time, datetime, pprint, time, os import redis
from packages import Paste as P import ConfigParser
import os
import time
from packages import Paste
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from datetime import date
from pubsublogger import publisher from pubsublogger import publisher
from pybloomfilter import BloomFilter from pybloomfilter import BloomFilter
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -40,7 +43,6 @@ def main():
port=cfg.getint("Redis_Data_Merging", "port"), port=cfg.getint("Redis_Data_Merging", "port"),
db=cfg.getint("Redis_Data_Merging", "db")) db=cfg.getint("Redis_Data_Merging", "db"))
# REDIS # # REDIS #
# DB OBJECT & HASHS ( DISK ) # DB OBJECT & HASHS ( DISK )
dico_redis = {} dico_redis = {}
@ -59,7 +61,7 @@ def main():
subscriber_name = "duplicate" subscriber_name = "duplicate"
subscriber_config_section = "PubSub_Global" subscriber_config_section = "PubSub_Global"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Script duplicate subscribed to channel {0}""".format(cfg.get("PubSub_Global", "channel"))) publisher.info("""Script duplicate subscribed to channel {0}""".format(cfg.get("PubSub_Global", "channel")))
@ -75,10 +77,10 @@ def main():
x = time.time() x = time.time()
message = Sub.get_msg_from_queue(r_Q_serv) message = sub.get_msg_from_queue(r_Q_serv)
if message != None: if message is not None:
path = message.split(" ", -1)[-1] path = message.split(" ", -1)[-1]
PST = P.Paste(path) PST = Paste.Paste(path)
else: else:
publisher.debug("Script Attribute is idling 10s") publisher.debug("Script Attribute is idling 10s")
time.sleep(10) time.sleep(10)
@ -164,7 +166,6 @@ def main():
# current = current paste, phash_dico = {hash: set, ...} # current = current paste, phash_dico = {hash: set, ...}
occur_dico = {} occur_dico = {}
for current, phash_dico in super_dico.items(): for current, phash_dico in super_dico.items():
nb_similar_hash = len(phash_dico)
# phash = hash, pset = set([ pastes ...]) # phash = hash, pset = set([ pastes ...])
for phash, pset in hash_dico.items(): for phash, pset in hash_dico.items():
@ -180,17 +181,18 @@ def main():
dupl.append((paste, percentage)) dupl.append((paste, percentage))
# Creating the object attribute and save it. # Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
if dupl != []: if dupl != []:
PST.__setattr__("p_duplicate", dupl) PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis(r_serv_merge, "p_duplicate", dupl) PST.save_attribute_redis(r_serv_merge, "p_duplicate", dupl)
publisher.info('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name,"Detected " + str(len(dupl)))) publisher.info('{}Detected {}'.format(to_print, len(dupl)))
y = time.time() y = time.time()
publisher.debug('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "Processed in "+str(y-x)+ " sec" )) publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print "CRC Checksum Failed on :", PST.p_path
publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" )) publisher.error('{}CRC Checksum Failed'.format(to_print))
pass pass
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,12 +1,14 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, zmq, ConfigParser import redis
import ConfigParser
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -25,13 +27,13 @@ def main():
# ZMQ # # ZMQ #
channel = cfg.get("PubSub_Global", "channel") channel = cfg.get("PubSub_Global", "channel")
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, "duplicate") sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, "duplicate")
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format(channel)) publisher.info("""Suscribed to channel {0}""".format(channel))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Duplicate_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Duplicate_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Duplicate_Q") r_serv.srem("SHUTDOWN_FLAGS", "Duplicate_Q")

View file

@ -9,17 +9,20 @@ The ZMQ_Sub_Indexer modules is fetching the list of files to be processed
and index each file with a full-text indexer (Whoosh until now). and index each file with a full-text indexer (Whoosh until now).
""" """
import redis, zmq, ConfigParser, time import redis
from packages import Paste as P import ConfigParser
import time
from packages import Paste
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
from whoosh.index import create_in, exists_in, open_dir from whoosh.index import create_in, exists_in, open_dir
from whoosh.fields import * from whoosh.fields import Schema, TEXT, ID
import os import os
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -38,10 +41,8 @@ def main():
indexertype = cfg.get("Indexer", "type") indexertype = cfg.get("Indexer", "type")
if indexertype == "whoosh": if indexertype == "whoosh":
schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT) schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
if not os.path.exists(indexpath): if not os.path.exists(indexpath):
os.mkdir(indexpath) os.mkdir(indexpath)
if not exists_in(indexpath): if not exists_in(indexpath):
ix = create_in(indexpath, schema) ix = create_in(indexpath, schema)
else: else:
@ -56,17 +57,17 @@ def main():
subscriber_name = "indexer" subscriber_name = "indexer"
subscriber_config_section = "PubSub_Global" subscriber_config_section = "PubSub_Global"
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("""ZMQ Indexer is Running""") publisher.info("""ZMQ Indexer is Running""")
while True: while True:
try: try:
message = Sub.get_msg_from_queue(r_serv1) message = sub.get_msg_from_queue(r_serv1)
if message != None: if message is not None:
PST = P.Paste(message.split(" ",-1)[-1]) PST = Paste.Paste(message.split(" ", -1)[-1])
else: else:
if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"): if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
r_serv1.srem("SHUTDOWN_FLAGS", "Indexer") r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
@ -80,11 +81,14 @@ def main():
print "Indexing :", docpath print "Indexing :", docpath
if indexertype == "whoosh": if indexertype == "whoosh":
indexwriter = ix.writer() indexwriter = ix.writer()
indexwriter.update_document(title=unicode(docpath, errors='ignore'),path=unicode(docpath, errors='ignore'),content=unicode(paste, errors='ignore')) indexwriter.update_document(
title=unicode(docpath, errors='ignore'),
path=unicode(docpath, errors='ignore'),
content=unicode(paste, errors='ignore'))
indexwriter.commit() indexwriter.commit()
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print "CRC Checksum Failed on :", PST.p_path
publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" )) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name))
pass pass

View file

@ -12,12 +12,14 @@ handling the indexing process of the files seen.
""" """
import redis, zmq, ConfigParser import redis
import ConfigParser
from pubsublogger import publisher from pubsublogger import publisher
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -38,7 +40,7 @@ def main():
channel = cfg.get("PubSub_Global", "channel") channel = cfg.get("PubSub_Global", "channel")
subscriber_name = "indexer" subscriber_name = "indexer"
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
publisher.info("""Suscribed to channel {0}""".format(channel)) publisher.info("""Suscribed to channel {0}""".format(channel))
@ -46,7 +48,7 @@ def main():
# will get the data from the global ZMQ queue and buffer it in Redis. # will get the data from the global ZMQ queue and buffer it in Redis.
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Indexer_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Indexer_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Indexer_Q") r_serv.srem("SHUTDOWN_FLAGS", "Indexer_Q")

View file

@ -1,43 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
import redis, zmq, ConfigParser
from packages import Paste as P
from packages import ZMQ_PubSub
from pubsublogger import publisher
configfile = './packages/config.cfg'
def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)
# REDIS #
r_serv = redis.StrictRedis(
host = cfg.get("Redis_default", "host"),
port = cfg.getint("Redis_default", "port"),
db = args.db)
p_serv = r_serv.pipeline(False)
# LOGGING #
publisher.channel = "Script"
# ZMQ #
channel = cfg.get("PubSub_Longlines", "channel_0")
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Longlines", channel)
# FUNCTIONS #
publisher.info("Longlines ubscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_0")))
while True:
PST = P.Paste(Sub.get_message().split(" ", -1)[-1])
r_serv.sadd("Longlines", PST.p_mime)
PST.save_in_redis(r_serv, PST.p_mime)
if __name__ == "__main__":
main()

View file

@ -1,7 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, zmq, ConfigParser, json, pprint, time import redis
import ConfigParser
import pprint
import time
import dns.exception import dns.exception
from packages import Paste as P from packages import Paste as P
from packages import lib_refine from packages import lib_refine
@ -10,6 +13,7 @@ from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -37,22 +41,22 @@ def main():
publisher.channel = "Script" publisher.channel = "Script"
# ZMQ # # ZMQ #
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "mails_categ", "emails") sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails")
# FUNCTIONS # # FUNCTIONS #
publisher.info("Suscribed to channel mails_categ") publisher.info("Suscribed to channel mails_categ")
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
prec_filename = None prec_filename = None
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}" email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
while True: while True:
try: try:
if message != None: if message is not None:
channel, filename, word, score = message.split() channel, filename, word, score = message.split()
if prec_filename == None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
PST = P.Paste(filename) PST = P.Paste(filename)
MX_values = lib_refine.checking_MX_record(r_serv2, PST.get_regex(email_regex)) MX_values = lib_refine.checking_MX_record(r_serv2, PST.get_regex(email_regex))
@ -62,10 +66,11 @@ def main():
PST.save_attribute_redis(r_serv1, channel, (MX_values[0], list(MX_values[1]))) PST.save_attribute_redis(r_serv1, channel, (MX_values[0], list(MX_values[1])))
pprint.pprint(MX_values) pprint.pprint(MX_values)
to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.format(PST.p_source, PST.p_date, PST.p_name, MX_values[0])
if MX_values[0] > 10: if MX_values[0] > 10:
publisher.warning('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked "+ str(MX_values[0])+ " e-mails" )) publisher.warning(to_print)
else: else:
publisher.info('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked "+ str(MX_values[0])+ " e-mail(s)" )) publisher.info(to_print)
prec_filename = filename prec_filename = filename
else: else:
@ -77,7 +82,7 @@ def main():
publisher.debug("Script Mails is Idling 10s") publisher.debug("Script Mails is Idling 10s")
time.sleep(10) time.sleep(10)
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
except dns.exception.Timeout: except dns.exception.Timeout:
print "dns.exception.Timeout" print "dns.exception.Timeout"
pass pass

View file

@ -1,12 +1,14 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, zmq, ConfigParser import redis
import ConfigParser
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -24,13 +26,13 @@ def main():
publisher.channel = "Queuing" publisher.channel = "Queuing"
# ZMQ # # ZMQ #
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "mails_categ", "emails") sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails")
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format("mails_categ")) publisher.info("""Suscribed to channel {0}""".format("mails_categ"))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Mails_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Mails_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Mails_Q") r_serv.srem("SHUTDOWN_FLAGS", "Mails_Q")

View file

@ -21,13 +21,17 @@ Requirements
*Need the ZMQ_Sub_Onion_Q Module running to be able to work properly. *Need the ZMQ_Sub_Onion_Q Module running to be able to work properly.
""" """
import redis, zmq, ConfigParser, json, pprint, time import redis
from packages import Paste as P import ConfigParser
import pprint
import time
from packages import Paste
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -46,12 +50,6 @@ def main():
port=cfg.getint("Redis_Data_Merging", "port"), port=cfg.getint("Redis_Data_Merging", "port"),
db=cfg.getint("Redis_Data_Merging", "db")) db=cfg.getint("Redis_Data_Merging", "db"))
r_serv2 = redis.StrictRedis(
host = cfg.get("Redis_Cache", "host"),
port = cfg.getint("Redis_Cache", "port"),
db = cfg.getint("Redis_Cache", "db"))
# LOGGING # # LOGGING #
publisher.channel = "Script" publisher.channel = "Script"
@ -61,7 +59,6 @@ def main():
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script subscribed to channel onion_categ") publisher.info("Script subscribed to channel onion_categ")
# Getting the first message from redis. # Getting the first message from redis.
message = Sub.get_msg_from_queue(r_serv) message = Sub.get_msg_from_queue(r_serv)
prec_filename = None prec_filename = None
@ -71,13 +68,13 @@ def main():
url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|onion|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*" url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|onion|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
while True: while True:
if message != None: if message is not None:
channel, filename, word, score = message.split() channel, filename, word, score = message.split()
# "For each new paste" # "For each new paste"
if prec_filename == None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
domains_list = [] domains_list = []
PST = P.Paste(filename) PST = Paste.Paste(filename)
for x in PST.get_regex(url_regex): for x in PST.get_regex(url_regex):
# Extracting url with regex # Extracting url with regex
@ -91,10 +88,11 @@ def main():
PST.save_attribute_redis(r_serv1, channel, domains_list) PST.save_attribute_redis(r_serv1, channel, domains_list)
pprint.pprint(domains_list) pprint.pprint(domains_list)
print PST.p_path print PST.p_path
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
if len(domains_list) > 0: if len(domains_list) > 0:
publisher.warning('{0};{1};{2};{3};{4}'.format("Onion", PST.p_source, PST.p_date, PST.p_name,"Detected " + str(len(domains_list))+" .onion(s)" )) publisher.warning('{}Detected {} .onion(s)'.format(to_print, len(domains_list)))
else: else:
publisher.info('{0};{1};{2};{3};{4}'.format("Onion", PST.p_source, PST.p_date, PST.p_name, "Onion related" )) publisher.info('{}Onion related'.format(to_print))
prec_filename = filename prec_filename = filename

View file

@ -17,12 +17,14 @@ Requirements
*Should register to the Publisher "ZMQ_PubSub_Categ" *Should register to the Publisher "ZMQ_PubSub_Categ"
""" """
import redis, zmq, ConfigParser import redis
import ConfigParser
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -40,13 +42,13 @@ def main():
publisher.channel = "Queuing" publisher.channel = "Queuing"
# ZMQ # # ZMQ #
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "onion_categ", "tor") sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "onion_categ", "tor")
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format("onion_categ")) publisher.info("""Suscribed to channel {0}""".format("onion_categ"))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q") r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q")

View file

@ -1,8 +1,11 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, zmq, ConfigParser, json, pprint, time import redis
import ConfigParser
import pprint
import time
import dns.exception import dns.exception
from packages import Paste as P from packages import Paste
from packages import lib_refine from packages import lib_refine
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
@ -15,6 +18,7 @@ import ipaddress
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -38,7 +42,6 @@ def main():
port=cfg.getint("Redis_Cache", "port"), port=cfg.getint("Redis_Cache", "port"),
db=cfg.getint("Redis_Cache", "db")) db=cfg.getint("Redis_Cache", "db"))
# LOGGING # # LOGGING #
publisher.channel = "Script" publisher.channel = "Script"
@ -55,31 +58,31 @@ def main():
# Country to log as critical # Country to log as critical
cc_critical = cfg.get("PubSub_Url", "cc_critical") cc_critical = cfg.get("PubSub_Url", "cc_critical")
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, "web_categ", subscriber_name) sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, "web_categ", subscriber_name)
Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script URL subscribed to channel web_categ") publisher.info("Script URL subscribed to channel web_categ")
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
prec_filename = None prec_filename = None
url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*" url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
while True: while True:
try: try:
if message != None: if message is not None:
channel, filename, word, score = message.split() channel, filename, word, score = message.split()
if prec_filename == None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
domains_list = [] domains_list = []
PST = P.Paste(filename) PST = Paste.Paste(filename)
client = ip2asn() client = ip2asn()
for x in PST.get_regex(url_regex): for x in PST.get_regex(url_regex):
scheme, credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x scheme, credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x
domains_list.append(domain) domains_list.append(domain)
msg = pubchannel + " " + str(x) msg = pubchannel + " " + str(x)
Pub.send_message(msg) pub.send_message(msg)
publisher.debug('{0} Published'.format(x)) publisher.debug('{0} Published'.format(x))
if f1 == "onion": if f1 == "onion":
@ -106,6 +109,7 @@ def main():
if cc is not None and cc != "EU": if cc is not None and cc != "EU":
print hostl, asn, cc, pycountry.countries.get(alpha2=cc).name print hostl, asn, cc, pycountry.countries.get(alpha2=cc).name
if cc == cc_critical: if cc == cc_critical:
# FIXME: That's going to fail.
publisher.warning('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Detected " + str(A_values[0]) + " " + hostl + " " + cc)) publisher.warning('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Detected " + str(A_values[0]) + " " + hostl + " " + cc))
else: else:
print hostl, asn, cc print hostl, asn, cc
@ -128,7 +132,7 @@ def main():
publisher.debug("Script url is Idling 10s") publisher.debug("Script url is Idling 10s")
time.sleep(10) time.sleep(10)
message = Sub.get_msg_from_queue(r_serv) message = sub.get_msg_from_queue(r_serv)
except dns.exception.Timeout: except dns.exception.Timeout:
print "dns.exception.Timeout", A_values print "dns.exception.Timeout", A_values
pass pass

View file

@ -1,12 +1,14 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, zmq, ConfigParser import redis
import ConfigParser
from packages import ZMQ_PubSub from packages import ZMQ_PubSub
from pubsublogger import publisher from pubsublogger import publisher
configfile = './packages/config.cfg' configfile = './packages/config.cfg'
def main(): def main():
"""Main Function""" """Main Function"""
@ -24,13 +26,13 @@ def main():
publisher.channel = "Queuing" publisher.channel = "Queuing"
# ZMQ # # ZMQ #
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "web_categ", "urls") sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "web_categ", "urls")
# FUNCTIONS # # FUNCTIONS #
publisher.info("""Suscribed to channel {0}""".format("web_categ")) publisher.info("""Suscribed to channel {0}""".format("web_categ"))
while True: while True:
Sub.get_and_lpush(r_serv) sub.get_and_lpush(r_serv)
if r_serv.sismember("SHUTDOWN_FLAGS", "Urls_Q"): if r_serv.sismember("SHUTDOWN_FLAGS", "Urls_Q"):
r_serv.srem("SHUTDOWN_FLAGS", "Urls_Q") r_serv.srem("SHUTDOWN_FLAGS", "Urls_Q")

View file

@ -12,9 +12,9 @@
import ConfigParser import ConfigParser
import argparse import argparse
import sys
import gzip import gzip
def readdoc(path=None): def readdoc(path=None):
if path is None: if path is None:
return False return False
@ -40,8 +40,8 @@ argParser.add_argument('-s', action='append', help='search similar documents')
args = argParser.parse_args() args = argParser.parse_args()
from whoosh import index from whoosh import index
from whoosh.fields import * from whoosh.fields import Schema, TEXT, ID
import whoosh
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = index.open_dir(indexpath) ix = index.open_dir(indexpath)

View file

@ -1,9 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis, ConfigParser, json import redis
from datetime import date, datetime, time import ConfigParser
from flask import Flask, request, render_template, jsonify import json
from flask import Flask, render_template, jsonify
import flask import flask
# CONFIG # # CONFIG #
@ -24,6 +25,7 @@ r_serv_log = redis.StrictRedis(
app = Flask(__name__, static_url_path='/static/') app = Flask(__name__, static_url_path='/static/')
def event_stream(): def event_stream():
pubsub = r_serv_log.pubsub() pubsub = r_serv_log.pubsub()
pubsub.psubscribe("Script" + '.*') pubsub.psubscribe("Script" + '.*')
@ -32,6 +34,7 @@ def event_stream():
if msg['type'] == 'pmessage' and level != "DEBUG": if msg['type'] == 'pmessage' and level != "DEBUG":
yield 'data: %s\n\n' % json.dumps(msg) yield 'data: %s\n\n' % json.dumps(msg)
@app.route("/_logs") @app.route("/_logs")
def logs(): def logs():
return flask.Response(event_stream(), mimetype="text/event-stream") return flask.Response(event_stream(), mimetype="text/event-stream")
@ -44,6 +47,7 @@ def stuff():
row1.append((queue, r_serv.llen(queue))) row1.append((queue, r_serv.llen(queue)))
return jsonify(row1=row1) return jsonify(row1=row1)
@app.route("/") @app.route("/")
def index(): def index():
row = [] row = []
@ -52,11 +56,13 @@ def index():
return render_template("index.html", queues_name=row) return render_template("index.html", queues_name=row)
@app.route("/monitoring/") @app.route("/monitoring/")
def monitoring(): def monitoring():
for queue in r_serv.smembers("queues"): for queue in r_serv.smembers("queues"):
return render_template("Queue_live_Monitoring.html", last_value=queue) return render_template("Queue_live_Monitoring.html", last_value=queue)
@app.route("/wordstrending/") @app.route("/wordstrending/")
def wordstrending(): def wordstrending():
return render_template("Wordstrending.html") return render_template("Wordstrending.html")