mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 00:07:16 +00:00
Big cleanup, pep8
This commit is contained in:
parent
715adb4546
commit
078c8ea836
25 changed files with 394 additions and 408 deletions
|
@ -1,14 +1,13 @@
|
||||||
#!/usr/bin/python2.7
|
#!/usr/bin/python2.7
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis, argparse, zmq, ConfigParser, time, os
|
import redis
|
||||||
|
import argparse
|
||||||
|
import ConfigParser
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
import texttable
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from pylab import *
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
@ -19,24 +18,20 @@ def main():
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
# SCRIPT PARSER #
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description = '''This script is a part of the Analysis Information
|
description='''This script is a part of the Analysis Information Leak framework.''',
|
||||||
Leak framework.''',
|
epilog='''''')
|
||||||
epilog = '''''')
|
|
||||||
|
|
||||||
parser.add_argument('-f',
|
parser.add_argument('-f', type=str, metavar="filename", default="figure",
|
||||||
type = str,
|
help='The absolute path name of the "figure.png"',
|
||||||
metavar = "filename",
|
action='store')
|
||||||
default = "figure",
|
|
||||||
help = 'The absolute path name of the "figure.png"',
|
|
||||||
action = 'store')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Level_DB_Hashs", "host"),
|
host=cfg.get("Redis_Level_DB_Hashs", "host"),
|
||||||
port = cfg.getint("Redis_Level_DB_Hashs", "port"),
|
port=cfg.getint("Redis_Level_DB_Hashs", "port"),
|
||||||
db = cfg.getint("Redis_Level_DB_Hashs", "db"))
|
db=cfg.getint("Redis_Level_DB_Hashs", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Graph"
|
publisher.channel = "Graph"
|
||||||
|
@ -68,12 +63,11 @@ def main():
|
||||||
pastie_list.append(pastie)
|
pastie_list.append(pastie)
|
||||||
pastebin_list.append(pastebin)
|
pastebin_list.append(pastebin)
|
||||||
|
|
||||||
codepad_list.sort(reverse = True)
|
codepad_list.sort(reverse=True)
|
||||||
pastie_list.sort(reverse = True)
|
pastie_list.sort(reverse=True)
|
||||||
pastebin_list.sort(reverse = True)
|
pastebin_list.sort(reverse=True)
|
||||||
|
|
||||||
total_list.sort(reverse = True)
|
|
||||||
|
|
||||||
|
total_list.sort(reverse=True)
|
||||||
|
|
||||||
plt.plot(codepad_list, 'b', label='Codepad.org')
|
plt.plot(codepad_list, 'b', label='Codepad.org')
|
||||||
plt.plot(pastebin_list, 'g', label='Pastebin.org')
|
plt.plot(pastebin_list, 'g', label='Pastebin.org')
|
||||||
|
|
|
@ -17,12 +17,14 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -32,9 +34,9 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
@ -44,12 +46,12 @@ def main():
|
||||||
subscriber_name = "categ"
|
subscriber_name = "categ"
|
||||||
subscriber_config_section = "PubSub_Words"
|
subscriber_config_section = "PubSub_Words"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
publisher.info("""Suscribed to channel {0}""".format(channel))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Categ_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Categ_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Categ_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Categ_Q")
|
||||||
|
|
|
@ -26,13 +26,17 @@ Requirements
|
||||||
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
|
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, argparse, zmq, ConfigParser, time
|
import redis
|
||||||
from packages import Paste as P
|
import argparse
|
||||||
|
import ConfigParser
|
||||||
|
import time
|
||||||
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -42,62 +46,58 @@ def main():
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
# SCRIPT PARSER #
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description = '''This script is a part of the Analysis Information
|
description='''This script is a part of the Analysis Information Leak framework.''',
|
||||||
Leak framework.''',
|
epilog='''''')
|
||||||
epilog = '''''')
|
|
||||||
|
|
||||||
parser.add_argument('-max',
|
parser.add_argument('-max', type=int, default=500,
|
||||||
type = int,
|
help='The limit between "short lines" and "long lines" (500)',
|
||||||
default = 500,
|
action='store')
|
||||||
help = 'The limit between "short lines" and "long lines" (500)',
|
|
||||||
action = 'store')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Data_Merging", "host"),
|
host=cfg.get("Redis_Data_Merging", "host"),
|
||||||
port = cfg.getint("Redis_Data_Merging", "port"),
|
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||||
db = cfg.getint("Redis_Data_Merging", "db"))
|
db=cfg.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
p_serv = r_serv.pipeline(False)
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
#Subscriber
|
# Subscriber
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
channel = cfg.get("PubSub_Global", "channel")
|
||||||
subscriber_name = "line"
|
subscriber_name = "line"
|
||||||
subscriber_config_section = "PubSub_Global"
|
subscriber_config_section = "PubSub_Global"
|
||||||
|
|
||||||
#Publisher
|
# Publisher
|
||||||
publisher_config_section = "PubSub_Longlines"
|
publisher_config_section = "PubSub_Longlines"
|
||||||
publisher_name = "publine"
|
publisher_name = "publine"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
|
|
||||||
Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
||||||
|
|
||||||
channel_0 = cfg.get("PubSub_Longlines", "channel_0")
|
channel_0 = cfg.get("PubSub_Longlines", "channel_0")
|
||||||
channel_1 = cfg.get("PubSub_Longlines", "channel_1")
|
channel_1 = cfg.get("PubSub_Longlines", "channel_1")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Lines script Subscribed to channel {0} and Start to publish
|
tmp_string = "Lines script Subscribed to channel {} and Start to publish on channel {}, {}"
|
||||||
on channel {1}, {2}""".format(cfg.get("PubSub_Global", "channel"),
|
publisher.info(tmp_string.format(
|
||||||
|
cfg.get("PubSub_Global", "channel"),
|
||||||
cfg.get("PubSub_Longlines", "channel_0"),
|
cfg.get("PubSub_Longlines", "channel_0"),
|
||||||
cfg.get("PubSub_Longlines", "channel_1")))
|
cfg.get("PubSub_Longlines", "channel_1")))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = Sub.get_msg_from_queue(r_serv1)
|
message = sub.get_msg_from_queue(r_serv1)
|
||||||
if message != None:
|
if message is not None:
|
||||||
PST = P.Paste(message.split(" ",-1)[-1])
|
PST = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
else:
|
else:
|
||||||
if r_serv1.sismember("SHUTDOWN_FLAGS", "Lines"):
|
if r_serv1.sismember("SHUTDOWN_FLAGS", "Lines"):
|
||||||
r_serv1.srem("SHUTDOWN_FLAGS", "Lines")
|
r_serv1.srem("SHUTDOWN_FLAGS", "Lines")
|
||||||
|
@ -113,13 +113,13 @@ def main():
|
||||||
PST.save_attribute_redis(r_serv, "p_nb_lines", lines_infos[0])
|
PST.save_attribute_redis(r_serv, "p_nb_lines", lines_infos[0])
|
||||||
PST.save_attribute_redis(r_serv, "p_max_length_line", lines_infos[1])
|
PST.save_attribute_redis(r_serv, "p_max_length_line", lines_infos[1])
|
||||||
|
|
||||||
r_serv.sadd("Pastes_Objects",PST.p_path)
|
r_serv.sadd("Pastes_Objects", PST.p_path)
|
||||||
if lines_infos[1] >= args.max:
|
if lines_infos[1] >= args.max:
|
||||||
msg = channel_0+" "+PST.p_path
|
msg = channel_0+" "+PST.p_path
|
||||||
else:
|
else:
|
||||||
msg = channel_1+" "+PST.p_path
|
msg = channel_1+" "+PST.p_path
|
||||||
|
|
||||||
Pub.send_message(msg)
|
pub.send_message(msg)
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Error on : ", PST.p_path
|
print "CRC Checksum Error on : ", PST.p_path
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -18,12 +18,14 @@ Requirements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -33,9 +35,9 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
@ -44,13 +46,13 @@ def main():
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
channel = cfg.get("PubSub_Global", "channel")
|
||||||
subscriber_name = "line"
|
subscriber_name = "line"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Global", channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
publisher.info("""Suscribed to channel {0}""".format(channel))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Lines_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Lines_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Lines_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Lines_Q")
|
||||||
|
|
|
@ -21,13 +21,16 @@ Requirements
|
||||||
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser, time
|
import redis
|
||||||
from packages import Paste as P
|
import ConfigParser
|
||||||
|
import time
|
||||||
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -37,9 +40,9 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
@ -49,12 +52,12 @@ def main():
|
||||||
subscriber_name = "tokenize"
|
subscriber_name = "tokenize"
|
||||||
subscriber_config_section = "PubSub_Longlines"
|
subscriber_config_section = "PubSub_Longlines"
|
||||||
|
|
||||||
#Publisher
|
# Publisher
|
||||||
publisher_config_section = "PubSub_Words"
|
publisher_config_section = "PubSub_Words"
|
||||||
publisher_name = "pubtokenize"
|
publisher_name = "pubtokenize"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
||||||
|
|
||||||
channel_0 = cfg.get("PubSub_Words", "channel_0")
|
channel_0 = cfg.get("PubSub_Words", "channel_0")
|
||||||
|
|
||||||
|
@ -62,10 +65,10 @@ def main():
|
||||||
publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))
|
publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
print message
|
print message
|
||||||
if message != None:
|
if message is not None:
|
||||||
PST = P.Paste(message.split(" ",-1)[-1])
|
PST = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
|
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
|
||||||
|
@ -80,7 +83,7 @@ def main():
|
||||||
for word, score in PST._get_top_words().items():
|
for word, score in PST._get_top_words().items():
|
||||||
if len(word) >= 4:
|
if len(word) >= 4:
|
||||||
msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
|
msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
|
||||||
Pub.send_message(msg)
|
pub.send_message(msg)
|
||||||
print msg
|
print msg
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -17,12 +17,14 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Line" channel 1
|
*Should register to the Publisher "ZMQ_PubSub_Line" channel 1
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -32,9 +34,9 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
@ -44,13 +46,13 @@ def main():
|
||||||
subscriber_name = "tokenize"
|
subscriber_name = "tokenize"
|
||||||
subscriber_config_section = "PubSub_Longlines"
|
subscriber_config_section = "PubSub_Longlines"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
publisher.info("""Suscribed to channel {0}""".format(channel))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize_Q")
|
||||||
|
|
|
@ -26,13 +26,16 @@ Requirements
|
||||||
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
|
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser, time
|
import redis
|
||||||
from packages import Paste as P
|
import ConfigParser
|
||||||
|
import time
|
||||||
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -42,37 +45,35 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Data_Merging", "host"),
|
host=cfg.get("Redis_Data_Merging", "host"),
|
||||||
port = cfg.getint("Redis_Data_Merging", "port"),
|
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||||
db = cfg.getint("Redis_Data_Merging", "db"))
|
db=cfg.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
p_serv = r_serv.pipeline(False)
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
#Subscriber
|
# Subscriber
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
channel = cfg.get("PubSub_Global", "channel")
|
||||||
subscriber_name = "attributes"
|
subscriber_name = "attributes"
|
||||||
subscriber_config_section = "PubSub_Global"
|
subscriber_config_section = "PubSub_Global"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""ZMQ Attribute is Running""")
|
publisher.info("""ZMQ Attribute is Running""")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = Sub.get_msg_from_queue(r_serv1)
|
message = sub.get_msg_from_queue(r_serv1)
|
||||||
|
|
||||||
if message != None:
|
if message is not None:
|
||||||
PST = P.Paste(message.split(" ",-1)[-1])
|
PST = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
else:
|
else:
|
||||||
if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"):
|
if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"):
|
||||||
r_serv1.srem("SHUTDOWN_FLAGS", "Attributes")
|
r_serv1.srem("SHUTDOWN_FLAGS", "Attributes")
|
||||||
|
@ -89,12 +90,12 @@ def main():
|
||||||
PST.save_attribute_redis(r_serv, "p_encoding", encoding)
|
PST.save_attribute_redis(r_serv, "p_encoding", encoding)
|
||||||
PST.save_attribute_redis(r_serv, "p_language", language)
|
PST.save_attribute_redis(r_serv, "p_language", language)
|
||||||
|
|
||||||
r_serv.sadd("Pastes_Objects",PST.p_path)
|
r_serv.sadd("Pastes_Objects", PST.p_path)
|
||||||
|
|
||||||
PST.save_all_attributes_redis(r_serv)
|
PST.save_all_attributes_redis(r_serv)
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Failed on :", PST.p_path
|
print "CRC Checksum Failed on :", PST.p_path
|
||||||
publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" ))
|
publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed"))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,12 +18,14 @@ Requirements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -33,9 +35,9 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
@ -44,13 +46,13 @@ def main():
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
channel = cfg.get("PubSub_Global", "channel")
|
||||||
subscriber_name = "attributes"
|
subscriber_name = "attributes"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
publisher.info("""Suscribed to channel {0}""".format(channel))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Attributes_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Attributes_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Attributes_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Attributes_Q")
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
import redis, zmq, ConfigParser, json, pprint, time
|
import redis
|
||||||
from packages import Paste as P
|
import ConfigParser
|
||||||
|
import pprint
|
||||||
|
import time
|
||||||
|
from packages import Paste
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
@ -9,6 +12,7 @@ from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -18,60 +22,58 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Data_Merging", "host"),
|
host=cfg.get("Redis_Data_Merging", "host"),
|
||||||
port = cfg.getint("Redis_Data_Merging", "port"),
|
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||||
db = cfg.getint("Redis_Data_Merging", "db"))
|
db=cfg.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
p_serv = r_serv.pipeline(False)
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Creditcard script subscribed to channel creditcard_categ")
|
publisher.info("Creditcard script subscribed to channel creditcard_categ")
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
||||||
|
|
||||||
mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}"
|
# discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}"
|
||||||
jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}"
|
# amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}"
|
||||||
chinaUP_regex = "62[0-5]\d{13,16}"
|
# chinaUP_regex = "62[0-5]\d{13,16}"
|
||||||
maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}"
|
# maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}"
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if message != None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename == None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
Creditcard_set = set([])
|
creditcard_set = set([])
|
||||||
PST = P.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
|
|
||||||
for x in PST.get_regex(creditcard_regex):
|
for x in PST.get_regex(creditcard_regex):
|
||||||
if lib_refine.is_luhn_valid(x):
|
if lib_refine.is_luhn_valid(x):
|
||||||
Creditcard_set.add(x)
|
creditcard_set.add(x)
|
||||||
|
|
||||||
|
PST.__setattr__(channel, creditcard_set)
|
||||||
|
PST.save_attribute_redis(r_serv1, channel, creditcard_set)
|
||||||
|
|
||||||
PST.__setattr__(channel, Creditcard_set)
|
pprint.pprint(creditcard_set)
|
||||||
PST.save_attribute_redis(r_serv1, channel, Creditcard_set)
|
to_print = 'CreditCard;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
|
||||||
|
if (len(creditcard_set) > 0):
|
||||||
pprint.pprint(Creditcard_set)
|
publisher.critical('{}Checked {} valid number(s)'.format(to_print, len(creditcard_set)))
|
||||||
if (len(Creditcard_set) > 0):
|
|
||||||
publisher.critical('{0};{1};{2};{3};{4}'.format("CreditCard", PST.p_source, PST.p_date, PST.p_name,"Checked " + str(len(Creditcard_set))+" valid number(s)" ))
|
|
||||||
else:
|
else:
|
||||||
publisher.info('{0};{1};{2};{3};{4}'.format("CreditCard", PST.p_source, PST.p_date, PST.p_name, "CreditCard related" ))
|
publisher.info('{}CreditCard related'.format(to_print))
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
|
@ -84,7 +86,7 @@ def main():
|
||||||
publisher.debug("Script creditcard is idling 1m")
|
publisher.debug("Script creditcard is idling 1m")
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -16,21 +18,21 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format("creditcard_categ"))
|
publisher.info("""Suscribed to channel {0}""".format("creditcard_categ"))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Creditcards_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Creditcards_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Creditcards_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Creditcards_Q")
|
||||||
|
|
|
@ -21,7 +21,9 @@ Requirements
|
||||||
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, argparse, zmq, ConfigParser, time
|
import redis
|
||||||
|
import ConfigParser
|
||||||
|
import time
|
||||||
from packages import Paste as P
|
from packages import Paste as P
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
@ -29,6 +31,7 @@ from packages import lib_words
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -36,30 +39,16 @@ def main():
|
||||||
cfg = ConfigParser.ConfigParser()
|
cfg = ConfigParser.ConfigParser()
|
||||||
cfg.read(configfile)
|
cfg.read(configfile)
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
description = '''This script is a part of the Analysis Information
|
|
||||||
Leak framework.''',
|
|
||||||
epilog = '''''')
|
|
||||||
|
|
||||||
parser.add_argument('-l',
|
|
||||||
type = str,
|
|
||||||
default = "../files/list_categ_files",
|
|
||||||
help = 'Path to the list_categ_files (../files/list_categ_files)',
|
|
||||||
action = 'store')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Level_DB", "host"),
|
host=cfg.get("Redis_Level_DB", "host"),
|
||||||
port = cfg.get("Redis_Level_DB", "port"),
|
port=cfg.get("Redis_Level_DB", "port"),
|
||||||
db = 0)
|
db=0)
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
@ -69,7 +58,7 @@ def main():
|
||||||
subscriber_name = "curve"
|
subscriber_name = "curve"
|
||||||
subscriber_config_section = "PubSub_Words"
|
subscriber_config_section = "PubSub_Words"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script Curve subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0")))
|
publisher.info("Script Curve subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0")))
|
||||||
|
@ -78,24 +67,23 @@ def main():
|
||||||
csv_path = cfg.get("Directories", "wordtrending_csv")
|
csv_path = cfg.get("Directories", "wordtrending_csv")
|
||||||
wordfile_path = cfg.get("Directories", "wordsfile")
|
wordfile_path = cfg.get("Directories", "wordsfile")
|
||||||
|
|
||||||
paste_words = []
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
while True:
|
while True:
|
||||||
if message != None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
if prec_filename == None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = P.Paste(filename)
|
PST = P.Paste(filename)
|
||||||
lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, int(PST.p_date.year), int(PST.p_date.month))
|
lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, int(PST.p_date.year), int(PST.p_date.month))
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
prev_score = r_serv1.hget(word.lower(), PST.p_date)
|
prev_score = r_serv1.hget(word.lower(), PST.p_date)
|
||||||
print prev_score
|
print prev_score
|
||||||
if prev_score != None:
|
if prev_score is not None:
|
||||||
r_serv1.hset(word.lower(), PST.p_date, int(prev_score) + int(score))
|
r_serv1.hset(word.lower(), PST.p_date, int(prev_score) + int(score))
|
||||||
else:
|
else:
|
||||||
r_serv1.hset(word.lower(), PST.p_date, score)
|
r_serv1.hset(word.lower(), PST.p_date, score)
|
||||||
#r_serv.expire(word,86400) #1day
|
# r_serv.expire(word,86400) #1day
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Curve"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Curve"):
|
||||||
|
@ -107,7 +95,7 @@ def main():
|
||||||
print "sleepin"
|
print "sleepin"
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -17,12 +17,14 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -32,9 +34,9 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
@ -44,12 +46,12 @@ def main():
|
||||||
subscriber_name = "curve"
|
subscriber_name = "curve"
|
||||||
subscriber_config_section = "PubSub_Words"
|
subscriber_config_section = "PubSub_Words"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
publisher.info("""Suscribed to channel {0}""".format(channel))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q")
|
||||||
|
|
|
@ -12,15 +12,18 @@ Requirements:
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser, time, datetime, pprint, time, os
|
import redis
|
||||||
from packages import Paste as P
|
import ConfigParser
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from datetime import date
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from pybloomfilter import BloomFilter
|
from pybloomfilter import BloomFilter
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -31,25 +34,24 @@ def main():
|
||||||
# REDIS #
|
# REDIS #
|
||||||
# DB QUEUE ( MEMORY )
|
# DB QUEUE ( MEMORY )
|
||||||
r_Q_serv = redis.StrictRedis(
|
r_Q_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
r_serv_merge = redis.StrictRedis(
|
r_serv_merge = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Data_Merging", "host"),
|
host=cfg.get("Redis_Data_Merging", "host"),
|
||||||
port = cfg.getint("Redis_Data_Merging", "port"),
|
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||||
db = cfg.getint("Redis_Data_Merging", "db"))
|
db=cfg.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
# DB OBJECT & HASHS ( DISK )
|
# DB OBJECT & HASHS ( DISK )
|
||||||
dico_redis = {}
|
dico_redis = {}
|
||||||
for year in xrange(2013, 2015):
|
for year in xrange(2013, 2015):
|
||||||
for month in xrange(0,16):
|
for month in xrange(0, 16):
|
||||||
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Level_DB", "host"),
|
host=cfg.get("Redis_Level_DB", "host"),
|
||||||
port = year,
|
port=year,
|
||||||
db = month)
|
db=month)
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
@ -59,7 +61,7 @@ def main():
|
||||||
subscriber_name = "duplicate"
|
subscriber_name = "duplicate"
|
||||||
subscriber_config_section = "PubSub_Global"
|
subscriber_config_section = "PubSub_Global"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Script duplicate subscribed to channel {0}""".format(cfg.get("PubSub_Global", "channel")))
|
publisher.info("""Script duplicate subscribed to channel {0}""".format(cfg.get("PubSub_Global", "channel")))
|
||||||
|
@ -75,10 +77,10 @@ def main():
|
||||||
|
|
||||||
x = time.time()
|
x = time.time()
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_Q_serv)
|
message = sub.get_msg_from_queue(r_Q_serv)
|
||||||
if message != None:
|
if message is not None:
|
||||||
path = message.split(" ",-1)[-1]
|
path = message.split(" ", -1)[-1]
|
||||||
PST = P.Paste(path)
|
PST = Paste.Paste(path)
|
||||||
else:
|
else:
|
||||||
publisher.debug("Script Attribute is idling 10s")
|
publisher.debug("Script Attribute is idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
@ -91,17 +93,17 @@ def main():
|
||||||
|
|
||||||
PST._set_p_hash_kind("md5")
|
PST._set_p_hash_kind("md5")
|
||||||
|
|
||||||
#Assignate the correct redis connexion
|
# Assignate the correct redis connexion
|
||||||
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
|
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
|
||||||
|
|
||||||
#Creating the bloom filter name: bloomyyyymm
|
# Creating the bloom filter name: bloomyyyymm
|
||||||
bloomname = 'bloom' + PST.p_date.year + PST.p_date.month
|
bloomname = 'bloom' + PST.p_date.year + PST.p_date.month
|
||||||
|
|
||||||
bloompath = cfg.get("Directories", "bloomfilters")
|
bloompath = cfg.get("Directories", "bloomfilters")
|
||||||
|
|
||||||
filebloompath = bloompath + bloomname
|
filebloompath = bloompath + bloomname
|
||||||
|
|
||||||
#datetime.date(int(PST.p_date.year),int(PST.p_date.month),int(PST.p_date.day)).timetuple().tm_yday % 7
|
# datetime.date(int(PST.p_date.year),int(PST.p_date.month),int(PST.p_date.day)).timetuple().tm_yday % 7
|
||||||
|
|
||||||
if os.path.exists(filebloompath):
|
if os.path.exists(filebloompath):
|
||||||
bloom = BloomFilter.open(filebloompath)
|
bloom = BloomFilter.open(filebloompath)
|
||||||
|
@ -117,40 +119,40 @@ def main():
|
||||||
r_serv1.set(index, PST.p_path)
|
r_serv1.set(index, PST.p_path)
|
||||||
r_serv1.sadd("INDEX", index)
|
r_serv1.sadd("INDEX", index)
|
||||||
|
|
||||||
#For each bloom filter
|
# For each bloom filter
|
||||||
opened_bloom = []
|
opened_bloom = []
|
||||||
for bloo in r_Q_serv.smembers("bloomlist"):
|
for bloo in r_Q_serv.smembers("bloomlist"):
|
||||||
#Opening blooms
|
# Opening blooms
|
||||||
opened_bloom.append(BloomFilter.open(bloo))
|
opened_bloom.append(BloomFilter.open(bloo))
|
||||||
|
|
||||||
# For each hash of the paste
|
# For each hash of the paste
|
||||||
for hash in PST._get_hash_lines(min = 5, start = 1, jump = 0):
|
for hash in PST._get_hash_lines(min=5, start=1, jump=0):
|
||||||
nb_hash_current += 1
|
nb_hash_current += 1
|
||||||
|
|
||||||
#Adding the hash in Redis & limiting the set
|
# Adding the hash in Redis & limiting the set
|
||||||
if r_serv1.scard(hash) <= set_limit:
|
if r_serv1.scard(hash) <= set_limit:
|
||||||
r_serv1.sadd(hash, index)
|
r_serv1.sadd(hash, index)
|
||||||
r_serv1.sadd("HASHS", hash)
|
r_serv1.sadd("HASHS", hash)
|
||||||
#Adding the hash in the bloom of the month
|
# Adding the hash in the bloom of the month
|
||||||
bloom.add(hash)
|
bloom.add(hash)
|
||||||
|
|
||||||
#Go throught the Database of the bloom filter (of the month)
|
# Go throught the Database of the bloom filter (of the month)
|
||||||
for bloo in opened_bloom:
|
for bloo in opened_bloom:
|
||||||
if hash in bloo:
|
if hash in bloo:
|
||||||
db = bloo.name[-6:]
|
db = bloo.name[-6:]
|
||||||
#Go throught the Database of the bloom filter (of the month)
|
# Go throught the Database of the bloom filter (of the month)
|
||||||
r_serv_bloom = dico_redis[db]
|
r_serv_bloom = dico_redis[db]
|
||||||
|
|
||||||
#set of index paste: set([1,2,4,65])
|
# set of index paste: set([1,2,4,65])
|
||||||
hash_current = r_serv_bloom.smembers(hash)
|
hash_current = r_serv_bloom.smembers(hash)
|
||||||
#removing itself from the list
|
# removing itself from the list
|
||||||
hash_current = hash_current - set([index])
|
hash_current = hash_current - set([index])
|
||||||
|
|
||||||
# if the hash is present at least in 1 files (already processed)
|
# if the hash is present at least in 1 files (already processed)
|
||||||
if len(hash_current) != 0:
|
if len(hash_current) != 0:
|
||||||
hash_dico[hash] = hash_current
|
hash_dico[hash] = hash_current
|
||||||
|
|
||||||
#if there is data in this dictionnary
|
# if there is data in this dictionnary
|
||||||
if len(hash_dico) != 0:
|
if len(hash_dico) != 0:
|
||||||
super_dico[index] = hash_dico
|
super_dico[index] = hash_dico
|
||||||
else:
|
else:
|
||||||
|
@ -159,12 +161,11 @@ def main():
|
||||||
|
|
||||||
###########################################################################################
|
###########################################################################################
|
||||||
|
|
||||||
#if there is data in this dictionnary
|
# if there is data in this dictionnary
|
||||||
if len(super_dico) != 0:
|
if len(super_dico) != 0:
|
||||||
# current = current paste, phash_dico = {hash: set, ...}
|
# current = current paste, phash_dico = {hash: set, ...}
|
||||||
occur_dico = {}
|
occur_dico = {}
|
||||||
for current, phash_dico in super_dico.items():
|
for current, phash_dico in super_dico.items():
|
||||||
nb_similar_hash = len(phash_dico)
|
|
||||||
# phash = hash, pset = set([ pastes ...])
|
# phash = hash, pset = set([ pastes ...])
|
||||||
for phash, pset in hash_dico.items():
|
for phash, pset in hash_dico.items():
|
||||||
|
|
||||||
|
@ -180,17 +181,18 @@ def main():
|
||||||
dupl.append((paste, percentage))
|
dupl.append((paste, percentage))
|
||||||
|
|
||||||
# Creating the object attribute and save it.
|
# Creating the object attribute and save it.
|
||||||
|
to_print = 'Duplicate;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
|
||||||
if dupl != []:
|
if dupl != []:
|
||||||
PST.__setattr__("p_duplicate", dupl)
|
PST.__setattr__("p_duplicate", dupl)
|
||||||
PST.save_attribute_redis(r_serv_merge, "p_duplicate", dupl)
|
PST.save_attribute_redis(r_serv_merge, "p_duplicate", dupl)
|
||||||
publisher.info('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name,"Detected " + str(len(dupl))))
|
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
|
||||||
|
|
||||||
y = time.time()
|
y = time.time()
|
||||||
|
|
||||||
publisher.debug('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "Processed in "+str(y-x)+ " sec" ))
|
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Failed on :", PST.p_path
|
print "CRC Checksum Failed on :", PST.p_path
|
||||||
publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" ))
|
publisher.error('{}CRC Checksum Failed'.format(to_print))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -16,22 +18,22 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
channel = cfg.get("PubSub_Global", "channel")
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, "duplicate")
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, "duplicate")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
publisher.info("""Suscribed to channel {0}""".format(channel))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Duplicate_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Duplicate_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Duplicate_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Duplicate_Q")
|
||||||
|
|
|
@ -9,17 +9,20 @@ The ZMQ_Sub_Indexer modules is fetching the list of files to be processed
|
||||||
and index each file with a full-text indexer (Whoosh until now).
|
and index each file with a full-text indexer (Whoosh until now).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser, time
|
import redis
|
||||||
from packages import Paste as P
|
import ConfigParser
|
||||||
|
import time
|
||||||
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
from whoosh.index import create_in, exists_in, open_dir
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
from whoosh.fields import *
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
import os
|
import os
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -29,19 +32,17 @@ def main():
|
||||||
|
|
||||||
# Redis
|
# Redis
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# Indexer configuration - index dir and schema setup
|
# Indexer configuration - index dir and schema setup
|
||||||
indexpath = cfg.get("Indexer", "path")
|
indexpath = cfg.get("Indexer", "path")
|
||||||
indexertype = cfg.get("Indexer", "type")
|
indexertype = cfg.get("Indexer", "type")
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,unique=True), content=TEXT)
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
|
||||||
|
|
||||||
if not os.path.exists(indexpath):
|
if not os.path.exists(indexpath):
|
||||||
os.mkdir(indexpath)
|
os.mkdir(indexpath)
|
||||||
|
|
||||||
if not exists_in(indexpath):
|
if not exists_in(indexpath):
|
||||||
ix = create_in(indexpath, schema)
|
ix = create_in(indexpath, schema)
|
||||||
else:
|
else:
|
||||||
|
@ -51,22 +52,22 @@ def main():
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
#Subscriber
|
# Subscriber
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
channel = cfg.get("PubSub_Global", "channel")
|
||||||
subscriber_name = "indexer"
|
subscriber_name = "indexer"
|
||||||
subscriber_config_section = "PubSub_Global"
|
subscriber_config_section = "PubSub_Global"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""ZMQ Indexer is Running""")
|
publisher.info("""ZMQ Indexer is Running""")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = Sub.get_msg_from_queue(r_serv1)
|
message = sub.get_msg_from_queue(r_serv1)
|
||||||
|
|
||||||
if message != None:
|
if message is not None:
|
||||||
PST = P.Paste(message.split(" ",-1)[-1])
|
PST = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
else:
|
else:
|
||||||
if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
|
if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
|
||||||
r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
|
r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
|
||||||
|
@ -75,16 +76,19 @@ def main():
|
||||||
publisher.debug("Script Indexer is idling 10s")
|
publisher.debug("Script Indexer is idling 10s")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
docpath = message.split(" ",-1)[-1]
|
docpath = message.split(" ", -1)[-1]
|
||||||
paste = PST.get_p_content()
|
paste = PST.get_p_content()
|
||||||
print "Indexing :", docpath
|
print "Indexing :", docpath
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
indexwriter = ix.writer()
|
indexwriter = ix.writer()
|
||||||
indexwriter.update_document(title=unicode(docpath, errors='ignore'),path=unicode(docpath, errors='ignore'),content=unicode(paste, errors='ignore'))
|
indexwriter.update_document(
|
||||||
|
title=unicode(docpath, errors='ignore'),
|
||||||
|
path=unicode(docpath, errors='ignore'),
|
||||||
|
content=unicode(paste, errors='ignore'))
|
||||||
indexwriter.commit()
|
indexwriter.commit()
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Failed on :", PST.p_path
|
print "CRC Checksum Failed on :", PST.p_path
|
||||||
publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" ))
|
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,12 +12,14 @@ handling the indexing process of the files seen.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -27,9 +29,9 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
@ -38,7 +40,7 @@ def main():
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
channel = cfg.get("PubSub_Global", "channel")
|
||||||
subscriber_name = "indexer"
|
subscriber_name = "indexer"
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
||||||
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
publisher.info("""Suscribed to channel {0}""".format(channel))
|
||||||
|
|
||||||
|
@ -46,7 +48,7 @@ def main():
|
||||||
# will get the data from the global ZMQ queue and buffer it in Redis.
|
# will get the data from the global ZMQ queue and buffer it in Redis.
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Indexer_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Indexer_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Indexer_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Indexer_Q")
|
||||||
|
|
|
@ -1,43 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
|
||||||
from packages import Paste as P
|
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host = cfg.get("Redis_default", "host"),
|
|
||||||
port = cfg.getint("Redis_default", "port"),
|
|
||||||
db = args.db)
|
|
||||||
|
|
||||||
p_serv = r_serv.pipeline(False)
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Longlines", "channel_0")
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Longlines", channel)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("Longlines ubscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_0")))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
PST = P.Paste(Sub.get_message().split(" ", -1)[-1])
|
|
||||||
r_serv.sadd("Longlines", PST.p_mime)
|
|
||||||
PST.save_in_redis(r_serv, PST.p_mime)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
|
@ -1,7 +1,10 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis, zmq, ConfigParser, json, pprint, time
|
import redis
|
||||||
|
import ConfigParser
|
||||||
|
import pprint
|
||||||
|
import time
|
||||||
import dns.exception
|
import dns.exception
|
||||||
from packages import Paste as P
|
from packages import Paste as P
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
|
@ -10,6 +13,7 @@ from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -19,40 +23,40 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Data_Merging", "host"),
|
host=cfg.get("Redis_Data_Merging", "host"),
|
||||||
port = cfg.getint("Redis_Data_Merging", "port"),
|
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||||
db = cfg.getint("Redis_Data_Merging", "db"))
|
db=cfg.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
r_serv2 = redis.StrictRedis(
|
r_serv2 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Cache", "host"),
|
host=cfg.get("Redis_Cache", "host"),
|
||||||
port = cfg.getint("Redis_Cache", "port"),
|
port=cfg.getint("Redis_Cache", "port"),
|
||||||
db = cfg.getint("Redis_Cache", "db"))
|
db=cfg.getint("Redis_Cache", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "mails_categ", "emails")
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Suscribed to channel mails_categ")
|
publisher.info("Suscribed to channel mails_categ")
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
if message != None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename == None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = P.Paste(filename)
|
PST = P.Paste(filename)
|
||||||
MX_values = lib_refine.checking_MX_record(r_serv2, PST.get_regex(email_regex))
|
MX_values = lib_refine.checking_MX_record(r_serv2, PST.get_regex(email_regex))
|
||||||
|
|
||||||
|
@ -62,10 +66,11 @@ def main():
|
||||||
PST.save_attribute_redis(r_serv1, channel, (MX_values[0], list(MX_values[1])))
|
PST.save_attribute_redis(r_serv1, channel, (MX_values[0], list(MX_values[1])))
|
||||||
|
|
||||||
pprint.pprint(MX_values)
|
pprint.pprint(MX_values)
|
||||||
|
to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.format(PST.p_source, PST.p_date, PST.p_name, MX_values[0])
|
||||||
if MX_values[0] > 10:
|
if MX_values[0] > 10:
|
||||||
publisher.warning('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked "+ str(MX_values[0])+ " e-mails" ))
|
publisher.warning(to_print)
|
||||||
else:
|
else:
|
||||||
publisher.info('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked "+ str(MX_values[0])+ " e-mail(s)" ))
|
publisher.info(to_print)
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -77,7 +82,7 @@ def main():
|
||||||
publisher.debug("Script Mails is Idling 10s")
|
publisher.debug("Script Mails is Idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
except dns.exception.Timeout:
|
except dns.exception.Timeout:
|
||||||
print "dns.exception.Timeout"
|
print "dns.exception.Timeout"
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -16,21 +18,21 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "mails_categ", "emails")
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format("mails_categ"))
|
publisher.info("""Suscribed to channel {0}""".format("mails_categ"))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Mails_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Mails_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Mails_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Mails_Q")
|
||||||
|
|
|
@ -21,13 +21,17 @@ Requirements
|
||||||
*Need the ZMQ_Sub_Onion_Q Module running to be able to work properly.
|
*Need the ZMQ_Sub_Onion_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser, json, pprint, time
|
import redis
|
||||||
from packages import Paste as P
|
import ConfigParser
|
||||||
|
import pprint
|
||||||
|
import time
|
||||||
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -37,20 +41,14 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Data_Merging", "host"),
|
host=cfg.get("Redis_Data_Merging", "host"),
|
||||||
port = cfg.getint("Redis_Data_Merging", "port"),
|
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||||
db = cfg.getint("Redis_Data_Merging", "db"))
|
db=cfg.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
r_serv2 = redis.StrictRedis(
|
|
||||||
host = cfg.get("Redis_Cache", "host"),
|
|
||||||
port = cfg.getint("Redis_Cache", "port"),
|
|
||||||
db = cfg.getint("Redis_Cache", "db"))
|
|
||||||
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
@ -61,40 +59,40 @@ def main():
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script subscribed to channel onion_categ")
|
publisher.info("Script subscribed to channel onion_categ")
|
||||||
|
|
||||||
|
# Getting the first message from redis.
|
||||||
#Getting the first message from redis.
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = Sub.get_msg_from_queue(r_serv)
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
#Thanks to Faup project for this regex
|
# Thanks to Faup project for this regex
|
||||||
# https://github.com/stricaud/faup
|
# https://github.com/stricaud/faup
|
||||||
url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|onion|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|onion|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if message != None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
|
|
||||||
# "For each new paste"
|
# "For each new paste"
|
||||||
if prec_filename == None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
domains_list = []
|
domains_list = []
|
||||||
PST = P.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
|
|
||||||
for x in PST.get_regex(url_regex):
|
for x in PST.get_regex(url_regex):
|
||||||
#Extracting url with regex
|
# Extracting url with regex
|
||||||
credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x
|
credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x
|
||||||
|
|
||||||
if f1 == "onion":
|
if f1 == "onion":
|
||||||
domains_list.append(domain)
|
domains_list.append(domain)
|
||||||
|
|
||||||
#Saving the list of extracted onion domains.
|
# Saving the list of extracted onion domains.
|
||||||
PST.__setattr__(channel, domains_list)
|
PST.__setattr__(channel, domains_list)
|
||||||
PST.save_attribute_redis(r_serv1, channel, domains_list)
|
PST.save_attribute_redis(r_serv1, channel, domains_list)
|
||||||
pprint.pprint(domains_list)
|
pprint.pprint(domains_list)
|
||||||
print PST.p_path
|
print PST.p_path
|
||||||
|
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
|
||||||
if len(domains_list) > 0:
|
if len(domains_list) > 0:
|
||||||
publisher.warning('{0};{1};{2};{3};{4}'.format("Onion", PST.p_source, PST.p_date, PST.p_name,"Detected " + str(len(domains_list))+" .onion(s)" ))
|
publisher.warning('{}Detected {} .onion(s)'.format(to_print, len(domains_list)))
|
||||||
else:
|
else:
|
||||||
publisher.info('{0};{1};{2};{3};{4}'.format("Onion", PST.p_source, PST.p_date, PST.p_name, "Onion related" ))
|
publisher.info('{}Onion related'.format(to_print))
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
|
|
|
@ -17,12 +17,14 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Categ"
|
*Should register to the Publisher "ZMQ_PubSub_Categ"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -32,21 +34,21 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "onion_categ", "tor")
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "onion_categ", "tor")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format("onion_categ"))
|
publisher.info("""Suscribed to channel {0}""".format("onion_categ"))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q")
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
import redis, zmq, ConfigParser, json, pprint, time
|
import redis
|
||||||
|
import ConfigParser
|
||||||
|
import pprint
|
||||||
|
import time
|
||||||
import dns.exception
|
import dns.exception
|
||||||
from packages import Paste as P
|
from packages import Paste
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
@ -15,6 +18,7 @@ import ipaddress
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -24,62 +28,61 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Data_Merging", "host"),
|
host=cfg.get("Redis_Data_Merging", "host"),
|
||||||
port = cfg.getint("Redis_Data_Merging", "port"),
|
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||||
db = cfg.getint("Redis_Data_Merging", "db"))
|
db=cfg.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
r_serv2 = redis.StrictRedis(
|
r_serv2 = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Cache", "host"),
|
host=cfg.get("Redis_Cache", "host"),
|
||||||
port = cfg.getint("Redis_Cache", "port"),
|
port=cfg.getint("Redis_Cache", "port"),
|
||||||
db = cfg.getint("Redis_Cache", "db"))
|
db=cfg.getint("Redis_Cache", "db"))
|
||||||
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
#Subscriber
|
# Subscriber
|
||||||
subscriber_name = "urls"
|
subscriber_name = "urls"
|
||||||
subscriber_config_section = "PubSub_Categ"
|
subscriber_config_section = "PubSub_Categ"
|
||||||
|
|
||||||
#Publisher
|
# Publisher
|
||||||
publisher_config_section = "PubSub_Url"
|
publisher_config_section = "PubSub_Url"
|
||||||
publisher_name = "adress"
|
publisher_name = "adress"
|
||||||
pubchannel = cfg.get("PubSub_Url", "channel")
|
pubchannel = cfg.get("PubSub_Url", "channel")
|
||||||
|
|
||||||
#Country to log as critical
|
# Country to log as critical
|
||||||
cc_critical = cfg.get("PubSub_Url", "cc_critical")
|
cc_critical = cfg.get("PubSub_Url", "cc_critical")
|
||||||
|
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, "web_categ", subscriber_name)
|
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, "web_categ", subscriber_name)
|
||||||
Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script URL subscribed to channel web_categ")
|
publisher.info("Script URL subscribed to channel web_categ")
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
if message != None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename == None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
domains_list = []
|
domains_list = []
|
||||||
PST = P.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
client = ip2asn()
|
client = ip2asn()
|
||||||
for x in PST.get_regex(url_regex):
|
for x in PST.get_regex(url_regex):
|
||||||
scheme, credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x
|
scheme, credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x
|
||||||
domains_list.append(domain)
|
domains_list.append(domain)
|
||||||
msg = pubchannel + " " + str(x)
|
msg = pubchannel + " " + str(x)
|
||||||
Pub.send_message(msg)
|
pub.send_message(msg)
|
||||||
publisher.debug('{0} Published'.format(x))
|
publisher.debug('{0} Published'.format(x))
|
||||||
|
|
||||||
if f1 == "onion":
|
if f1 == "onion":
|
||||||
|
@ -95,28 +98,29 @@ def main():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
l = client.lookup(socket.inet_aton(ip),qType='IP')
|
l = client.lookup(socket.inet_aton(ip), qType='IP')
|
||||||
except ipaddress.AddressValueError:
|
except ipaddress.AddressValueError:
|
||||||
continue
|
continue
|
||||||
cc = getattr(l,'cc')
|
cc = getattr(l, 'cc')
|
||||||
asn = getattr(l,'asn')
|
asn = getattr(l, 'asn')
|
||||||
|
|
||||||
# EU is not an official ISO 3166 code (but used by RIPE
|
# EU is not an official ISO 3166 code (but used by RIPE
|
||||||
# IP allocation)
|
# IP allocation)
|
||||||
if cc is not None and cc != "EU":
|
if cc is not None and cc != "EU":
|
||||||
print hostl,asn,cc,pycountry.countries.get(alpha2=cc).name
|
print hostl, asn, cc, pycountry.countries.get(alpha2=cc).name
|
||||||
if cc == cc_critical:
|
if cc == cc_critical:
|
||||||
|
# FIXME: That's going to fail.
|
||||||
publisher.warning('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Detected " + str(A_values[0]) + " " + hostl + " " + cc))
|
publisher.warning('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Detected " + str(A_values[0]) + " " + hostl + " " + cc))
|
||||||
else:
|
else:
|
||||||
print hostl,asn,cc
|
print hostl, asn, cc
|
||||||
A_values = lib_refine.checking_A_record(r_serv2, domains_list)
|
A_values = lib_refine.checking_A_record(r_serv2, domains_list)
|
||||||
|
|
||||||
if A_values[0] >= 1:
|
if A_values[0] >= 1:
|
||||||
PST.__setattr__(channel, A_values)
|
PST.__setattr__(channel, A_values)
|
||||||
PST.save_attribute_redis(r_serv1, channel, (A_values[0],list(A_values[1])))
|
PST.save_attribute_redis(r_serv1, channel, (A_values[0], list(A_values[1])))
|
||||||
|
|
||||||
pprint.pprint(A_values)
|
pprint.pprint(A_values)
|
||||||
publisher.info('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Checked " + str(A_values[0]) + " URL" ))
|
publisher.info('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Checked " + str(A_values[0]) + " URL"))
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -128,7 +132,7 @@ def main():
|
||||||
publisher.debug("Script url is Idling 10s")
|
publisher.debug("Script url is Idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = sub.get_msg_from_queue(r_serv)
|
||||||
except dns.exception.Timeout:
|
except dns.exception.Timeout:
|
||||||
print "dns.exception.Timeout", A_values
|
print "dns.exception.Timeout", A_values
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis, zmq, ConfigParser
|
import redis
|
||||||
|
import ConfigParser
|
||||||
from packages import ZMQ_PubSub
|
from packages import ZMQ_PubSub
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = './packages/config.cfg'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main Function"""
|
"""Main Function"""
|
||||||
|
|
||||||
|
@ -16,21 +18,21 @@ def main():
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Queuing"
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
# ZMQ #
|
# ZMQ #
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "web_categ", "urls")
|
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "web_categ", "urls")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Suscribed to channel {0}""".format("web_categ"))
|
publisher.info("""Suscribed to channel {0}""".format("web_categ"))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
Sub.get_and_lpush(r_serv)
|
sub.get_and_lpush(r_serv)
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Urls_Q"):
|
if r_serv.sismember("SHUTDOWN_FLAGS", "Urls_Q"):
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Urls_Q")
|
r_serv.srem("SHUTDOWN_FLAGS", "Urls_Q")
|
||||||
|
|
|
@ -12,13 +12,13 @@
|
||||||
|
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
|
||||||
import gzip
|
import gzip
|
||||||
|
|
||||||
|
|
||||||
def readdoc(path=None):
|
def readdoc(path=None):
|
||||||
if path is None:
|
if path is None:
|
||||||
return False
|
return False
|
||||||
f = gzip.open (path, 'r')
|
f = gzip.open(path, 'r')
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
configfile = '../packages/config.cfg'
|
configfile = '../packages/config.cfg'
|
||||||
|
@ -40,8 +40,8 @@ argParser.add_argument('-s', action='append', help='search similar documents')
|
||||||
args = argParser.parse_args()
|
args = argParser.parse_args()
|
||||||
|
|
||||||
from whoosh import index
|
from whoosh import index
|
||||||
from whoosh.fields import *
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
import whoosh
|
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
|
||||||
|
|
||||||
ix = index.open_dir(indexpath)
|
ix = index.open_dir(indexpath)
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis, ConfigParser, json
|
import redis
|
||||||
from datetime import date, datetime, time
|
import ConfigParser
|
||||||
from flask import Flask, request, render_template, jsonify
|
import json
|
||||||
|
from flask import Flask, render_template, jsonify
|
||||||
import flask
|
import flask
|
||||||
|
|
||||||
# CONFIG #
|
# CONFIG #
|
||||||
|
@ -12,18 +13,19 @@ cfg.read('../../bin/packages/config.cfg')
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Queues", "host"),
|
host=cfg.get("Redis_Queues", "host"),
|
||||||
port = cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db = cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
r_serv_log = redis.StrictRedis(
|
r_serv_log = redis.StrictRedis(
|
||||||
host = cfg.get("Redis_Log", "host"),
|
host=cfg.get("Redis_Log", "host"),
|
||||||
port = cfg.getint("Redis_Log", "port"),
|
port=cfg.getint("Redis_Log", "port"),
|
||||||
db = cfg.getint("Redis_Log", "db"))
|
db=cfg.getint("Redis_Log", "db"))
|
||||||
|
|
||||||
|
|
||||||
app = Flask(__name__, static_url_path='/static/')
|
app = Flask(__name__, static_url_path='/static/')
|
||||||
|
|
||||||
|
|
||||||
def event_stream():
|
def event_stream():
|
||||||
pubsub = r_serv_log.pubsub()
|
pubsub = r_serv_log.pubsub()
|
||||||
pubsub.psubscribe("Script" + '.*')
|
pubsub.psubscribe("Script" + '.*')
|
||||||
|
@ -32,30 +34,34 @@ def event_stream():
|
||||||
if msg['type'] == 'pmessage' and level != "DEBUG":
|
if msg['type'] == 'pmessage' and level != "DEBUG":
|
||||||
yield 'data: %s\n\n' % json.dumps(msg)
|
yield 'data: %s\n\n' % json.dumps(msg)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/_logs")
|
@app.route("/_logs")
|
||||||
def logs():
|
def logs():
|
||||||
return flask.Response(event_stream(), mimetype="text/event-stream")
|
return flask.Response(event_stream(), mimetype="text/event-stream")
|
||||||
|
|
||||||
|
|
||||||
@app.route("/_stuff", methods = ['GET'])
|
@app.route("/_stuff", methods=['GET'])
|
||||||
def stuff():
|
def stuff():
|
||||||
row1 = []
|
row1 = []
|
||||||
for queue in r_serv.smembers("queues"):
|
for queue in r_serv.smembers("queues"):
|
||||||
row1.append((queue, r_serv.llen(queue)))
|
row1.append((queue, r_serv.llen(queue)))
|
||||||
return jsonify(row1=row1)
|
return jsonify(row1=row1)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def index():
|
def index():
|
||||||
row = []
|
row = []
|
||||||
for queue in r_serv.smembers("queues"):
|
for queue in r_serv.smembers("queues"):
|
||||||
row.append((queue, r_serv.llen(queue)))
|
row.append((queue, r_serv.llen(queue)))
|
||||||
|
|
||||||
return render_template("index.html", queues_name = row)
|
return render_template("index.html", queues_name=row)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/monitoring/")
|
@app.route("/monitoring/")
|
||||||
def monitoring():
|
def monitoring():
|
||||||
for queue in r_serv.smembers("queues"):
|
for queue in r_serv.smembers("queues"):
|
||||||
return render_template("Queue_live_Monitoring.html",last_value = queue)
|
return render_template("Queue_live_Monitoring.html", last_value=queue)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/wordstrending/")
|
@app.route("/wordstrending/")
|
||||||
def wordstrending():
|
def wordstrending():
|
||||||
|
@ -63,4 +69,4 @@ def wordstrending():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(host='0.0.0.0' ,port=7000, threaded=True)
|
app.run(host='0.0.0.0', port=7000, threaded=True)
|
||||||
|
|
Loading…
Reference in a new issue