Big refactoring, make the queues more flexible

Raphaël Vinot 2014-08-29 19:37:56 +02:00
parent 623e876f3b
commit abfe13436b
40 changed files with 412 additions and 734 deletions

View file

@@ -30,34 +30,28 @@ import time
 from packages import Paste
 from pubsublogger import publisher
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Global'
-    config_channel = 'channel'
-    subscriber_name = 'attributes'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    config_section = 'Attributes'
+    p = Process(config_section)

     # FUNCTIONS #
-    publisher.info("""ZMQ Attribute is Running""")
+    publisher.info("Attribute is Running")

     while True:
         try:
-            message = h.redis_rpop()
+            message = p.get_from_set()
             if message is not None:
-                PST = Paste.Paste(message.split(" ", -1)[-1])
+                PST = Paste.Paste(message)
             else:
-                if h.redis_queue_shutdown():
-                    print "Shutdown Flag Up: Terminating"
-                    publisher.warning("Shutdown Flag Up: Terminating.")
-                    break
-                publisher.debug("Script Attribute is idling 10s")
-                time.sleep(10)
+                publisher.debug("Script Attribute is idling 1s")
+                time.sleep(1)
                 continue

             # FIXME do it directly in the class

View file

@@ -42,21 +42,15 @@ import time
 from pubsublogger import publisher
 from packages import Paste
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Words'
-    config_channel = 'channel_0'
-    subscriber_name = 'categ'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
-
-    # Publisher
-    pub_config_section = 'PubSub_Categ'
-    h.zmq_pub(pub_config_section, None)
+    config_section = 'Categ'
+    p = Process(config_section)

     # SCRIPT PARSER #
     parser = argparse.ArgumentParser(
@@ -71,11 +65,11 @@ if __name__ == "__main__":
     args = parser.parse_args()

     # FUNCTIONS #
-    publisher.info(
-        "Script Categ subscribed to channel {}".format(h.sub_channel))
+    publisher.info("Script Categ started")

+    categories = ['CreditCards', 'Mail', 'Onion', 'Web']
     tmp_dict = {}
-    for filename in os.listdir(args.d):
+    for filename in categories:
         bname = os.path.basename(filename)
         tmp_dict[bname] = []
         with open(os.path.join(args.d, filename), 'r') as f:
@@ -85,9 +79,9 @@ if __name__ == "__main__":
     prec_filename = None

     while True:
-        message = h.redis_rpop()
+        message = p.get_from_set()
        if message is not None:
-            channel, filename, word, score = message.split()
+            filename, word, score = message.split()
             if prec_filename is None or filename != prec_filename:
                 PST = Paste.Paste(filename)
@@ -96,17 +90,14 @@ if __name__ == "__main__":
             for categ, words_list in tmp_dict.items():
                 if word.lower() in words_list:
-                    h.pub_channel = categ
-                    h.zmq_pub_send('{} {} {}'.format(PST.p_path, word, score))
+                    msg = '{} {} {}'.format(PST.p_path, word, score)
+                    p.populate_set_out(msg, categ)

                     publisher.info(
                         'Categ;{};{};{};Detected {} "{}"'.format(
                             PST.p_source, PST.p_date, PST.p_name, score, word))
         else:
-            if h.redis_queue_shutdown():
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
             publisher.debug("Script Categ is Idling 10s")
-            print 'Sleeping'
             time.sleep(10)

View file

@@ -6,26 +6,27 @@ from packages import Paste
 from packages import lib_refine
 from pubsublogger import publisher
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Categ'
-    config_channel = 'channel_0'
-    subscriber_name = 'cards'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    config_section = 'CreditCards'
+    p = Process(config_section)

     # FUNCTIONS #
     publisher.info("Creditcard script subscribed to channel creditcard_categ")

-    message = h.redis_rpop()
+    message = p.get_from_set()
     prec_filename = None
     creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"

+    # FIXME For retro compatibility
+    channel = 'creditcard_categ'
+
     # mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
     # visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
     # discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|
@@ -37,7 +38,7 @@ if __name__ == "__main__":
     while True:
         if message is not None:
-            channel, filename, word, score = message.split()
+            filename, word, score = message.split()

             if prec_filename is None or filename != prec_filename:
                 creditcard_set = set([])
@@ -62,11 +63,8 @@ if __name__ == "__main__":
             prec_filename = filename

         else:
-            if h.redis_queue_shutdown():
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
             publisher.debug("Script creditcard is idling 1m")
-            print 'Sleeping'
             time.sleep(60)

-        message = h.redis_rpop()
+        message = p.get_from_set()

View file

@@ -29,38 +29,35 @@ from pubsublogger import publisher
 from packages import lib_words
 import os
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Words'
-    config_channel = 'channel_0'
-    subscriber_name = "curve"
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    config_section = 'Curve'
+    p = Process(config_section)

     # REDIS #
     r_serv1 = redis.StrictRedis(
-        host=h.config.get("Redis_Level_DB", "host"),
-        port=h.config.get("Redis_Level_DB", "port"),
-        db=h.config.get("Redis_Level_DB", "db"))
+        host=p.config.get("Redis_Level_DB", "host"),
+        port=p.config.get("Redis_Level_DB", "port"),
+        db=p.config.get("Redis_Level_DB", "db"))

     # FUNCTIONS #
-    publisher.info("Script Curve subscribed to {}".format(h.sub_channel))
+    publisher.info("Script Curve started")

     # FILE CURVE SECTION #
     csv_path = os.path.join(os.environ['AIL_HOME'],
-                            h.config.get("Directories", "wordtrending_csv"))
+                            p.config.get("Directories", "wordtrending_csv"))
     wordfile_path = os.path.join(os.environ['AIL_HOME'],
-                                 h.config.get("Directories", "wordsfile"))
+                                 p.config.get("Directories", "wordsfile"))

-    message = h.redis_rpop()
+    message = p.get_from_set()
     prec_filename = None

     while True:
         if message is not None:
-            channel, filename, word, score = message.split()
+            filename, word, score = message.split()
             if prec_filename is None or filename != prec_filename:
                 PST = Paste.Paste(filename)
                 lib_words.create_curve_with_word_file(
@@ -69,7 +66,6 @@ if __name__ == "__main__":
             prec_filename = filename

             prev_score = r_serv1.hget(word.lower(), PST.p_date)
-            print prev_score
             if prev_score is not None:
                 r_serv1.hset(word.lower(), PST.p_date,
                              int(prev_score) + int(score))
@@ -77,12 +73,7 @@ if __name__ == "__main__":
                 r_serv1.hset(word.lower(), PST.p_date, score)

         else:
-            if h.redis_queue_shutdown():
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
             publisher.debug("Script Curve is Idling")
-            print "sleepin"
+            print "sleeping"
             time.sleep(1)

-        message = h.redis_rpop()
+        message = p.get_from_set()

View file

@@ -19,17 +19,15 @@ from packages import Paste
 from pubsublogger import publisher
 from pybloomfilter import BloomFilter
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Global'
-    config_channel = 'channel'
-    subscriber_name = 'duplicate'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    config_section = 'Duplicates'
+    p = Process(config_section)

     # REDIS #
     # DB OBJECT & HASHS ( DISK )
@@ -38,16 +36,15 @@ if __name__ == "__main__":
     for year in xrange(2013, 2015):
         for month in xrange(0, 16):
             dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
-                host=h.config.get("Redis_Level_DB", "host"), port=year,
+                host=p.config.get("Redis_Level_DB", "host"), port=year,
                 db=month)

     # FUNCTIONS #
-    publisher.info("""Script duplicate subscribed to channel {0}""".format(
-        h.config.get("PubSub_Global", "channel")))
+    publisher.info("Script duplicate started")

     set_limit = 100
     bloompath = os.path.join(os.environ['AIL_HOME'],
-                             h.config.get("Directories", "bloomfilters"))
+                             p.config.get("Directories", "bloomfilters"))

     bloop_path_set = set()
     while True:
@@ -59,17 +56,13 @@ if __name__ == "__main__":
         x = time.time()

-        message = h.redis_rpop()
+        message = p.get_from_set()
         if message is not None:
-            path = message.split(" ", -1)[-1]
+            path = message
             PST = Paste.Paste(path)
         else:
             publisher.debug("Script Attribute is idling 10s")
             time.sleep(10)
-            if h.redis_queue_shutdown():
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
             continue

         PST._set_p_hash_kind("md5")

View file

@@ -25,56 +25,44 @@ import os
 import time
 from pubsublogger import publisher
-import Helper
+from Helper import Process

-if __name__ == "__main__":
+if __name__ == '__main__':
     publisher.port = 6380
-    publisher.channel = "Script"
-    config_section = 'Feed'
-    config_channel = 'topicfilter'
-    subscriber_name = 'feed'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
-
-    # Publisher
-    pub_config_section = "PubSub_Global"
-    pub_config_channel = 'channel'
-    h.zmq_pub(pub_config_section, pub_config_channel)
+    publisher.channel = 'Script'
+    config_section = 'Global'
+    p = Process(config_section)

     # LOGGING #
     publisher.info("Feed Script started to receive & publish.")

     while True:
-        message = h.redis_rpop()
+        message = p.get_from_set()
         # Recovering the streamed message informations.
         if message is not None:
-            if len(message.split()) == 3:
-                topic, paste, gzip64encoded = message.split()
-                print paste
+            splitted = message.split()
+            if len(splitted) == 2:
+                paste, gzip64encoded = splitted
             else:
                 # TODO Store the name of the empty paste inside a Redis-list.
                 print "Empty Paste: not processed"
                 publisher.debug("Empty Paste: {0} not processed".format(paste))
                 continue
         else:
-            if h.redis_queue_shutdown():
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
             print "Empty Queues: Waiting..."
-            time.sleep(10)
+            time.sleep(1)
             continue

         # Creating the full filepath
         filename = os.path.join(os.environ['AIL_HOME'],
-                                h.config.get("Directories", "pastes"), paste)
+                                p.config.get("Directories", "pastes"), paste)
         dirname = os.path.dirname(filename)
         if not os.path.exists(dirname):
             os.makedirs(dirname)

         with open(filename, 'wb') as f:
             f.write(base64.standard_b64decode(gzip64encoded))
-
-        h.zmq_pub_send(filename)
+        p.populate_set_out(filename)
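
With the leading channel token gone, a feeder now sends exactly two space-separated fields: the paste path and the base64'd gzipped content. A sketch of building such a message on the feeder side (hypothetical path, Python 2 stdlib only):

    import base64
    import gzip
    import StringIO

    # Gzip the raw paste into a memory buffer.
    buf = StringIO.StringIO()
    g = gzip.GzipFile(fileobj=buf, mode='wb')
    g.write('raw paste content')
    g.close()
    # '<paste-path> <base64(gzip(content))>' -- matches the two-field
    # split performed above.
    message = 'archive/2014/08/29/example.gz ' + \
        base64.standard_b64encode(buf.getvalue())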

View file

@@ -15,74 +15,144 @@ import redis
 import ConfigParser
 import os
 import zmq
+import time
+import json


-class Redis_Queues(object):
+class PubSub(object):

-    def __init__(self, conf_section, conf_channel, subscriber_name):
+    def __init__(self):
         configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
-        print configfile
         if not os.path.exists(configfile):
             raise Exception('Unable to find the configuration file. \
                             Did you set environment variables? \
                             Or activate the virtualenv.')
         self.config = ConfigParser.ConfigParser()
         self.config.read(configfile)
-        self.subscriber_name = subscriber_name
-
-        self.sub_channel = self.config.get(conf_section, conf_channel)
-        self.sub_address = self.config.get(conf_section, 'address')
-        self.redis_channel = self.sub_channel + self.subscriber_name
-
-        # Redis Queue
-        config_section = "Redis_Queues"
-        self.r_queues = redis.StrictRedis(
-            host=self.config.get(config_section, "host"),
-            port=self.config.getint(config_section, "port"),
-            db=self.config.getint(config_section, "db"))
-
-    def zmq_sub(self):
-        context = zmq.Context()
-        self.sub_socket = context.socket(zmq.SUB)
-        self.sub_socket.connect(self.sub_address)
-        self.sub_socket.setsockopt(zmq.SUBSCRIBE, self.sub_channel)
-
-    def zmq_pub(self, config_section, config_channel):
-        context = zmq.Context()
-        self.pub_socket = context.socket(zmq.PUB)
-        self.pub_socket.bind(self.config.get(config_section, 'adress'))
-        if config_channel is not None:
-            self.pub_channel = self.config.get(config_section, config_channel)
-        else:
-            # The publishing channel is defined dynamically
-            self.pub_channel = None
-
-    def zmq_pub_send(self, msg):
-        if self.pub_channel is None:
-            raise Exception('A channel is reqired to send a message.')
-        self.pub_socket.send('{} {}'.format(self.pub_channel, msg))
-
-    def redis_rpop(self):
-        return self.r_queues.rpop(self.redis_channel)
-
-    def redis_queue_shutdown(self, is_queue=False):
-        if is_queue:
-            flag = self.subscriber_name + '_Q'
-        else:
-            flag = self.subscriber_name
-        # srem returns False if the element does not exists
-        return self.r_queues.srem('SHUTDOWN_FLAGS', flag)
-
-    def redis_queue_subscribe(self, publisher):
-        self.zmq_sub()
-        publisher.info("Suscribed to channel {}".format(self.sub_channel))
-        while True:
-            msg = self.sub_socket.recv()
-            p = self.r_queues.pipeline()
-            p.sadd("queues", self.redis_channel)
-            p.lpush(self.redis_channel, msg)
-            p.execute()
-            if self.redis_queue_shutdown(True):
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
+        self.redis_sub = False
+        self.zmq_sub = False
+        self.subscriber = None
+        self.publishers = {'Redis': [], 'ZMQ': []}
+
+    def setup_subscribe(self, conn_name):
+        if self.config.has_section(conn_name):
+            channel = self.config.get(conn_name, 'channel')
+        else:
+            channel = conn_name.split('_')[1]
+        if conn_name.startswith('Redis'):
+            self.redis_sub = True
+            r = redis.StrictRedis(
+                host=self.config.get('RedisPubSub', 'host'),
+                port=self.config.get('RedisPubSub', 'port'),
+                db=self.config.get('RedisPubSub', 'db'))
+            self.subscriber = r.pubsub()
+            self.subscriber.psubscribe(channel)
+            self.subscriber.get_message()
+        elif conn_name.startswith('ZMQ'):
+            self.zmq_sub = True
+            context = zmq.Context()
+            self.subscriber = context.socket(zmq.SUB)
+            self.subscriber.connect(self.config.get(conn_name, 'address'))
+            self.subscriber.setsockopt(zmq.SUBSCRIBE, channel)
+
+    def setup_publish(self, conn_name):
+        if self.config.has_section(conn_name):
+            channel = self.config.get(conn_name, 'channel')
+        else:
+            channel = conn_name.split('_')[1]
+        if conn_name.startswith('Redis'):
+            r = redis.StrictRedis(host=self.config.get('RedisPubSub', 'host'),
+                                  port=self.config.get('RedisPubSub', 'port'),
+                                  db=self.config.get('RedisPubSub', 'db'))
+            self.publishers['Redis'].append((r, channel))
+        elif conn_name.startswith('ZMQ'):
+            context = zmq.Context()
+            p = context.socket(zmq.PUB)
+            p.bind(self.config.get(conn_name, 'address'))
+            self.publishers['ZMQ'].append((p, channel))
+
+    def publish(self, message):
+        m = json.loads(message)
+        channel_message = m.get('channel')
+        for p, channel in self.publishers['Redis']:
+            if channel_message is None or channel_message == channel:
+                p.publish(channel, m['message'])
+        for p, channel in self.publishers['ZMQ']:
+            if channel_message is None or channel_message == channel:
+                p.send('{} {}'.format(channel, m['message']))
+
+    def subscribe(self):
+        if self.redis_sub:
+            for msg in self.subscriber.listen():
+                if msg.get('data', None) is not None:
+                    yield msg['data']
+        elif self.zmq_sub:
+            while True:
+                msg = self.subscriber.recv()
+                yield msg.split(' ', 1)[1]
+        else:
+            raise Exception('No subscribe function defined')
+
+
+class Process(object):
+
+    def __init__(self, conf_section):
+        configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+        if not os.path.exists(configfile):
+            raise Exception('Unable to find the configuration file. \
+                            Did you set environment variables? \
+                            Or activate the virtualenv.')
+        modulesfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg')
+        self.config = ConfigParser.ConfigParser()
+        self.config.read(configfile)
+        self.modules = ConfigParser.ConfigParser()
+        self.modules.read(modulesfile)
+        self.subscriber_name = conf_section
+
+        self.pubsub = None
+        if self.modules.has_section(conf_section):
+            self.pubsub = PubSub()
+        else:
+            raise Exception('Your process has to listen to at least one feed.')
+        self.r_temp = redis.StrictRedis(
+            host=self.config.get('RedisPubSub', 'host'),
+            port=self.config.get('RedisPubSub', 'port'),
+            db=self.config.get('RedisPubSub', 'db'))
+
+    def populate_set_in(self):
+        # monoproc
+        src = self.modules.get(self.subscriber_name, 'subscribe')
+        self.pubsub.setup_subscribe(src)
+        for msg in self.pubsub.subscribe():
+            in_set = self.subscriber_name + 'in'
+            self.r_temp.sadd(in_set, msg)
+            self.r_temp.hset('queues', self.subscriber_name,
+                             int(self.r_temp.scard(in_set)))
+
+    def get_from_set(self):
+        # multiproc
+        in_set = self.subscriber_name + 'in'
+        self.r_temp.hset('queues', self.subscriber_name,
+                         int(self.r_temp.scard(in_set)))
+        return self.r_temp.spop(in_set)
+
+    def populate_set_out(self, msg, channel=None):
+        # multiproc
+        msg = {'message': msg}
+        if channel is not None:
+            msg.update({'channel': channel})
+        self.r_temp.sadd(self.subscriber_name + 'out', json.dumps(msg))
+
+    def publish(self):
+        # monoproc
+        if not self.modules.has_option(self.subscriber_name, 'publish'):
+            return False
+        dest = self.modules.get(self.subscriber_name, 'publish')
+        # We can have multiple publisher
+        for name in dest.split(','):
+            self.pubsub.setup_publish(name)
+        while True:
+            message = self.r_temp.spop(self.subscriber_name + 'out')
+            if message is None:
+                time.sleep(1)
+                continue
+            self.pubsub.publish(message)
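
The whole refactor reduces every module to one pattern: QueueIn.py drains the subscribed feed into the Redis set '<module>in', the worker pops it with get_from_set(), pushes results into '<module>out' with populate_set_out(), and QueueOut.py forwards those to the queues declared in modules.cfg. A minimal worker sketch against this API (the 'Example' section name is hypothetical and would need matching subscribe/publish entries in modules.cfg):

    import time

    from Helper import Process

    if __name__ == '__main__':
        # Hypothetical modules.cfg section; Process raises if it is missing.
        p = Process('Example')
        while True:
            message = p.get_from_set()
            if message is None:
                time.sleep(1)
                continue
            # ... do the real work here, then hand the result to the
            # out-queue; the optional second argument routes the message
            # to one specific declared publisher.
            p.populate_set_out(message, 'Example')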

View file

@@ -17,26 +17,21 @@ from whoosh.index import create_in, exists_in, open_dir
 from whoosh.fields import Schema, TEXT, ID
 import os
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    # Subscriber
-    sub_config_section = 'PubSub_Global'
-    sub_name = 'indexer'
-
-    config_section = 'PubSub_Global'
-    config_channel = 'channel'
-    subscriber_name = 'indexer'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    config_section = 'Indexer'
+    p = Process(config_section)

     # Indexer configuration - index dir and schema setup
-    indexpath = h.config.get("Indexer", "path")
-    indexertype = h.config.get("Indexer", "type")
+    indexpath = os.path.join(os.environ['AIL_HOME'],
+                             p.config.get("Indexer", "path"))
+    indexertype = p.config.get("Indexer", "type")
     if indexertype == "whoosh":
         schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
                                                          unique=True),
@@ -49,18 +44,16 @@ if __name__ == "__main__":
         ix = open_dir(indexpath)

     # LOGGING #
-    publisher.info("""ZMQ Indexer is Running""")
+    publisher.info("ZMQ Indexer is Running")

     while True:
         try:
-            message = h.redis_rpop()
+            message = p.get_from_set()

             if message is not None:
-                PST = Paste.Paste(message.split(" ", -1)[-1])
+                PST = Paste.Paste(message)
             else:
-                if h.redis_queue_shutdown():
-                    break
-                publisher.debug("Script Indexer is idling 10s")
+                publisher.debug("Script Indexer is idling 1s")
                 time.sleep(1)
                 continue
             docpath = message.split(" ", -1)[-1]

View file

@@ -32,26 +32,19 @@ import time
 from packages import Paste
 from pubsublogger import publisher
-import Helper
+from Helper import Process

-if __name__ == "__main__":
+if __name__ == '__main__':
     publisher.port = 6380
-    publisher.channel = "Script"
-    config_section = 'PubSub_Global'
-    config_channel = 'channel'
-    subscriber_name = 'line'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
-
-    # Publisher
-    pub_config_section = 'PubSub_Longlines'
-    h.zmq_pub(pub_config_section, None)
+    publisher.channel = 'Script'
+    config_section = 'Lines'
+    p = Process(config_section)

     # SCRIPT PARSER #
     parser = argparse.ArgumentParser(
-        description='''This script is a part of the Analysis Information \
-        Leak framework.''')
+        description='This script is a part of the Analysis Information \
+        Leak framework.')
     parser.add_argument(
         '-max', type=int, default=500,
@@ -60,24 +53,18 @@ if __name__ == "__main__":
     args = parser.parse_args()

-    channel_0 = h.config.get("PubSub_Longlines", "channel_0")
-    channel_1 = h.config.get("PubSub_Longlines", "channel_1")
-
     # FUNCTIONS #
     tmp_string = "Lines script Subscribed to channel {} and Start to publish \
-        on channel {}, {}"
-    publisher.info(tmp_string.format(h.sub_channel, channel_0, channel_1))
+        on channel Longlines, Shortlines"
+    publisher.info(tmp_string)

     while True:
         try:
-            message = h.redis_rpop()
-            print message
+            message = p.get_from_set()
             if message is not None:
-                PST = Paste.Paste(message.split(" ", -1)[-1])
+                PST = Paste.Paste(message)
             else:
-                if h.redis_queue_shutdown():
-                    print "Shutdown Flag Up: Terminating"
-                    publisher.warning("Shutdown Flag Up: Terminating.")
-                    break
                 publisher.debug("Tokeniser is idling 10s")
                 time.sleep(10)
                 continue
@@ -89,10 +76,9 @@ if __name__ == "__main__":
             # FIXME Not used.
             PST.store.sadd("Pastes_Objects", PST.p_path)
-            if lines_infos[1] >= args.max:
-                h.pub_channel = channel_0
+            if lines_infos[1] < args.max:
+                p.populate_set_out(PST.p_path, 'LinesShort')
             else:
-                h.pub_channel = channel_1
-            h.zmq_pub_send(PST.p_path)
+                p.populate_set_out(PST.p_path, 'LinesLong')
         except IOError:
             print "CRC Checksum Error on : ", PST.p_path

View file

@@ -9,28 +9,29 @@ from packages import Paste
 from packages import lib_refine
 from pubsublogger import publisher
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Categ'
-    config_channel = 'channel_1'
-    subscriber_name = 'emails'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    config_section = 'Mail'
+    p = Process(config_section)

     # REDIS #
     r_serv2 = redis.StrictRedis(
-        host=h.config.get("Redis_Cache", "host"),
-        port=h.config.getint("Redis_Cache", "port"),
-        db=h.config.getint("Redis_Cache", "db"))
+        host=p.config.get("Redis_Cache", "host"),
+        port=p.config.getint("Redis_Cache", "port"),
+        db=p.config.getint("Redis_Cache", "db"))

     # FUNCTIONS #
     publisher.info("Suscribed to channel mails_categ")

-    message = h.redis_rpop()
+    # FIXME For retro compatibility
+    channel = 'mails_categ'
+
+    message = p.get_from_set()
     prec_filename = None

     # Log as critical if there are more that that amout of valid emails
@@ -41,7 +42,8 @@ if __name__ == "__main__":
     while True:
         try:
             if message is not None:
-                channel, filename, word, score = message.split()
+                print message
+                filename, word, score = message.split()

                 if prec_filename is None or filename != prec_filename:
                     PST = Paste.Paste(filename)
@@ -65,14 +67,11 @@ if __name__ == "__main__":
                 prec_filename = filename

             else:
-                if h.redis_queue_shutdown():
-                    print "Shutdown Flag Up: Terminating"
-                    publisher.warning("Shutdown Flag Up: Terminating.")
-                    break
                 publisher.debug("Script Mails is Idling 10s")
-                print 'Sleeping'
                 time.sleep(10)

-            message = h.redis_rpop()
+            message = p.get_from_set()
         except dns.exception.Timeout:
             # FIXME retry!
             print "dns.exception.Timeout"

View file

@@ -27,32 +27,34 @@ from packages import Paste
 from pubsublogger import publisher

-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Categ'
-    config_channel = 'channel_2'
-    subscriber_name = 'tor'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    config_section = 'Onion'
+    p = Process(config_section)

     # FUNCTIONS #
     publisher.info("Script subscribed to channel onion_categ")

+    # FIXME For retro compatibility
+    channel = 'onion_categ'
+
     # Getting the first message from redis.
-    message = h.redis_rpop()
+    message = p.get_from_set()
     prec_filename = None

     # Thanks to Faup project for this regex
     # https://github.com/stricaud/faup
-    url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|onion|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
+    url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"

     while True:
         if message is not None:
-            channel, filename, word, score = message.split()
+            print message
+            filename, word, score = message.split()

             # "For each new paste"
             if prec_filename is None or filename != prec_filename:
@@ -64,8 +66,7 @@ if __name__ == "__main__":
                         credential, subdomain, domain, host, tld, port, \
                             resource_path, query_string, f1, f2, f3, f4 = x
-                        if f1 == "onion":
-                            domains_list.append(domain)
+                        domains_list.append(domain)

                 # Saving the list of extracted onion domains.
                 PST.__setattr__(channel, domains_list)
@@ -83,11 +84,8 @@ if __name__ == "__main__":
             prec_filename = filename

         else:
-            if h.redis_queue_shutdown():
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
             publisher.debug("Script url is Idling 10s")
-            print 'Sleeping'
             time.sleep(10)

-        message = h.redis_rpop()
+        message = p.get_from_set()

bin/QueueIn.py Executable file
View file

@@ -0,0 +1,24 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*

from pubsublogger import publisher
from Helper import Process

import argparse


def run(config_section):
    p = Process(config_section)
    p.populate_set_in()


if __name__ == '__main__':
    publisher.port = 6380
    publisher.channel = 'Queuing'

    parser = argparse.ArgumentParser(description='Entry queue for a module.')
    parser.add_argument("-c", "--config_section", type=str,
                        help="Config section to use in the config file.")
    args = parser.parse_args()

    run(args.config_section)

bin/QueueOut.py Executable file
View file

@@ -0,0 +1,24 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*

from pubsublogger import publisher
from Helper import Process

import argparse


def run(config_section):
    p = Process(config_section)
    if not p.publish():
        print(config_section, 'has no publisher.')


if __name__ == '__main__':
    publisher.port = 6380
    publisher.channel = 'Queuing'

    parser = argparse.ArgumentParser(description='Entry queue for a module.')
    parser.add_argument("-c", "--config_section", type=str,
                        help="Config section to use in the config file.")
    args = parser.parse_args()

    run(args.config_section)
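
QueueOut pops the JSON envelopes that populate_set_out() stored in '<module>out'; PubSub.publish() then matches the optional 'channel' key against each declared publisher. The envelope looks like this (values hypothetical):

    import json

    envelope = {'message': '/path/to/paste.gz creditcard 3',
                'channel': 'CreditCards'}   # 'channel' may be omitted
    print json.dumps(envelope)
    # e.g. {"message": "/path/to/paste.gz creditcard 3", "channel": "CreditCards"}
    # (key order may vary)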

View file

@@ -27,40 +27,28 @@ import time
 from packages import Paste
 from pubsublogger import publisher
-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Longlines'
-    config_channel = 'channel_1'
-    subscriber_name = 'tokenize'
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
-
-    # Publisher
-    pub_config_section = 'PubSub_Words'
-    pub_config_channel = 'channel_0'
-    h.zmq_pub(pub_config_section, pub_config_channel)
+    config_section = 'Tokenize'
+    p = Process(config_section)

     # LOGGING #
-    publisher.info("Tokeniser subscribed to channel {}".format(h.sub_channel))
+    publisher.info("Tokeniser started")

     while True:
-        message = h.redis_rpop()
+        message = p.get_from_set()
         print message
         if message is not None:
-            paste = Paste.Paste(message.split(" ", -1)[-1])
+            paste = Paste.Paste(message)
             for word, score in paste._get_top_words().items():
                 if len(word) >= 4:
-                    h.zmq_pub_send('{} {} {}'.format(paste.p_path, word,
-                                                     score))
+                    msg = '{} {} {}'.format(paste.p_path, word, score)
+                    p.populate_set_out(msg)
         else:
-            if h.redis_queue_shutdown():
-                print "Shutdown Flag Up: Terminating"
-                publisher.warning("Shutdown Flag Up: Terminating.")
-                break
             publisher.debug("Tokeniser is idling 10s")
             time.sleep(10)
             print "sleepin"

View file

@@ -14,36 +14,32 @@ import socket
 import pycountry
 import ipaddress

-import Helper
+from Helper import Process

 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
-    config_section = 'PubSub_Categ'
-    config_channel = 'channel_3'
-    subscriber_name = "urls"
-    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
-
-    # Publisher
-    pub_config_section = "PubSub_Url"
-    pub_config_channel = 'channel'
-    h.zmq_pub(pub_config_section, pub_config_channel)
+    config_section = 'Web'
+    p = Process(config_section)

     # REDIS #
     r_serv2 = redis.StrictRedis(
-        host=h.config.get("Redis_Cache", "host"),
-        port=h.config.getint("Redis_Cache", "port"),
-        db=h.config.getint("Redis_Cache", "db"))
+        host=p.config.get("Redis_Cache", "host"),
+        port=p.config.getint("Redis_Cache", "port"),
+        db=p.config.getint("Redis_Cache", "db"))

     # Country to log as critical
-    cc_critical = h.config.get("PubSub_Url", "cc_critical")
+    cc_critical = p.config.get("PubSub_Url", "cc_critical")

     # FUNCTIONS #
     publisher.info("Script URL subscribed to channel web_categ")

-    message = h.redis_rpop()
+    # FIXME For retro compatibility
+    channel = 'web_categ'
+
+    message = p.get_from_set()
     prec_filename = None

     url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
@@ -51,7 +47,7 @@ if __name__ == "__main__":
     while True:
         try:
             if message is not None:
-                channel, filename, word, score = message.split()
+                filename, word, score = message.split()

                 if prec_filename is None or filename != prec_filename:
                     domains_list = []
@@ -62,7 +58,7 @@ if __name__ == "__main__":
                         port, resource_path, query_string, f1, f2, f3, \
                             f4 = x
                         domains_list.append(domain)
-                        h.zmq_pub_send(str(x))
+                        p.populate_set_out(x, 'Url')
                         publisher.debug('{} Published'.format(x))

                         if f1 == "onion":
@@ -110,13 +106,10 @@ if __name__ == "__main__":
                 prec_filename = filename

             else:
-                if h.redis_queue_shutdown():
-                    print "Shutdown Flag Up: Terminating"
-                    publisher.warning("Shutdown Flag Up: Terminating.")
-                    break
                 publisher.debug("Script url is Idling 10s")
-                print 'Sleeping'
                 time.sleep(10)

-            message = h.redis_rpop()
+            message = p.get_from_set()
         except dns.exception.Timeout:
             print "dns.exception.Timeout", A_values

View file

@@ -1,37 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_Feed_Q Module
=====================

This module is the first of the ZMQ tree processing.
It's subscribe to a data stream and put the received messages
into a Redis-list waiting to be popped later by others scripts

..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.

Requirements
------------

*Need running Redis instances.
*Messages from the stream should be formated as follow:
"channel_name"+" "+/path/to/the/paste.gz+" "base64_data_encoded_paste"

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.channel = "Queuing"

    config_section = 'Feed'
    config_channel = 'topicfilter'
    subscriber_name = 'feed'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,35 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_PubSub_Categ_Q Module
============================

This module subscribe to a Publisher stream and put the received messages
into a Redis-list waiting to be popped later by others scripts.

..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.

Requirements
------------

*Running Redis instances.
*Should register to the Publisher "ZMQ_PubSub_Tokenize"

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = 'Queuing'

    config_section = 'PubSub_Words'
    config_channel = 'channel_0'
    subscriber_name = 'categ'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,34 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_PubSub_Line_Q Module
============================

This module subscribe to a Publisher stream and put the received messages
into a Redis-list waiting to be popped later by others scripts.

..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.

Requirements
------------

*Running Redis instances.
*Should register to the Publisher "ZMQ_Feed"

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = "PubSub_Global"
    config_channel = 'channel'
    subscriber_name = 'line'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,35 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_PubSub_Tokenize_Q Module
============================

This module subscribe to a Publisher stream and put the received messages
into a Redis-list waiting to be popped later by others scripts.

..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.

Requirements
------------

*Running Redis instances.
*Should register to the Publisher "ZMQ_PubSub_Line" channel 1

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Longlines'
    config_channel = 'channel_1'
    subscriber_name = 'tokenize'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,54 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_Pub_Global Module
=========================

This module is consuming the Redis-list created by the script ./Dir.py.

This module is as the same level of the ZMQ tree than the Module ZMQ_Feed

Whereas the ZMQ_Feed is poping the list created in redis by ZMQ_Feed_Q which is
listening a stream, ZMQ_Pub_Global is poping the list created in redis by
./Dir.py.

Thanks to this Module there is now two way to Feed the ZMQ tree:
*By a continuous stream ..seealso:: ZMQ_Feed Module
*Manually with this module and ./Dir.py script.

Requirements
------------

*Need running Redis instances. (Redis)

"""
import time
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Global"

    config_section = 'PubSub_Global'
    config_channel = 'channel'
    subscriber_name = 'global'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)

    # Publisher
    pub_config_section = 'PubSub_Global'
    pub_config_channel = 'channel'
    h.zmq_pub(pub_config_section, pub_config_channel)

    # LOGGING #
    publisher.info("Starting to publish.")

    while True:
        filename = h.redis_rpop()

        if filename is not None:
            h.zmq_pub_send(filename)
        else:
            time.sleep(10)
            publisher.debug("Nothing to publish")

View file

@@ -1,35 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_Sub_Attributes_Q Module
============================

This module subscribe to a Publisher stream and put the received messages
into a Redis-list waiting to be popped later by others scripts.

..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.

Requirements
------------

*Running Redis instances.
*Should register to the Publisher "ZMQ_Feed"

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Global'
    config_channel = 'channel'
    subscriber_name = 'attributes'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,18 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Categ'
    config_channel = 'channel_0'
    subscriber_name = 'cards'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,35 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_Sub_Curve_Q Module
============================

This module subscribe to a Publisher stream and put the received messages
into a Redis-list waiting to be popped later by others scripts.

..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.

Requirements
------------

*Running Redis instances.
*Should register to the Publisher "ZMQ_PubSub_Tokenize"

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Words'
    config_channel = 'channel_0'
    subscriber_name = 'curve'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,17 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = 'Queuing'

    config_section = 'PubSub_Global'
    config_channel = 'channel'
    subscriber_name = 'duplicate'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,29 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_Sub_Indexer_Q Module
============================

The ZMQ_Sub_Indexer_Q module subscribes to PubSub_Global ZMQ channel
and bufferizes the data in a Redis FIFO.

The FIFO will be then processed by the Indexer scripts (ZMQ_Sub_Indexer)
handling the indexing process of the files seen.

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Global'
    config_channel = 'channel'
    subscriber_name = 'indexer'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,17 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Categ'
    config_channel = 'channel_1'
    subscriber_name = 'emails'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,34 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The ZMQ_Sub_Onion_Q Module
============================

This module subscribe to a Publisher stream and put the received messages
into a Redis-list waiting to be popped later by ZMQ_Sub_Onion.

..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them (here "tor")

Requirements
------------

*Running Redis instances.
*Should register to the Publisher "ZMQ_PubSub_Categ"

"""
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Categ'
    config_channel = 'channel_2'
    subscriber_name = 'tor'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -1,18 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
from pubsublogger import publisher

import Helper

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Queuing"

    config_section = 'PubSub_Categ'
    config_channel = 'channel_3'
    subscriber_name = 'urls'

    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
    h.redis_queue_subscribe(publisher)

View file

@@ -18,7 +18,8 @@ db = 0
 [Redis_Queues]
 host = localhost
 port = 6381
-db = 0
+db_sub = 0
+db_pub = 1

 [Redis_Data_Merging]
 host = localhost
@@ -71,3 +72,18 @@ cc_critical = DE
 [Indexer]
 type = whoosh
 path = indexdir
+
+###############################################################################
+
+[ZMQ_Global]
+address = tcp://crf.circl.lu:5556
+channel = 102
+
+[ZMQ_Url]
+address = tcp://127.0.0.1:5004
+channel = urls
+
+[RedisPubSub]
+host = localhost
+port = 6381
+db = 0

bin/packages/modules.cfg Normal file
View file

@@ -0,0 +1,41 @@
[Global]
subscribe = ZMQ_Global
publish = Redis_Global

[Duplicates]
subscribe = Redis_Global

[Indexer]
subscribe = Redis_Global

[Attributes]
subscribe = Redis_Global

[Lines]
subscribe = Redis_Global
publish = Redis_LinesShort,Redis_LinesLong

[Tokenize]
subscribe = Redis_LinesShort
publish = Redis_Words

[Curve]
subscribe = Redis_Words

[Categ]
subscribe = Redis_Words
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web

[CreditCards]
subscribe = Redis_CreditCards

[Mail]
subscribe = Redis_Mail

[Onion]
subscribe = Redis_Onion
#publish = Redis_Global

[Web]
subscribe = Redis_Web
publish = Redis_Url,ZMQ_Url
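
Queue names carry their own configuration: if a 'Redis_X' or 'ZMQ_X' name has a section in config.cfg, its 'channel' option is used; otherwise PubSub derives the channel from the name itself (conn_name.split('_')[1]). Wiring a hypothetical new module is therefore a single section:

    [Passwords]
    subscribe = Redis_Words
    publish = Redis_Passwords

Here 'Redis_Passwords' needs no section in config.cfg; its channel simply resolves to 'Passwords'.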

bin/run_modules.py Executable file
View file

@@ -0,0 +1,22 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*

import ConfigParser
import os
import subprocess

if __name__ == '__main__':
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')

    config = ConfigParser.ConfigParser()
    config.read(configfile)

    modules = config.sections()
    for module in modules:
        subprocess.Popen(["python", './QueueIn.py', '-c', module])
        subprocess.Popen(["python", './QueueOut.py', '-c', module])
        #subprocess.Popen(["python", './{}.py'.format(module)])
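
Note that only the QueueIn/QueueOut bridges are spawned; the module scripts themselves stay commented out and still have to be started separately. A single module's bridges can also be launched by hand (sketch, using the Global section):

    import subprocess

    subprocess.Popen(['python', './QueueIn.py', '-c', 'Global'])
    subprocess.Popen(['python', './QueueOut.py', '-c', 'Global'])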

View file

@@ -1,6 +0,0 @@
isFinite
isNaN
eval
var
parseFloat
javascript

View file

@@ -1,6 +0,0 @@
../files/mails_categ
../files/creditcard_categ
../files/pass_categ
../files/web_categ
../files/javascript_categ
../files/onion_categ

View file

@@ -1,23 +0,0 @@
123456
password
12345678
qwerty
abc123
123456789
1234567
iloveyou
adobe123
123123
admin
1234567890
letmein
photoshop
1234
monkey
shadow
sunshine
12345
password1
princess
azerty
trustno1

View file

@@ -44,7 +44,8 @@ def event_stream():

 def get_queues(r):
     # We may want to put the llen in a pipeline to do only one query.
-    return [(queue, r.llen(queue)) for queue in r.smembers("queues")]
+    return [(queue, int(card)) for queue, card in
+            r.hgetall("queues").iteritems()]

@@ -54,6 +55,7 @@ def logs():

 @app.route("/_stuff", methods=['GET'])
 def stuff():
+    print get_queues(r_serv)
     return jsonify(row1=get_queues(r_serv))
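
The dashboard now reads the per-module cardinalities that get_from_set() and populate_set_in() keep in the 'queues' hash, instead of llen over a set of list names. The equivalent redis-py call (connection parameters assumed to match the [RedisPubSub] section):

    import redis

    r = redis.StrictRedis(host='localhost', port=6381, db=0)
    # hgetall returns e.g. {'Global': '12', 'Categ': '0'}; cast the counts.
    queues = [(q, int(card)) for q, card in r.hgetall('queues').iteritems()]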

View file

@@ -158,14 +158,14 @@ $(document).ready(function () {
         var x = new Date();

         for (i = 0; i < glob_tabvar.row1.length; i++){
-            if (glob_tabvar.row1[i][0].substring(0,4) != "word"){
-                tmp_tab.push(0);
-                curves_labels.push(glob_tabvar.row1[i][0]);
-            }
-            else {
+            if (glob_tabvar.row1[i][0] == 'Categ' || glob_tabvar.row1[i][0] == 'Curve'){
                 tmp_tab2.push(0);
                 curves_labels2.push(glob_tabvar.row1[i][0]);
             }
+            else {
+                tmp_tab.push(0);
+                curves_labels.push(glob_tabvar.row1[i][0]);
+            }
         }
         tmp_tab.unshift(x);
         tmp_tab2.unshift(x);
@@ -225,11 +225,11 @@ $(document).ready(function () {
         for (i = 0; i < (glob_tabvar.row1).length; i++){
-            if (glob_tabvar.row1[i][0].substring(0,4) != "word"){
-                tmp_values.push(glob_tabvar.row1[i][1]);
+            if (glob_tabvar.row1[i][0] == 'Categ' || glob_tabvar.row1[i][0] == 'Curve'){
+                tmp_values2.push(glob_tabvar.row1[i][1]);
             }
             else {
-                tmp_values2.push(glob_tabvar.row1[i][1]);
+                tmp_values.push(glob_tabvar.row1[i][1]);
             }
         }
         tmp_values.unshift(x);