Improve the cleanup. Still some to do.

Raphaël Vinot 2014-08-19 19:07:07 +02:00
parent f1753d67c6
commit 45b0bf3983
21 changed files with 169 additions and 344 deletions

.gitignore

@@ -4,3 +4,6 @@
 AILENV
 redis-leveldb
 redis
+
+# Local config
+bin/packages/config.cfg


@@ -10,52 +10,64 @@ into a Redis-list waiting to be popped later by others scripts.
 ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
 the same Subscriber name in both of them.
 
-Requirements
-------------
-
-*Running Redis instances.
-*Should register to the Publisher "ZMQ_PubSub_Line" channel 1
-
 """
 import redis
 import ConfigParser
 import os
-from packages import ZMQ_PubSub
+import zmq
 
 
-class Queues(object):
+class Redis_Queues(object):
 
-    def __init__(self):
-        configfile = os.join(os.environ('AIL_BIN'), 'packages/config.cfg')
-        if not os.exists(configfile):
-            raise Exception('Unable to find the configuration file. Did you set environment variables? Or activate the virtualenv.')
+    def __init__(self, zmq_conf_section, zmq_conf_channel, subscriber_name):
+        configfile = os.path.join(os.environ('AIL_BIN'), 'packages/config.cfg')
+        if not os.path.exists(configfile):
+            raise Exception('Unable to find the configuration file. \
+                    Did you set environment variables? \
+                    Or activate the virtualenv.')
         self.config = ConfigParser.ConfigParser()
-        self.config.read(self.configfile)
+        self.config.read(configfile)
+        self.subscriber_name = subscriber_name
 
-    def _queue_init_redis(self):
+        # ZMQ subscriber
+        self.sub_channel = self.config.get(zmq_conf_section, zmq_conf_channel)
+        sub_address = self.config.get(zmq_conf_section, 'adress')
+        context = zmq.Context()
+        self.sub_socket = context.socket(zmq.SUB)
+        self.sub_socket.connect(sub_address)
+        self.sub_socket.setsockopt(zmq.SUBSCRIBE, self.sub_channel)
+
+        # Redis Queue
         config_section = "Redis_Queues"
         self.r_queues = redis.StrictRedis(
             host=self.config.get(config_section, "host"),
             port=self.config.getint(config_section, "port"),
             db=self.config.getint(config_section, "db"))
+        self.redis_channel = self.sub_channel + subscriber_name
 
-    def _queue_shutdown(self):
-        # FIXME: Why not just a key?
-        if self.r_queues.sismember("SHUTDOWN_FLAGS", "Feed_Q"):
-            self.r_queues.srem("SHUTDOWN_FLAGS", "Feed_Q")
-            return True
-        return False
+    def zmq_pub(self, config_section):
+        # FIXME: should probably go somewhere else
+        context = zmq.Context()
+        self.pub_socket = context.socket(zmq.PUB)
+        self.pub_socket.bind(self.config.get(config_section, 'adress'))
 
-    def queue_subscribe(self, publisher, config_section, channel,
-                        subscriber_name):
-        channel = self.config.get(config_section, channel)
-        zmq_sub = ZMQ_PubSub.ZMQSub(self.config, config_section,
-                                    channel, subscriber_name)
-        publisher.info("""Suscribed to channel {}""".format(channel))
-        self._queue_init_redis()
+    def redis_queue_shutdown(self, is_queue=False):
+        if is_queue:
+            flag = self.subscriber_name + '_Q'
+        else:
+            flag = self.subscriber_name
+        # srem returns False if the element does not exists
+        return self.r_queues.srem('SHUTDOWN_FLAGS', flag)
+
+    def redis_queue_subscribe(self, publisher):
+        publisher.info("Suscribed to channel {}".format(self.sub_channel))
         while True:
-            zmq_sub.get_and_lpush(self.r_queues)
-            if self._queues_shutdown():
+            msg = self.sub_socket.recv()
+            p = self.r_queues.pipeline()
+            p.sadd("queues", self.redis_channel)
+            p.lpush(self.redis_channel, msg)
+            p.execute()
+            if self.redis_queue_shutdown(True):
                 print "Shutdown Flag Up: Terminating"
                 publisher.warning("Shutdown Flag Up: Terminating.")
                 break
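
The new helper collapses each *_Q script to three configuration values and two calls, as the one-line diffs below show. A minimal sketch of the intended usage, taking the values from the feed queue script (the logging channel name is illustrative):

import Helper
from pubsublogger import publisher

if __name__ == "__main__":
    publisher.channel = "Queuing"  # illustrative logging channel
    # Receive on the ZMQ channel named by [Feed]/topicfilter and
    # lpush every message into the per-subscriber Redis list
    h = Helper.Redis_Queues('Feed', 'topicfilter', 'feed')
    h.redis_queue_subscribe(publisher)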


@@ -20,50 +20,34 @@ Requirements
 *Need the ZMQ_Feed_Q Module running to be able to work properly.
 
 """
-import redis
-import ConfigParser
 import base64
 import os
 import time
 from pubsublogger import publisher
-from packages import ZMQ_PubSub
 
-configfile = './packages/config.cfg'
+import Helper
 
-def main():
-    """Main Function"""
+if __name__ == "__main__":
+    publisher.channel = "Script"
 
-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
+    config_section = 'Feed'
+    config_channel = 'topicfilter'
+    subscriber_name = 'feed'
 
-    # REDIS
-    r_serv = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
 
-    # ZMQ #
-    channel = cfg.get("Feed", "topicfilter")
-
-    # Subscriber
-    subscriber_name = "feed"
-    subscriber_config_section = "Feed"
-
     # Publisher
-    publisher_name = "pubfed"
-    publisher_config_section = "PubSub_Global"
+    pub_config_section = "PubSub_Global"
+    h.zmq_pub(pub_config_section)
+    pub_channel = h.config.get(pub_config_section, "channel")
 
-    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-    PubGlob = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
-
     # LOGGING #
-    publisher.channel = "Script"
     publisher.info("Feed Script started to receive & publish.")
 
     while True:
-        message = Sub.get_msg_from_queue(r_serv)
+        message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)
         # Recovering the streamed message informations.
         if message is not None:
             if len(message.split()) == 3:
@@ -75,8 +59,7 @@ def main():
                 publisher.debug("Empty Paste: {0} not processed".format(paste))
                 continue
         else:
-            if r_serv.sismember("SHUTDOWN_FLAGS", "Feed"):
-                r_serv.srem("SHUTDOWN_FLAGS", "Feed")
+            if h.redis_queue_shutdown():
                 print "Shutdown Flag Up: Terminating"
                 publisher.warning("Shutdown Flag Up: Terminating.")
                 break
@@ -84,24 +67,13 @@ def main():
             time.sleep(10)
             continue
         # Creating the full filepath
-        filename = cfg.get("Directories", "pastes") + paste
+        filename = os.path.join(os.environ('AIL_BIN'),
+                                h.config.get("Directories", "pastes"), paste)
+        dirname = os.path.dirname(filename)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
 
-        if not os.path.exists(filename.rsplit("/", 1)[0]):
-            os.makedirs(filename.rsplit("/", 1)[0])
-        else:
-            # Path already existing
-            pass
+        with open(filename, 'wb') as f:
+            f.write(base64.standard_b64decode(gzip64encoded))
 
-        decoded_gzip = base64.standard_b64decode(gzip64encoded)
-        # paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16)
-
-        with open(filename, 'wb') as F:
-            F.write(decoded_gzip)
-
-        msg = cfg.get("PubSub_Global", "channel")+" "+filename
-        PubGlob.send_message(msg)
-        publisher.debug("{0} Published".format(msg))
-
-if __name__ == "__main__":
-    main()
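
For reference, a feed message carries three space-separated fields: the topic filter from the [Feed] section, the paste path, and the paste content gzipped then base64-encoded. A round-trip sketch (topic and path are hypothetical; the inflate call mirrors the zlib comment in the removed code):

import base64
import gzip
import zlib
import StringIO

# Build a hypothetical message: topic, paste path, base64(gzip(content))
buf = StringIO.StringIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as g:
    g.write('some paste content')
message = '102 paste/2014/08/19/example.gz ' + base64.standard_b64encode(buf.getvalue())

topic, paste, gzip64encoded = message.split()
raw = base64.standard_b64decode(gzip64encoded)  # ZMQ_Feed writes this to disk as-is
content = zlib.decompress(raw, zlib.MAX_WBITS | 16)  # inflate with gzip framing
assert content == 'some paste content'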


@@ -33,5 +33,5 @@ if __name__ == "__main__":
     config_channel = 'topicfilter'
     subscriber_name = 'feed'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -36,111 +36,78 @@ Requirements
 *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
 
 """
-import redis
+import glob
+import os
 import argparse
-import ConfigParser
 import time
-from packages import ZMQ_PubSub
 from pubsublogger import publisher
 from packages import Paste
 
-configfile = './packages/config.cfg'
+import Helper
 
+if __name__ == "__main__":
+    publisher.channel = "Script"
 
-def main():
-    """Main Function"""
+    # Publisher
+    pub_config_section = 'PubSub_Categ'
 
-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
+    config_section = 'PubSub_Words'
+    config_channel = 'channel_0'
+    subscriber_name = 'pubcateg'
+
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.zmq_pub(pub_config_section)
 
     # SCRIPT PARSER #
     parser = argparse.ArgumentParser(
-        description='''This script is a part of the Analysis Information Leak framework.''',
-        epilog='''''')
+        description='This script is a part of the Analysis Information \
+                Leak framework.')
 
     parser.add_argument(
-        '-l', type=str, default="../files/list_categ_files",
-        help='Path to the list_categ_files (../files/list_categ_files)',
+        '-d', type=str, default="../files/",
+        help='Path to the directory containing the category files.',
         action='store')
 
     args = parser.parse_args()
 
-    # REDIS #
-    r_serv = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
-
-    # LOGGING #
-    publisher.channel = "Script"
-
-    # ZMQ #
-    channel = cfg.get("PubSub_Words", "channel_0")
-    subscriber_name = "categ"
-    subscriber_config_section = "PubSub_Words"
-
-    publisher_name = "pubcateg"
-    publisher_config_section = "PubSub_Categ"
-
-    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
-                            subscriber_name)
-    pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section,
-                            publisher_name)
-
     # FUNCTIONS #
-    publisher.info("Script Categ subscribed to channel {0}".format(
-        cfg.get("PubSub_Words", "channel_0")))
+    publisher.info(
+        "Script Categ subscribed to channel {}".format(h.sub_channel))
 
-    with open(args.l, 'rb') as L:
-        tmp_dict = {}
+    tmp_dict = {}
+    for filename in glob.glob(args.d):
+        bname = os.path.basename(filename)
+        tmp_dict[bname] = []
+        with open(filename, 'r') as f:
+            for l in f:
+                tmp_dict[bname].append(l.strip())
 
-        for num, fname in enumerate(L):
-            # keywords temp list
-            tmp_list = []
-
-            with open(fname[:-1], 'rb') as LS:
-                for num, kword in enumerate(LS):
-                    tmp_list.append(kword[:-1])
-
-            tmp_dict[fname.split('/')[-1][:-1]] = tmp_list
-
-    message = sub.get_msg_from_queue(r_serv)
     prec_filename = None
 
     while True:
+        message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)
         if message is not None:
            channel, filename, word, score = message.split()
            if prec_filename is None or filename != prec_filename:
                PST = Paste.Paste(filename)
+           prec_filename = filename
 
-           prec_filename = filename
+           for categ, words_list in tmp_dict.items():
+               if word.lower() in words_list:
+                   h.pub_socket.send('{} {} {} {}'.format(
+                       categ, PST.p_path, word, score))
 
-           for categ, list in tmp_dict.items():
-
-               if word.lower() in list:
-                   channel = categ
-                   msg = channel+" "+PST.p_path+" "+word+" "+score
-                   pub.send_message(msg)
-                   # dico_categ.add(categ)
-
                    publisher.info(
                        'Categ;{};{};{};Detected {} "{}"'.format(
                            PST.p_source, PST.p_date, PST.p_name, score, word))
 
        else:
-           if r_serv.sismember("SHUTDOWN_FLAGS", "Categ"):
-               r_serv.srem("SHUTDOWN_FLAGS", "Categ")
+           if h.redis_queue_shutdown():
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
           publisher.debug("Script Categ is Idling 10s")
           time.sleep(10)
-
-          message = sub.get_msg_from_queue(r_serv)
-
-if __name__ == "__main__":
-    main()
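
The new -d option replaces the old list file: category keyword files are now discovered by globbing, one file per category, keyed by basename (note that glob.glob expands a shell-style pattern, so the argument is expected to match the individual files, e.g. ../files/*). A sketch with hypothetical file and keyword names:

# Illustrative content: ../files/cards holds "visa" and "mastercard",
# ../files/mails holds "hotmail.com". The glob loop then builds:
tmp_dict = {'cards': ['visa', 'mastercard'], 'mails': ['hotmail.com']}
# A hit republishes '<categ> <paste path> <word> <score>', so each
# category name acts as its own channel.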


@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel_0'
     subscriber_name = 'categ'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -29,5 +29,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'line'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -4,9 +4,11 @@
 The ZMQ_PubSub_Lines Module
 ============================
 
-This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q Module.
+This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
+Module.
 
-It tokenize the content of the paste and publish the result in the following format:
+It tokenize the content of the paste and publish the result in the following
+format:
   channel_name+' '+/path/of/the/paste.gz+' '+tokenized_word+' '+scoring
 
 ..seealso:: Paste method (_get_top_words)
@@ -21,72 +23,45 @@ Requirements
 *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
 
 """
-import redis
-import ConfigParser
 import time
 from packages import Paste
-from packages import ZMQ_PubSub
 from pubsublogger import publisher
 
-configfile = './packages/config.cfg'
+import Helper
 
+if __name__ == "__main__":
+    publisher.channel = "Script"
 
-def main():
-    """Main Function"""
-
-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
-
-    # REDIS #
-    r_serv = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
-
-    # LOGGING #
-    publisher.channel = "Script"
-
-    # ZMQ #
-    channel = cfg.get("PubSub_Longlines", "channel_1")
-    subscriber_name = "tokenize"
-    subscriber_config_section = "PubSub_Longlines"
-
     # Publisher
-    publisher_config_section = "PubSub_Words"
-    publisher_name = "pubtokenize"
+    pub_config_section = 'PubSub_Words'
 
-    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-    pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
+    config_section = 'PubSub_Longlines'
+    config_channel = 'channel_1'
+    subscriber_name = 'tokenize'
 
-    channel_0 = cfg.get("PubSub_Words", "channel_0")
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.zmq_pub(pub_config_section)
+    pub_channel = h.config.get(pub_config_section, "channel_0")
 
-    # FUNCTIONS #
-    publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))
+    # LOGGING #
+    publisher.info("Tokeniser subscribed to channel {}".format(h.sub_channel))
 
     while True:
-        message = sub.get_msg_from_queue(r_serv)
+        message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)
         print message
         if message is not None:
-            PST = Paste.Paste(message.split(" ", -1)[-1])
+            paste = Paste.Paste(message.split(" ", -1)[-1])
+            for word, score in paste._get_top_words().items():
+                if len(word) >= 4:
+                    h.pub_socket.send(
+                        '{} {} {} {}'.format(pub_channel, paste.p_path,
+                                             word, score))
        else:
-           if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
-               r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
+           if h.redis_queue_shutdown():
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
           publisher.debug("Tokeniser is idling 10s")
           time.sleep(10)
           print "sleepin"
-          continue
-
-        for word, score in PST._get_top_words().items():
-            if len(word) >= 4:
-                msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
-                pub.send_message(msg)
-                print msg
-            else:
-                pass
-
-if __name__ == "__main__":
-    main()
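
Each published message follows the format documented in the docstring above, and a consumer such as the Categ script recovers the fields with a single split. Hypothetical example:

message = 'words paste/2014/08/19/example.gz password 3'  # hypothetical values
channel, filename, word, score = message.split()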


@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel_1'
     subscriber_name = 'tokenize'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'attributes'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -13,5 +13,5 @@ if __name__ == "__main__":
     config_channel = 'channel_0'
     subscriber_name = 'creditcard_categ'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel_0'
     subscriber_name = 'curve'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -12,5 +12,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'duplicate'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -9,38 +9,37 @@ The ZMQ_Sub_Indexer modules is fetching the list of files to be processed
 and index each file with a full-text indexer (Whoosh until now).
 
 """
-import redis
-import ConfigParser
 import time
 from packages import Paste
-from packages import ZMQ_PubSub
 from pubsublogger import publisher
 
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.fields import Schema, TEXT, ID
 import os
 
-configfile = './packages/config.cfg'
+import Helper
 
+if __name__ == "__main__":
+    publisher.channel = "Script"
 
-def main():
-    """Main Function"""
+    # Subscriber
+    sub_config_section = 'PubSub_Global'
+    sub_name = 'indexer'
 
-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
+    config_section = 'PubSub_Global'
+    config_channel = 'channel'
+    subscriber_name = 'indexer'
 
-    # Redis
-    r_serv1 = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
 
     # Indexer configuration - index dir and schema setup
-    indexpath = cfg.get("Indexer", "path")
-    indexertype = cfg.get("Indexer", "type")
+    indexpath = h.config.get("Indexer", "path")
+    indexertype = h.config.get("Indexer", "type")
     if indexertype == "whoosh":
-        schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
+        schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
+                                                         unique=True),
+                        content=TEXT)
         if not os.path.exists(indexpath):
             os.mkdir(indexpath)
         if not exists_in(indexpath):
@@ -49,29 +48,16 @@ def main():
             ix = open_dir(indexpath)
 
     # LOGGING #
-    publisher.channel = "Script"
-
-    # ZMQ #
-    # Subscriber
-    channel = cfg.get("PubSub_Global", "channel")
-    subscriber_name = "indexer"
-    subscriber_config_section = "PubSub_Global"
-
-    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-
-    # FUNCTIONS #
     publisher.info("""ZMQ Indexer is Running""")
 
     while True:
        try:
-           message = sub.get_msg_from_queue(r_serv1)
+           message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)
 
           if message is not None:
               PST = Paste.Paste(message.split(" ", -1)[-1])
          else:
-             if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
-                 r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
-                 publisher.warning("Shutdown Flag Up: Terminating.")
+             if h.redis_queue_shutdown():
                  break
             publisher.debug("Script Indexer is idling 10s")
             time.sleep(1)
@@ -88,9 +74,5 @@ def main():
             indexwriter.commit()
        except IOError:
           print "CRC Checksum Failed on :", PST.p_path
-          publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name))
-          pass
-
-if __name__ == "__main__":
-    main()
+          publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
+              PST.p_source, PST.p_date, PST.p_name))
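
The hunks above elide the indexing body. For context, a self-contained sketch of the Whoosh calls the script builds on (the path and field values are illustrative, and the update_document call is an assumption about the elided loop body):

import os
from whoosh.index import create_in, exists_in, open_dir
from whoosh.fields import Schema, TEXT, ID

indexpath = 'indexdir'  # in the script: h.config.get("Indexer", "path")
schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True),
                content=TEXT)
if not os.path.exists(indexpath):
    os.mkdir(indexpath)
if not exists_in(indexpath):
    ix = create_in(indexpath, schema)
else:
    ix = open_dir(indexpath)

# Illustrative indexing of one paste; update_document replaces any
# existing document with the same unique path.
writer = ix.writer()
writer.update_document(title=u'example.gz',
                       path=u'paste/2014/08/19/example.gz',
                       content=u'decoded paste content')
writer.commit()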


@@ -24,5 +24,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'indexer'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -12,5 +12,5 @@ if __name__ == "__main__":
     config_channel = 'channel_1'
     subscriber_name = 'mails_categ'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -29,5 +29,5 @@ if __name__ == "__main__":
     config_channel = 'channel_2'
     subscriber_name = 'onion_categ'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -13,5 +13,5 @@ if __name__ == "__main__":
     config_channel = 'channel_3'
     subscriber_name = 'web_categ'
 
-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)


@@ -28,7 +28,6 @@ class PubSub(object):
     """
 
     def __init__(self, config, log_channel, ps_name):
         self._ps_name = ps_name
         self._config_parser = config
 
         self._context_zmq = zmq.Context()
@@ -60,9 +59,8 @@ class ZMQPub(PubSub):
 
     def __init__(self, config, pub_config_section, ps_name):
         super(ZMQPub, self).__init__(config, "Default", ps_name)
-        self._pub_config_section = pub_config_section
 
         self._pubsocket = self._context_zmq.socket(zmq.PUB)
-        self._pub_adress = self._config_parser.get(self._pub_config_section, "adress")
+        self._pub_adress = self._config_parser.get(pub_config_section, "adress")
 
         self._pubsocket.bind(self._pub_adress)
@@ -117,33 +115,14 @@ class ZMQSub(PubSub):
 
     def __init__(self, config, sub_config_section, channel, ps_name):
         super(ZMQSub, self).__init__(config, "Default", ps_name)
-        self._sub_config_section = sub_config_section
 
         self._subsocket = self._context_zmq.socket(zmq.SUB)
-        self._sub_adress = self._config_parser.get(self._sub_config_section, "adress")
+        self._sub_adress = self._config_parser.get(sub_config_section, "adress")
 
         self._subsocket.connect(self._sub_adress)
 
         self._channel = channel
         self._subsocket.setsockopt(zmq.SUBSCRIBE, self._channel)
 
-    def get_message(self):
-        """
-        Get the first sent message from a Publisher.
-
-        :return: (str) Message from Publisher
-
-        """
-        return self._subsocket.recv()
-
-    def get_and_lpush(self, r_serv):
-        """
-        Get the first sent message from a Publisher and storing it in redis
-
-        ..note:: This function also create a set named "queue" for monitoring needs
-
-        """
-        r_serv.sadd("queues", self._channel+self._ps_name)
-        r_serv.lpush(self._channel+self._ps_name, self._subsocket.recv())
-
     def get_msg_from_queue(self, r_serv):
         """
         Get the first sent message from a Redis List


@ -1,65 +0,0 @@
[Directories]
bloomfilters = /home/user/Blooms/
pastes = /home/user/PASTES/
##### Redis #####
[Redis_Cache]
host = localhost
port = 6379
db = 0
[Redis_Log]
host = localhost
port = 6380
db = 0
[Redis_Queues]
host = localhost
port = 6381
db = 0
[Redis_Data_Merging]
host = localhost
port = 6379
db = 1
##### LevelDB #####
[Redis_Level_DB]
host = localhost
port = 2013
db = 0
[Redis_Level_DB_Hashs]
host = localhost
port = 2013
db = 1
# PUB / SUB : ZMQ
[Feed]
adress = tcp://crf.circl.lu:5556
topicfilter = 102
[PubSub_Global]
adress = tcp://127.0.0.1:5000
channel = filelist
[PubSub_Longlines]
adress = tcp://127.0.0.1:5001
channel_0 = Longlines
channel_1 = Shortlines
[PubSub_Words]
adress = tcp://127.0.0.1:5002
channel_0 = words
[PubSub_Categ]
adress = tcp://127.0.0.1:5003
channel_0 = cards
channel_1 = emails
channel_2 = tor
channel_3 = urls
#Channels are dynamic (1 channel per categ) <= FIXME: no it's not.
[PubSub_Url]
adress = tcp://127.0.0.1:5004
channel = urls


@@ -1,6 +1,6 @@
 [Directories]
 bloomfilters = /home/user/Blooms/
-pastes = /home/user/PASTES/
+pastes = PASTES
 wordtrending_csv = /home/user/AIL/var/www/static/csv/wordstrendingdata
 wordsfile = /home/user/AIL/files/wordfile
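
The now-relative pastes value works together with the ZMQ_Feed.py change above: the path is resolved against the AIL_BIN environment variable at runtime (set, per the error message in the helper, via environment variables or the virtualenv activation). Roughly, with an illustrative AIL_BIN:

import os
os.environ['AIL_BIN'] = '/opt/AIL/bin'  # illustrative value
filename = os.path.join(os.environ['AIL_BIN'], 'PASTES',
                        'paste/2014/08/19/example.gz')
# filename == '/opt/AIL/bin/PASTES/paste/2014/08/19/example.gz'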