mirror of https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00

commit 45b0bf3983 (parent f1753d67c6)

    Improve the cleanup. Still some to do.

21 changed files with 169 additions and 344 deletions
======================================================================
.gitignore (vendored) | 3 +++
@@ -4,3 +4,6 @@
 AILENV
 redis-leveldb
 redis
+
+# Local config
+bin/packages/config.cfg
======================================================================
@@ -10,52 +10,64 @@ into a Redis-list waiting to be popped later by others scripts.
 ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
 the same Subscriber name in both of them.

-Requirements
-------------
-
-*Running Redis instances.
-*Should register to the Publisher "ZMQ_PubSub_Line" channel 1
-
 """
 import redis
 import ConfigParser
 import os
-from packages import ZMQ_PubSub
+import zmq


-class Queues(object):
+class Redis_Queues(object):

-    def __init__(self):
-        configfile = os.join(os.environ['AIL_BIN'], 'packages/config.cfg')
-        if not os.exists(configfile):
-            raise Exception('Unable to find the configuration file. Did you set environment variables? Or activate the virtualenv.')
+    def __init__(self, zmq_conf_section, zmq_conf_channel, subscriber_name):
+        configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+        if not os.path.exists(configfile):
+            raise Exception('Unable to find the configuration file. \
+                    Did you set environment variables? \
+                    Or activate the virtualenv.')
         self.config = ConfigParser.ConfigParser()
-        self.config.read(self.configfile)
+        self.config.read(configfile)
+        self.subscriber_name = subscriber_name

-    def _queue_init_redis(self):
+        # ZMQ subscriber
+        self.sub_channel = self.config.get(zmq_conf_section, zmq_conf_channel)
+        sub_address = self.config.get(zmq_conf_section, 'adress')
+        context = zmq.Context()
+        self.sub_socket = context.socket(zmq.SUB)
+        self.sub_socket.connect(sub_address)
+        self.sub_socket.setsockopt(zmq.SUBSCRIBE, self.sub_channel)

+        # Redis Queue
         config_section = "Redis_Queues"
         self.r_queues = redis.StrictRedis(
             host=self.config.get(config_section, "host"),
             port=self.config.getint(config_section, "port"),
             db=self.config.getint(config_section, "db"))
+        self.redis_channel = self.sub_channel + subscriber_name

-    def _queue_shutdown(self):
-        # FIXME: Why not just a key?
-        if self.r_queues.sismember("SHUTDOWN_FLAGS", "Feed_Q"):
-            self.r_queues.srem("SHUTDOWN_FLAGS", "Feed_Q")
-            return True
-        return False
+    def zmq_pub(self, config_section):
+        # FIXME: should probably go somewhere else
+        context = zmq.Context()
+        self.pub_socket = context.socket(zmq.PUB)
+        self.pub_socket.bind(self.config.get(config_section, 'adress'))

-    def queue_subscribe(self, publisher, config_section, channel,
-                        subscriber_name):
-        channel = self.config.get(config_section, channel)
-        zmq_sub = ZMQ_PubSub.ZMQSub(self.config, config_section,
-                                    channel, subscriber_name)
-        publisher.info("""Suscribed to channel {}""".format(channel))
-        self._queue_init_redis()
+    def redis_queue_shutdown(self, is_queue=False):
+        if is_queue:
+            flag = self.subscriber_name + '_Q'
+        else:
+            flag = self.subscriber_name
+        # srem returns False if the element does not exists
+        return self.r_queues.srem('SHUTDOWN_FLAGS', flag)

+    def redis_queue_subscribe(self, publisher):
+        publisher.info("Suscribed to channel {}".format(self.sub_channel))
         while True:
-            zmq_sub.get_and_lpush(self.r_queues)
-            if self._queues_shutdown():
+            msg = self.sub_socket.recv()
+            p = self.r_queues.pipeline()
+            p.sadd("queues", self.redis_channel)
+            p.lpush(self.redis_channel, msg)
+            p.execute()
+            if self.redis_queue_shutdown(True):
                 print "Shutdown Flag Up: Terminating"
                 publisher.warning("Shutdown Flag Up: Terminating.")
                 break
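A note on how the two halves of the new helper are meant to cooperate. The
sketch below is not part of the commit; it only rearranges calls shown in
this diff, using the ('Feed', 'topicfilter', 'feed') values that the
ZMQ_Feed* scripts pass further down.

# Illustrative sketch, not code from the commit.
from pubsublogger import publisher

import Helper

publisher.channel = "Script"

h = Helper.Redis_Queues('Feed', 'topicfilter', 'feed')

# Queue process (the *_Q script): block on the ZMQ SUB socket and mirror
# every message into the Redis list h.redis_channel; loops until the
# 'feed_Q' flag shows up in SHUTDOWN_FLAGS.
# h.redis_queue_subscribe(publisher)

# Worker process: non-blocking pop from the same list; None means empty.
message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)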
======================================================================
@@ -20,50 +20,34 @@ Requirements
 *Need the ZMQ_Feed_Q Module running to be able to work properly.

 """
-import redis
-import ConfigParser
 import base64
 import os
 import time
 from pubsublogger import publisher
-from packages import ZMQ_PubSub

-configfile = './packages/config.cfg'
+import Helper


-def main():
-    """Main Function"""
+if __name__ == "__main__":
+    publisher.channel = "Script"

-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
+    config_section = 'Feed'
+    config_channel = 'topicfilter'
+    subscriber_name = 'feed'

-    # REDIS
-    r_serv = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)

-    # ZMQ #
-    channel = cfg.get("Feed", "topicfilter")
-
-    # Subscriber
-    subscriber_name = "feed"
-    subscriber_config_section = "Feed"
-
     # Publisher
-    publisher_name = "pubfed"
-    publisher_config_section = "PubSub_Global"
+    pub_config_section = "PubSub_Global"
+    h.zmq_pub(pub_config_section)
+    pub_channel = h.config.get(pub_config_section, "channel")

-    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-    PubGlob = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
-
     # LOGGING #
-    publisher.channel = "Script"
     publisher.info("Feed Script started to receive & publish.")

     while True:
-        message = Sub.get_msg_from_queue(r_serv)
+        message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)
         # Recovering the streamed message informations.
         if message is not None:
             if len(message.split()) == 3:

@@ -75,8 +59,7 @@ def main():
                 publisher.debug("Empty Paste: {0} not processed".format(paste))
                 continue
         else:
-            if r_serv.sismember("SHUTDOWN_FLAGS", "Feed"):
-                r_serv.srem("SHUTDOWN_FLAGS", "Feed")
+            if h.redis_queue_shutdown():
                 print "Shutdown Flag Up: Terminating"
                 publisher.warning("Shutdown Flag Up: Terminating.")
                 break

@@ -84,24 +67,13 @@ def main():
             time.sleep(10)
             continue
         # Creating the full filepath
-        filename = cfg.get("Directories", "pastes") + paste
+        filename = os.path.join(os.environ['AIL_BIN'],
+                                h.config.get("Directories", "pastes"), paste)
+        dirname = os.path.dirname(filename)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)

-        if not os.path.exists(filename.rsplit("/", 1)[0]):
-            os.makedirs(filename.rsplit("/", 1)[0])
-        else:
-            # Path already existing
-            pass
+        with open(filename, 'wb') as f:
+            f.write(base64.standard_b64decode(gzip64encoded))

-        decoded_gzip = base64.standard_b64decode(gzip64encoded)
-        # paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16)
-
-        with open(filename, 'wb') as F:
-            F.write(decoded_gzip)
-
-        msg = cfg.get("PubSub_Global", "channel")+" "+filename
-        PubGlob.send_message(msg)
-        publisher.debug("{0} Published".format(msg))
-
-
-if __name__ == "__main__":
-    main()
+        h.pub_socket.send('{} {}'.format(pub_channel, filename))
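For orientation, a hypothetical feed message and what the reworked loop
does with it; the three-field format and the final publish are from the
diff, the sample values are invented.

# Illustrative sketch, not code from the commit.
import base64
import os

message = '102 archive/2014/01/01/paste.gz H4sIAAAAAAAA...'  # invented
topic, paste, gzip64encoded = message.split()  # len(message.split()) == 3

# The paste now lands under $AIL_BIN + [Directories]pastes ('PASTES' in
# config.cfg.sample), with the directory created on demand:
filename = os.path.join(os.environ['AIL_BIN'], 'PASTES', paste)

# After writing the base64-decoded blob, ZMQ_Feed announces the file as
# '<pub_channel> <filename>', e.g. 'filelist .../PASTES/archive/.../paste.gz'.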
======================================================================
@@ -33,5 +33,5 @@ if __name__ == "__main__":
     config_channel = 'topicfilter'
     subscriber_name = 'feed'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
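Both processes now exit through Redis_Queues.redis_queue_shutdown(), so a
clean stop is a plain set insertion. A sketch, assuming the [Redis_Queues]
settings from the config file deleted at the bottom of this commit:

# Illustrative sketch, not code from the commit.
import redis

r = redis.StrictRedis(host='localhost', port=6381, db=0)
r.sadd('SHUTDOWN_FLAGS', 'feed')    # stops ZMQ_Feed at its next idle check
r.sadd('SHUTDOWN_FLAGS', 'feed_Q')  # stops ZMQ_Feed_Q after its next message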
======================================================================
@@ -36,111 +36,78 @@ Requirements
 *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.

 """
-import redis
+import glob
+import os
 import argparse
-import ConfigParser
 import time
-from packages import ZMQ_PubSub
 from pubsublogger import publisher
 from packages import Paste

-configfile = './packages/config.cfg'
+import Helper

+if __name__ == "__main__":
+    publisher.channel = "Script"

-def main():
-    """Main Function"""
+    # Publisher
+    pub_config_section = 'PubSub_Categ'

-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
+    config_section = 'PubSub_Words'
+    config_channel = 'channel_0'
+    subscriber_name = 'pubcateg'

+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+
+    h.zmq_pub(pub_config_section)
+
     # SCRIPT PARSER #
     parser = argparse.ArgumentParser(
-        description='''This script is a part of the Analysis Information Leak framework.''',
-        epilog='''''')
+        description='This script is a part of the Analysis Information \
+        Leak framework.')

     parser.add_argument(
-        '-l', type=str, default="../files/list_categ_files",
-        help='Path to the list_categ_files (../files/list_categ_files)',
+        '-d', type=str, default="../files/",
+        help='Path to the directory containing the category files.',
         action='store')

     args = parser.parse_args()

-    # REDIS #
-    r_serv = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
-
-    # LOGGING #
-    publisher.channel = "Script"
-
-    # ZMQ #
-    channel = cfg.get("PubSub_Words", "channel_0")
-    subscriber_name = "categ"
-    subscriber_config_section = "PubSub_Words"
-
-    publisher_name = "pubcateg"
-    publisher_config_section = "PubSub_Categ"
-
-    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
-                            subscriber_name)
-    pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section,
-                            publisher_name)
-
     # FUNCTIONS #
-    publisher.info("Script Categ subscribed to channel {0}".format(
-        cfg.get("PubSub_Words", "channel_0")))
+    publisher.info(
+        "Script Categ subscribed to channel {}".format(h.sub_channel))

-    with open(args.l, 'rb') as L:
-        tmp_dict = {}
+    tmp_dict = {}
+    for filename in glob.glob(args.d):
+        bname = os.path.basename(filename)
+        tmp_dict[bname] = []
+        with open(filename, 'r') as f:
+            for l in f:
+                tmp_dict[bname].append(l.strip())

-        for num, fname in enumerate(L):
-            # keywords temp list
-            tmp_list = []
-
-            with open(fname[:-1], 'rb') as LS:
-
-                for num, kword in enumerate(LS):
-                    tmp_list.append(kword[:-1])
-
-            tmp_dict[fname.split('/')[-1][:-1]] = tmp_list
-
-    message = sub.get_msg_from_queue(r_serv)
     prec_filename = None

     while True:
+        message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)
         if message is not None:
             channel, filename, word, score = message.split()

             if prec_filename is None or filename != prec_filename:
                 PST = Paste.Paste(filename)
+                prec_filename = filename

-            prec_filename = filename
+            for categ, words_list in tmp_dict.items():
+                if word.lower() in words_list:
+                    h.pub_socket.send('{} {} {} {}'.format(
+                        categ, PST.p_path, word, score))

-            for categ, list in tmp_dict.items():
-
-                if word.lower() in list:
-                    channel = categ
-                    msg = channel+" "+PST.p_path+" "+word+" "+score
-                    pub.send_message(msg)
-                    # dico_categ.add(categ)
-
                     publisher.info(
                         'Categ;{};{};{};Detected {} "{}"'.format(
                             PST.p_source, PST.p_date, PST.p_name, score, word))

         else:
-            if r_serv.sismember("SHUTDOWN_FLAGS", "Categ"):
-                r_serv.srem("SHUTDOWN_FLAGS", "Categ")
+            if h.redis_queue_shutdown():
                 print "Shutdown Flag Up: Terminating"
                 publisher.warning("Shutdown Flag Up: Terminating.")
                 break
             publisher.debug("Script Categ is Idling 10s")
             time.sleep(10)
-
-            message = sub.get_msg_from_queue(r_serv)
-
-
-if __name__ == "__main__":
-    main()
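The list_categ_files indirection is gone: the categorization script now
reads every file of a directory and keys the keyword lists by basename.
Note that glob.glob() expands a pattern, so the default '../files/' matches
only the directory itself; the sketch below appends '*' to enumerate the
category files (that pattern is an assumption, not in the commit).

# Illustrative sketch, not code from the commit.
import glob
import os

tmp_dict = {}
for filename in glob.glob(os.path.join('../files/', '*')):
    bname = os.path.basename(filename)   # e.g. 'cards', 'mails'
    with open(filename, 'r') as f:
        tmp_dict[bname] = [l.strip() for l in f]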
======================================================================
@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel_0'
     subscriber_name = 'categ'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -29,5 +29,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'line'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -4,9 +4,11 @@
 The ZMQ_PubSub_Lines Module
 ============================

-This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q Module.
+This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
+Module.

-It tokenize the content of the paste and publish the result in the following format:
+It tokenize the content of the paste and publish the result in the following
+format:
 channel_name+' '+/path/of/the/paste.gz+' '+tokenized_word+' '+scoring

 ..seealso:: Paste method (_get_top_words)

@@ -21,72 +23,45 @@ Requirements
 *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.

 """
-import redis
-import ConfigParser
 import time
 from packages import Paste
-from packages import ZMQ_PubSub
 from pubsublogger import publisher

-configfile = './packages/config.cfg'
+import Helper

+if __name__ == "__main__":

-def main():
-    """Main Function"""
-
-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
-
-    # REDIS #
-    r_serv = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
-
-    # LOGGING #
     publisher.channel = "Script"

-    # ZMQ #
-    channel = cfg.get("PubSub_Longlines", "channel_1")
-    subscriber_name = "tokenize"
-    subscriber_config_section = "PubSub_Longlines"
-
     # Publisher
-    publisher_config_section = "PubSub_Words"
-    publisher_name = "pubtokenize"
+    pub_config_section = 'PubSub_Words'

-    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-    pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
+    config_section = 'PubSub_Longlines'
+    config_channel = 'channel_1'
+    subscriber_name = 'tokenize'

-    channel_0 = cfg.get("PubSub_Words", "channel_0")
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)

-    # FUNCTIONS #
-    publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))
+    h.zmq_pub(pub_config_section)
+    pub_channel = h.config.get(pub_config_section, "channel_0")

+    # LOGGING #
+    publisher.info("Tokeniser subscribed to channel {}".format(h.sub_channel))
+
     while True:
-        message = sub.get_msg_from_queue(r_serv)
+        message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)
         print message
         if message is not None:
-            PST = Paste.Paste(message.split(" ", -1)[-1])
+            paste = Paste.Paste(message.split(" ", -1)[-1])
+            for word, score in paste._get_top_words().items():
+                if len(word) >= 4:
+                    h.pub_socket.send(
+                        '{} {} {} {}'.format(pub_channel, paste.p_path,
+                                             word, score))
         else:
-            if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
-                r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
+            if h.redis_queue_shutdown():
                 print "Shutdown Flag Up: Terminating"
                 publisher.warning("Shutdown Flag Up: Terminating.")
                 break
             publisher.debug("Tokeniser is idling 10s")
             time.sleep(10)
             print "sleepin"
-            continue
-
-        for word, score in PST._get_top_words().items():
-            if len(word) >= 4:
-                msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
-                pub.send_message(msg)
-                print msg
-            else:
-                pass
-
-
-if __name__ == "__main__":
-    main()
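The published format is unchanged by the rewrite. For a word that survives
the len(word) >= 4 filter, the message looks like this ('words' is
channel_0 of [PubSub_Words] in the sample config; the path is invented):

# Illustrative sketch, not code from the commit.
pub_channel = 'words'
p_path = '/opt/AIL/bin/PASTES/archive/2014/01/01/paste.gz'
print '{} {} {} {}'.format(pub_channel, p_path, 'password', 5)
# -> words /opt/AIL/bin/PASTES/archive/2014/01/01/paste.gz password 5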
======================================================================
@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel_1'
     subscriber_name = 'tokenize'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'attributes'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -13,5 +13,5 @@ if __name__ == "__main__":
     config_channel = 'channel_0'
     subscriber_name = 'creditcard_categ'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -30,5 +30,5 @@ if __name__ == "__main__":
     config_channel = 'channel_0'
     subscriber_name = 'curve'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -12,5 +12,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'duplicate'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -9,38 +9,37 @@ The ZMQ_Sub_Indexer modules is fetching the list of files to be processed
 and index each file with a full-text indexer (Whoosh until now).

 """
-import redis
-import ConfigParser
 import time
 from packages import Paste
-from packages import ZMQ_PubSub
 from pubsublogger import publisher

 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.fields import Schema, TEXT, ID
 import os

-configfile = './packages/config.cfg'
+import Helper


-def main():
-    """Main Function"""
+if __name__ == "__main__":
+    publisher.channel = "Script"

-    # CONFIG #
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(configfile)
+    # Subscriber
+    sub_config_section = 'PubSub_Global'
+    sub_name = 'indexer'

-    # Redis
-    r_serv1 = redis.StrictRedis(
-        host=cfg.get("Redis_Queues", "host"),
-        port=cfg.getint("Redis_Queues", "port"),
-        db=cfg.getint("Redis_Queues", "db"))
+    config_section = 'PubSub_Global'
+    config_channel = 'channel'
+    subscriber_name = 'indexer'
+
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)

     # Indexer configuration - index dir and schema setup
-    indexpath = cfg.get("Indexer", "path")
-    indexertype = cfg.get("Indexer", "type")
+    indexpath = h.config.get("Indexer", "path")
+    indexertype = h.config.get("Indexer", "type")
     if indexertype == "whoosh":
-        schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
+        schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
+                                                         unique=True),
+                        content=TEXT)
         if not os.path.exists(indexpath):
             os.mkdir(indexpath)
         if not exists_in(indexpath):

@@ -49,29 +48,16 @@ def main():
         ix = open_dir(indexpath)

     # LOGGING #
-    publisher.channel = "Script"
-
-    # ZMQ #
-    # Subscriber
-    channel = cfg.get("PubSub_Global", "channel")
-    subscriber_name = "indexer"
-    subscriber_config_section = "PubSub_Global"
-
-    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-
-    # FUNCTIONS #
     publisher.info("""ZMQ Indexer is Running""")

     while True:
         try:
-            message = sub.get_msg_from_queue(r_serv1)
+            message = h.r_queues.rpop(h.sub_channel + h.subscriber_name)

             if message is not None:
                 PST = Paste.Paste(message.split(" ", -1)[-1])
             else:
-                if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
-                    r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
-                    publisher.warning("Shutdown Flag Up: Terminating.")
+                if h.redis_queue_shutdown():
                     break
                 publisher.debug("Script Indexer is idling 10s")
                 time.sleep(1)

@@ -88,9 +74,5 @@ def main():
             indexwriter.commit()
         except IOError:
             print "CRC Checksum Failed on :", PST.p_path
-            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name))
-            pass
-
-
-if __name__ == "__main__":
-    main()
+            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
+                PST.p_source, PST.p_date, PST.p_name))
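The hunks above elide the indexing step itself. For context, a minimal
sketch of adding one paste under the schema shown above; the writer calls
are standard Whoosh API, not code from this commit.

# Illustrative sketch, not code from the commit.
import os
from whoosh.index import create_in, exists_in, open_dir
from whoosh.fields import Schema, TEXT, ID

schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True),
                content=TEXT)
indexpath = 'indexdir'  # in AIL this comes from [Indexer] in the config
if not os.path.exists(indexpath):
    os.mkdir(indexpath)
if not exists_in(indexpath):
    ix = create_in(indexpath, schema)
else:
    ix = open_dir(indexpath)

indexwriter = ix.writer()
indexwriter.update_document(title=unicode('paste.gz'),
                            path=unicode('/path/to/paste.gz'),
                            content=unicode('decompressed paste content'))
indexwriter.commit()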
======================================================================
@@ -24,5 +24,5 @@ if __name__ == "__main__":
     config_channel = 'channel'
     subscriber_name = 'indexer'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -12,5 +12,5 @@ if __name__ == "__main__":
     config_channel = 'channel_1'
     subscriber_name = 'mails_categ'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -29,5 +29,5 @@ if __name__ == "__main__":
     config_channel = 'channel_2'
     subscriber_name = 'onion_categ'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -13,5 +13,5 @@ if __name__ == "__main__":
     config_channel = 'channel_3'
     subscriber_name = 'web_categ'

-    h = Helper.Queues()
-    h.queue_subscribe(publisher, config_section, config_channel, subscriber_name)
+    h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
+    h.redis_queue_subscribe(publisher)
======================================================================
@@ -28,7 +28,6 @@ class PubSub(object):
     """
     def __init__(self, config, log_channel, ps_name):
         self._ps_name = ps_name
-
         self._config_parser = config

         self._context_zmq = zmq.Context()

@@ -60,9 +59,8 @@ class ZMQPub(PubSub):
     def __init__(self, config, pub_config_section, ps_name):
         super(ZMQPub, self).__init__(config, "Default", ps_name)

-        self._pub_config_section = pub_config_section
         self._pubsocket = self._context_zmq.socket(zmq.PUB)
-        self._pub_adress = self._config_parser.get(self._pub_config_section, "adress")
+        self._pub_adress = self._config_parser.get(pub_config_section, "adress")

         self._pubsocket.bind(self._pub_adress)

@@ -117,33 +115,14 @@ class ZMQSub(PubSub):
     def __init__(self, config, sub_config_section, channel, ps_name):
         super(ZMQSub, self).__init__(config, "Default", ps_name)

-        self._sub_config_section = sub_config_section
         self._subsocket = self._context_zmq.socket(zmq.SUB)
-        self._sub_adress = self._config_parser.get(self._sub_config_section, "adress")
+        self._sub_adress = self._config_parser.get(sub_config_section, "adress")

         self._subsocket.connect(self._sub_adress)

         self._channel = channel
         self._subsocket.setsockopt(zmq.SUBSCRIBE, self._channel)

-    def get_message(self):
-        """
-        Get the first sent message from a Publisher.
-        :return: (str) Message from Publisher
-
-        """
-        return self._subsocket.recv()
-
-    def get_and_lpush(self, r_serv):
-        """
-        Get the first sent message from a Publisher and storing it in redis
-
-        ..note:: This function also create a set named "queue" for monitoring needs
-
-        """
-        r_serv.sadd("queues", self._channel+self._ps_name)
-        r_serv.lpush(self._channel+self._ps_name, self._subsocket.recv())
-
     def get_msg_from_queue(self, r_serv):
         """
         Get the first sent message from a Redis List
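Note that the deleted get_message() and get_and_lpush() are not dropped
from the workflow: the same sadd/lpush pair now runs inside
Helper.Redis_Queues.redis_queue_subscribe() above, batched in a Redis
pipeline.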
======================================================================
@@ -1,65 +0,0 @@
-[Directories]
-bloomfilters = /home/user/Blooms/
-pastes = /home/user/PASTES/
-
-##### Redis #####
-[Redis_Cache]
-host = localhost
-port = 6379
-db = 0
-
-[Redis_Log]
-host = localhost
-port = 6380
-db = 0
-
-[Redis_Queues]
-host = localhost
-port = 6381
-db = 0
-
-[Redis_Data_Merging]
-host = localhost
-port = 6379
-db = 1
-
-##### LevelDB #####
-[Redis_Level_DB]
-host = localhost
-port = 2013
-db = 0
-
-[Redis_Level_DB_Hashs]
-host = localhost
-port = 2013
-db = 1
-
-# PUB / SUB : ZMQ
-[Feed]
-adress = tcp://crf.circl.lu:5556
-topicfilter = 102
-
-[PubSub_Global]
-adress = tcp://127.0.0.1:5000
-channel = filelist
-
-[PubSub_Longlines]
-adress = tcp://127.0.0.1:5001
-channel_0 = Longlines
-channel_1 = Shortlines
-
-[PubSub_Words]
-adress = tcp://127.0.0.1:5002
-channel_0 = words
-
-[PubSub_Categ]
-adress = tcp://127.0.0.1:5003
-channel_0 = cards
-channel_1 = emails
-channel_2 = tor
-channel_3 = urls
-#Channels are dynamic (1 channel per categ) <= FIXME: no it's not.
-
-[PubSub_Url]
-adress = tcp://127.0.0.1:5004
-channel = urls
======================================================================
@@ -1,6 +1,6 @@
 [Directories]
 bloomfilters = /home/user/Blooms/
-pastes = /home/user/PASTES/
+pastes = PASTES
 wordtrending_csv = /home/user/AIL/var/www/static/csv/wordstrendingdata
 wordsfile = /home/user/AIL/files/wordfile
 
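Together with the .gitignore hunk at the top and the deletion above,
bin/packages/config.cfg stops being tracked: each checkout presumably
copies config.cfg.sample into place and edits it, which is also why pastes
becomes the relative PASTES, resolved against AIL_BIN by the reworked
ZMQ_Feed.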