mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-23 06:37:15 +00:00
Merge branch 'rewrite_zmq' into testing
Conflicts: bin/indexer_lookup.py
This commit is contained in:
commit
cf814468e7
33 changed files with 603 additions and 1294 deletions
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
*.swp
|
||||||
|
|
||||||
|
# Install Dirs
|
||||||
|
AILENV
|
||||||
|
redis-leveldb
|
||||||
|
redis
|
||||||
|
|
||||||
|
# Local config
|
||||||
|
bin/packages/config.cfg
|
86
bin/Helper.py
Executable file
86
bin/Helper.py
Executable file
|
@ -0,0 +1,86 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
"""
|
||||||
|
Queue helper module
|
||||||
|
============================
|
||||||
|
|
||||||
|
This module subscribe to a Publisher stream and put the received messages
|
||||||
|
into a Redis-list waiting to be popped later by others scripts.
|
||||||
|
|
||||||
|
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
||||||
|
the same Subscriber name in both of them.
|
||||||
|
|
||||||
|
"""
|
||||||
|
import redis
|
||||||
|
import ConfigParser
|
||||||
|
import os
|
||||||
|
import zmq
|
||||||
|
|
||||||
|
|
||||||
|
class Redis_Queues(object):
|
||||||
|
|
||||||
|
def __init__(self, conf_section, conf_channel, subscriber_name):
|
||||||
|
configfile = os.path.join(os.environ('AIL_BIN'), 'packages/config.cfg')
|
||||||
|
if not os.path.exists(configfile):
|
||||||
|
raise Exception('Unable to find the configuration file. \
|
||||||
|
Did you set environment variables? \
|
||||||
|
Or activate the virtualenv.')
|
||||||
|
self.config = ConfigParser.ConfigParser()
|
||||||
|
self.config.read(configfile)
|
||||||
|
self.subscriber_name = subscriber_name
|
||||||
|
|
||||||
|
self.sub_channel = self.config.get(conf_section, conf_channel)
|
||||||
|
|
||||||
|
# Redis Queue
|
||||||
|
config_section = "Redis_Queues"
|
||||||
|
self.r_queues = redis.StrictRedis(
|
||||||
|
host=self.config.get(config_section, "host"),
|
||||||
|
port=self.config.getint(config_section, "port"),
|
||||||
|
db=self.config.getint(config_section, "db"))
|
||||||
|
|
||||||
|
def zmq_sub(self, conf_section):
|
||||||
|
sub_address = self.config.get(conf_section, 'adress')
|
||||||
|
context = zmq.Context()
|
||||||
|
self.sub_socket = context.socket(zmq.SUB)
|
||||||
|
self.sub_socket.connect(sub_address)
|
||||||
|
self.sub_socket.setsockopt(zmq.SUBSCRIBE, self.sub_channel)
|
||||||
|
|
||||||
|
def zmq_pub(self, config_section, config_channel):
|
||||||
|
context = zmq.Context()
|
||||||
|
self.pub_socket = context.socket(zmq.PUB)
|
||||||
|
self.pub_socket.bind(self.config.get(config_section, 'adress'))
|
||||||
|
if config_channel is not None:
|
||||||
|
self.pub_channel = self.config.get(config_section, config_channel)
|
||||||
|
else:
|
||||||
|
# The publishing channel is defined dynamically
|
||||||
|
self.pub_channel = None
|
||||||
|
|
||||||
|
def zmq_pub_send(self, msg):
|
||||||
|
if self.pub_channel is None:
|
||||||
|
raise Exception('A channel is reqired to send a message.')
|
||||||
|
self.pub_socket.send('{} {}'.format(self.pub_channel, msg))
|
||||||
|
|
||||||
|
def redis_rpop(self):
|
||||||
|
return self.r_queues.rpop(self.sub_channel + self.subscriber_name)
|
||||||
|
|
||||||
|
def redis_queue_shutdown(self, is_queue=False):
|
||||||
|
if is_queue:
|
||||||
|
flag = self.subscriber_name + '_Q'
|
||||||
|
else:
|
||||||
|
flag = self.subscriber_name
|
||||||
|
# srem returns False if the element does not exists
|
||||||
|
return self.r_queues.srem('SHUTDOWN_FLAGS', flag)
|
||||||
|
|
||||||
|
def redis_queue_subscribe(self, publisher):
|
||||||
|
self.redis_channel = self.sub_channel + self.subscriber_name
|
||||||
|
publisher.info("Suscribed to channel {}".format(self.sub_channel))
|
||||||
|
while True:
|
||||||
|
msg = self.sub_socket.recv()
|
||||||
|
p = self.r_queues.pipeline()
|
||||||
|
p.sadd("queues", self.redis_channel)
|
||||||
|
p.lpush(self.redis_channel, msg)
|
||||||
|
p.execute()
|
||||||
|
if self.redis_queue_shutdown(True):
|
||||||
|
print "Shutdown Flag Up: Terminating"
|
||||||
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
|
break
|
|
@ -38,6 +38,7 @@ def main():
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
port=cfg.getint("Redis_Queues", "port"),
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
db=cfg.getint("Redis_Queues", "db"))
|
||||||
|
|
||||||
|
# FIXME: automatic based on the queue name.
|
||||||
# ### SCRIPTS ####
|
# ### SCRIPTS ####
|
||||||
r_serv.sadd("SHUTDOWN_FLAGS", "Feed")
|
r_serv.sadd("SHUTDOWN_FLAGS", "Feed")
|
||||||
r_serv.sadd("SHUTDOWN_FLAGS", "Categ")
|
r_serv.sadd("SHUTDOWN_FLAGS", "Categ")
|
||||||
|
|
|
@ -20,50 +20,34 @@ Requirements
|
||||||
*Need the ZMQ_Feed_Q Module running to be able to work properly.
|
*Need the ZMQ_Feed_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
import base64
|
import base64
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
if __name__ == "__main__":
|
||||||
"""Main Function"""
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# CONFIG #
|
config_section = 'Feed'
|
||||||
cfg = ConfigParser.ConfigParser()
|
config_channel = 'topicfilter'
|
||||||
cfg.read(configfile)
|
subscriber_name = 'feed'
|
||||||
|
|
||||||
# REDIS
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("Feed", "topicfilter")
|
|
||||||
|
|
||||||
# Subscriber
|
|
||||||
subscriber_name = "feed"
|
|
||||||
subscriber_config_section = "Feed"
|
|
||||||
# Publisher
|
# Publisher
|
||||||
publisher_name = "pubfed"
|
pub_config_section = "PubSub_Global"
|
||||||
publisher_config_section = "PubSub_Global"
|
pub_config_channel = 'channel'
|
||||||
|
h.zmq_pub(pub_config_section, pub_config_channel)
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
PubGlob = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
|
||||||
publisher.info("Feed Script started to receive & publish.")
|
publisher.info("Feed Script started to receive & publish.")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
# Recovering the streamed message informations.
|
# Recovering the streamed message informations.
|
||||||
if message is not None:
|
if message is not None:
|
||||||
if len(message.split()) == 3:
|
if len(message.split()) == 3:
|
||||||
|
@ -75,8 +59,7 @@ def main():
|
||||||
publisher.debug("Empty Paste: {0} not processed".format(paste))
|
publisher.debug("Empty Paste: {0} not processed".format(paste))
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Feed"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Feed")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
|
@ -84,24 +67,13 @@ def main():
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
continue
|
continue
|
||||||
# Creating the full filepath
|
# Creating the full filepath
|
||||||
filename = cfg.get("Directories", "pastes") + paste
|
filename = os.path.join(os.environ('AIL_BIN'),
|
||||||
|
h.config.get("Directories", "pastes"), paste)
|
||||||
|
dirname = os.path.dirname(filename)
|
||||||
|
if not os.path.exists(dirname):
|
||||||
|
os.makedirs(dirname)
|
||||||
|
|
||||||
if not os.path.exists(filename.rsplit("/", 1)[0]):
|
with open(filename, 'wb') as f:
|
||||||
os.makedirs(filename.rsplit("/", 1)[0])
|
f.write(base64.standard_b64decode(gzip64encoded))
|
||||||
else:
|
|
||||||
# Path already existing
|
|
||||||
pass
|
|
||||||
|
|
||||||
decoded_gzip = base64.standard_b64decode(gzip64encoded)
|
h.zmq_pub_send(filename)
|
||||||
# paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16)
|
|
||||||
|
|
||||||
with open(filename, 'wb') as F:
|
|
||||||
F.write(decoded_gzip)
|
|
||||||
|
|
||||||
msg = cfg.get("PubSub_Global", "channel")+" "+filename
|
|
||||||
PubGlob.send_message(msg)
|
|
||||||
publisher.debug("{0} Published".format(msg))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -20,45 +20,19 @@ Requirements
|
||||||
"channel_name"+" "+/path/to/the/paste.gz+" "base64_data_encoded_paste"
|
"channel_name"+" "+/path/to/the/paste.gz+" "base64_data_encoded_paste"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("Feed", "topicfilter")
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "Feed", channel, "feed")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Feed_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Feed_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'Feed'
|
||||||
|
config_channel = 'topicfilter'
|
||||||
|
subscriber_name = 'feed'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -36,111 +36,78 @@ Requirements
|
||||||
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
import glob
|
||||||
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
import ConfigParser
|
|
||||||
import time
|
import time
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
publisher.channel = "Script"
|
||||||
|
|
||||||
def main():
|
config_section = 'PubSub_Words'
|
||||||
"""Main Function"""
|
config_channel = 'channel_0'
|
||||||
|
subscriber_name = 'pubcateg'
|
||||||
|
|
||||||
# CONFIG #
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
# Publisher
|
||||||
|
pub_config_section = 'PubSub_Categ'
|
||||||
|
|
||||||
|
h.zmq_pub(pub_config_section, None)
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
# SCRIPT PARSER #
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='''This script is a part of the Analysis Information Leak framework.''',
|
description='This script is a part of the Analysis Information \
|
||||||
epilog='''''')
|
Leak framework.')
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-l', type=str, default="../files/list_categ_files",
|
'-d', type=str, default="../files/",
|
||||||
help='Path to the list_categ_files (../files/list_categ_files)',
|
help='Path to the directory containing the category files.',
|
||||||
action='store')
|
action='store')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Words", "channel_0")
|
|
||||||
subscriber_name = "categ"
|
|
||||||
subscriber_config_section = "PubSub_Words"
|
|
||||||
|
|
||||||
publisher_name = "pubcateg"
|
|
||||||
publisher_config_section = "PubSub_Categ"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
|
|
||||||
subscriber_name)
|
|
||||||
pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section,
|
|
||||||
publisher_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script Categ subscribed to channel {0}".format(
|
publisher.info(
|
||||||
cfg.get("PubSub_Words", "channel_0")))
|
"Script Categ subscribed to channel {}".format(h.sub_channel))
|
||||||
|
|
||||||
with open(args.l, 'rb') as L:
|
|
||||||
tmp_dict = {}
|
tmp_dict = {}
|
||||||
|
for filename in glob.glob(args.d):
|
||||||
|
bname = os.path.basename(filename)
|
||||||
|
tmp_dict[bname] = []
|
||||||
|
with open(filename, 'r') as f:
|
||||||
|
for l in f:
|
||||||
|
tmp_dict[bname].append(l.strip())
|
||||||
|
|
||||||
for num, fname in enumerate(L):
|
|
||||||
# keywords temp list
|
|
||||||
tmp_list = []
|
|
||||||
|
|
||||||
with open(fname[:-1], 'rb') as LS:
|
|
||||||
|
|
||||||
for num, kword in enumerate(LS):
|
|
||||||
tmp_list.append(kword[:-1])
|
|
||||||
|
|
||||||
tmp_dict[fname.split('/')[-1][:-1]] = tmp_list
|
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
message = h.redis_rpop()
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = Paste.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
for categ, list in tmp_dict.items():
|
for categ, words_list in tmp_dict.items():
|
||||||
|
|
||||||
if word.lower() in list:
|
if word.lower() in words_list:
|
||||||
channel = categ
|
h.pub_channel = categ
|
||||||
msg = channel+" "+PST.p_path+" "+word+" "+score
|
h.zmq_pub_send('{} {} {}'.format(PST.p_path, word, score))
|
||||||
pub.send_message(msg)
|
|
||||||
# dico_categ.add(categ)
|
|
||||||
|
|
||||||
publisher.info(
|
publisher.info(
|
||||||
'Categ;{};{};{};Detected {} "{}"'.format(
|
'Categ;{};{};{};Detected {} "{}"'.format(
|
||||||
PST.p_source, PST.p_date, PST.p_name, score, word))
|
PST.p_source, PST.p_date, PST.p_name, score, word))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Categ"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Categ")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
publisher.debug("Script Categ is Idling 10s")
|
publisher.debug("Script Categ is Idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -17,47 +17,19 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Words", "channel_0")
|
|
||||||
subscriber_name = "categ"
|
|
||||||
subscriber_config_section = "PubSub_Words"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Categ_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Categ_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = 'Queuing'
|
||||||
|
|
||||||
|
config_section = 'PubSub_Words'
|
||||||
|
config_channel = 'channel_0'
|
||||||
|
subscriber_name = 'categ'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -5,10 +5,11 @@
|
||||||
The ZMQ_PubSub_Lines Module
|
The ZMQ_PubSub_Lines Module
|
||||||
============================
|
============================
|
||||||
|
|
||||||
This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q Module.
|
This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q
|
||||||
|
Module.
|
||||||
|
|
||||||
It perform a sorting on the line's length and publish/forward them to differents
|
It perform a sorting on the line's length and publish/forward them to
|
||||||
channels:
|
differents channels:
|
||||||
|
|
||||||
*Channel 1 if max length(line) < max
|
*Channel 1 if max length(line) < max
|
||||||
*Channel 2 if max length(line) > max
|
*Channel 2 if max length(line) > max
|
||||||
|
@ -28,79 +29,62 @@ Requirements
|
||||||
"""
|
"""
|
||||||
import redis
|
import redis
|
||||||
import argparse
|
import argparse
|
||||||
import ConfigParser
|
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
publisher.channel = "Script"
|
||||||
|
|
||||||
def main():
|
config_section = 'PubSub_Global'
|
||||||
"""Main Function"""
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'line'
|
||||||
|
|
||||||
# CONFIG #
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
# Publisher
|
||||||
|
pub_config_section = 'PubSub_Longlines'
|
||||||
|
h.zmq_pub(pub_config_section, None)
|
||||||
|
|
||||||
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
# SCRIPT PARSER #
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='''This script is a part of the Analysis Information Leak framework.''',
|
description='''This script is a part of the Analysis Information \
|
||||||
epilog='''''')
|
Leak framework.''')
|
||||||
|
|
||||||
parser.add_argument('-max', type=int, default=500,
|
parser.add_argument(
|
||||||
help='The limit between "short lines" and "long lines" (500)',
|
'-max', type=int, default=500,
|
||||||
|
help='The limit between "short lines" and "long lines"',
|
||||||
action='store')
|
action='store')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
|
# FIXME move it in the Paste object
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host=cfg.get("Redis_Data_Merging", "host"),
|
host=h.config.get("Redis_Data_Merging", "host"),
|
||||||
port=cfg.getint("Redis_Data_Merging", "port"),
|
port=h.config.getint("Redis_Data_Merging", "port"),
|
||||||
db=cfg.getint("Redis_Data_Merging", "db"))
|
db=h.config.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
channel_0 = h.config.get("PubSub_Longlines", "channel_0")
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
channel_1 = h.config.get("PubSub_Longlines", "channel_1")
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
# Subscriber
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
subscriber_name = "line"
|
|
||||||
subscriber_config_section = "PubSub_Global"
|
|
||||||
|
|
||||||
# Publisher
|
|
||||||
publisher_config_section = "PubSub_Longlines"
|
|
||||||
publisher_name = "publine"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
|
|
||||||
pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
|
||||||
|
|
||||||
channel_0 = cfg.get("PubSub_Longlines", "channel_0")
|
|
||||||
channel_1 = cfg.get("PubSub_Longlines", "channel_1")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
tmp_string = "Lines script Subscribed to channel {} and Start to publish on channel {}, {}"
|
tmp_string = "Lines script Subscribed to channel {} and Start to publish \
|
||||||
publisher.info(tmp_string.format(
|
on channel {}, {}"
|
||||||
cfg.get("PubSub_Global", "channel"),
|
publisher.info(tmp_string.format(h.sub_channel, channel_0, channel_1))
|
||||||
cfg.get("PubSub_Longlines", "channel_0"),
|
|
||||||
cfg.get("PubSub_Longlines", "channel_1")))
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = sub.get_msg_from_queue(r_serv1)
|
message = h.redis_rpop()
|
||||||
if message is not None:
|
if message is not None:
|
||||||
PST = Paste.Paste(message.split(" ", -1)[-1])
|
PST = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
else:
|
else:
|
||||||
if r_serv1.sismember("SHUTDOWN_FLAGS", "Lines"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv1.srem("SHUTDOWN_FLAGS", "Lines")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
|
@ -111,18 +95,14 @@ def main():
|
||||||
lines_infos = PST.get_lines_info()
|
lines_infos = PST.get_lines_info()
|
||||||
|
|
||||||
PST.save_attribute_redis(r_serv, "p_nb_lines", lines_infos[0])
|
PST.save_attribute_redis(r_serv, "p_nb_lines", lines_infos[0])
|
||||||
PST.save_attribute_redis(r_serv, "p_max_length_line", lines_infos[1])
|
PST.save_attribute_redis(r_serv, "p_max_length_line",
|
||||||
|
lines_infos[1])
|
||||||
|
|
||||||
r_serv.sadd("Pastes_Objects", PST.p_path)
|
r_serv.sadd("Pastes_Objects", PST.p_path)
|
||||||
if lines_infos[1] >= args.max:
|
if lines_infos[1] >= args.max:
|
||||||
msg = channel_0+" "+PST.p_path
|
h.pub_channel = channel_0
|
||||||
else:
|
else:
|
||||||
msg = channel_1+" "+PST.p_path
|
h.pub_channel = channel_1
|
||||||
|
h.zmq_pub_send(PST.p_path)
|
||||||
pub.send_message(msg)
|
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Error on : ", PST.p_path
|
print "CRC Checksum Error on : ", PST.p_path
|
||||||
pass
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -18,47 +18,17 @@ Requirements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
subscriber_name = "line"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Lines_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Lines_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = "PubSub_Global"
|
||||||
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'line'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -4,9 +4,11 @@
|
||||||
The ZMQ_PubSub_Lines Module
|
The ZMQ_PubSub_Lines Module
|
||||||
============================
|
============================
|
||||||
|
|
||||||
This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q Module.
|
This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
|
||||||
|
Module.
|
||||||
|
|
||||||
It tokenize the content of the paste and publish the result in the following format:
|
It tokenize the content of the paste and publish the result in the following
|
||||||
|
format:
|
||||||
channel_name+' '+/path/of/the/paste.gz+' '+tokenized_word+' '+scoring
|
channel_name+' '+/path/of/the/paste.gz+' '+tokenized_word+' '+scoring
|
||||||
|
|
||||||
..seealso:: Paste method (_get_top_words)
|
..seealso:: Paste method (_get_top_words)
|
||||||
|
@ -21,72 +23,43 @@ Requirements
|
||||||
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
config_section = 'PubSub_Longlines'
|
||||||
channel = cfg.get("PubSub_Longlines", "channel_1")
|
config_channel = 'channel_1'
|
||||||
subscriber_name = "tokenize"
|
subscriber_name = 'tokenize'
|
||||||
subscriber_config_section = "PubSub_Longlines"
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
# Publisher
|
# Publisher
|
||||||
publisher_config_section = "PubSub_Words"
|
pub_config_section = 'PubSub_Words'
|
||||||
publisher_name = "pubtokenize"
|
pub_config_channel = 'channel_0'
|
||||||
|
h.zmq_pub(pub_config_section, pub_config_channel)
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
# LOGGING #
|
||||||
pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
publisher.info("Tokeniser subscribed to channel {}".format(h.sub_channel))
|
||||||
|
|
||||||
channel_0 = cfg.get("PubSub_Words", "channel_0")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
print message
|
print message
|
||||||
if message is not None:
|
if message is not None:
|
||||||
PST = Paste.Paste(message.split(" ", -1)[-1])
|
paste = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
|
for word, score in paste._get_top_words().items():
|
||||||
|
if len(word) >= 4:
|
||||||
|
h.zmq_pub_send('{} {} {}'.format(paste.p_path, word,
|
||||||
|
score))
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
publisher.debug("Tokeniser is idling 10s")
|
publisher.debug("Tokeniser is idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
print "sleepin"
|
print "sleepin"
|
||||||
continue
|
|
||||||
|
|
||||||
for word, score in PST._get_top_words().items():
|
|
||||||
if len(word) >= 4:
|
|
||||||
msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
|
|
||||||
pub.send_message(msg)
|
|
||||||
print msg
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -17,48 +17,19 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Line" channel 1
|
*Should register to the Publisher "ZMQ_PubSub_Line" channel 1
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Longlines", "channel_1")
|
|
||||||
subscriber_name = "tokenize"
|
|
||||||
subscriber_config_section = "PubSub_Longlines"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Tokenize_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Longlines'
|
||||||
|
config_channel = 'channel_1'
|
||||||
|
subscriber_name = 'tokenize'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -21,49 +21,33 @@ Requirements
|
||||||
*Need running Redis instances. (Redis)
|
*Need running Redis instances. (Redis)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
import time
|
import time
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read('./packages/config.cfg')
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Global"
|
publisher.channel = "Global"
|
||||||
|
|
||||||
# ZMQ #
|
config_section = 'PubSub_Global'
|
||||||
pub_glob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'global'
|
||||||
|
|
||||||
# FONCTIONS #
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
|
# Publisher
|
||||||
|
pub_config_section = 'PubSub_Global'
|
||||||
|
pub_config_channel = 'channel'
|
||||||
|
h.zmq_pub(pub_config_section, pub_config_channel)
|
||||||
|
|
||||||
|
# LOGGING #
|
||||||
publisher.info("Starting to publish.")
|
publisher.info("Starting to publish.")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
filename = r_serv.lpop("filelist")
|
filename = h.redis_rpop()
|
||||||
|
|
||||||
if filename is not None:
|
if filename is not None:
|
||||||
|
h.zmq_pub_send(filename)
|
||||||
msg = cfg.get("PubSub_Global", "channel")+" "+filename
|
|
||||||
pub_glob.send_message(msg)
|
|
||||||
publisher.debug("{0} Published".format(msg))
|
|
||||||
else:
|
else:
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
publisher.debug("Nothing to publish")
|
publisher.debug("Nothing to publish")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -27,56 +27,41 @@ Requirements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
import redis
|
||||||
import ConfigParser
|
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
publisher.channel = "Script"
|
||||||
|
|
||||||
def main():
|
config_section = 'PubSub_Global'
|
||||||
"""Main Function"""
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'attributes'
|
||||||
|
|
||||||
# CONFIG #
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv = redis.StrictRedis(
|
r_serv = redis.StrictRedis(
|
||||||
host=cfg.get("Redis_Data_Merging", "host"),
|
host=h.config.get("Redis_Data_Merging", "host"),
|
||||||
port=cfg.getint("Redis_Data_Merging", "port"),
|
port=h.config.getint("Redis_Data_Merging", "port"),
|
||||||
db=cfg.getint("Redis_Data_Merging", "db"))
|
db=h.config.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
# Subscriber
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
subscriber_name = "attributes"
|
|
||||||
subscriber_config_section = "PubSub_Global"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""ZMQ Attribute is Running""")
|
publisher.info("""ZMQ Attribute is Running""")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = sub.get_msg_from_queue(r_serv1)
|
message = h.redis_rpop()
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
PST = Paste.Paste(message.split(" ", -1)[-1])
|
PST = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
else:
|
else:
|
||||||
if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv1.srem("SHUTDOWN_FLAGS", "Attributes")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
|
@ -95,9 +80,6 @@ def main():
|
||||||
PST.save_all_attributes_redis(r_serv)
|
PST.save_all_attributes_redis(r_serv)
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Failed on :", PST.p_path
|
print "CRC Checksum Failed on :", PST.p_path
|
||||||
publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed"))
|
publisher.error('{0};{1};{2};{3};{4}'.format(
|
||||||
pass
|
"Duplicate", PST.p_source, PST.p_date, PST.p_name,
|
||||||
|
"CRC Checksum Failed"))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -18,47 +18,18 @@ Requirements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
subscriber_name = "attributes"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Attributes_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Attributes_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Global'
|
||||||
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'attributes'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -1,53 +1,44 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
import redis
|
import redis
|
||||||
import ConfigParser
|
|
||||||
import pprint
|
import pprint
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
import Helper
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Data_Merging", "host"),
|
|
||||||
port=cfg.getint("Redis_Data_Merging", "port"),
|
|
||||||
db=cfg.getint("Redis_Data_Merging", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
config_section = 'PubSub_Categ'
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
|
config_channel = 'channel_0'
|
||||||
|
subscriber_name = 'cards'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
|
# REDIS #
|
||||||
|
r_serv1 = redis.StrictRedis(
|
||||||
|
host=h.config.get("Redis_Data_Merging", "host"),
|
||||||
|
port=h.config.getint("Redis_Data_Merging", "port"),
|
||||||
|
db=h.config.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Creditcard script subscribed to channel creditcard_categ")
|
publisher.info("Creditcard script subscribed to channel creditcard_categ")
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
||||||
|
|
||||||
# mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
# visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
# discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}"
|
# discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|
|
||||||
|
# [3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}"
|
||||||
# jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
# amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}"
|
# amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}"
|
||||||
# chinaUP_regex = "62[0-5]\d{13,16}"
|
# chinaUP_regex = "62[0-5]\d{13,16}"
|
||||||
|
@ -69,25 +60,22 @@ def main():
|
||||||
PST.save_attribute_redis(r_serv1, channel, creditcard_set)
|
PST.save_attribute_redis(r_serv1, channel, creditcard_set)
|
||||||
|
|
||||||
pprint.pprint(creditcard_set)
|
pprint.pprint(creditcard_set)
|
||||||
to_print = 'CreditCard;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
|
to_print = 'CreditCard;{};{};{};'.format(
|
||||||
|
PST.p_source, PST.p_date, PST.p_name)
|
||||||
if (len(creditcard_set) > 0):
|
if (len(creditcard_set) > 0):
|
||||||
publisher.critical('{}Checked {} valid number(s)'.format(to_print, len(creditcard_set)))
|
publisher.critical('{}Checked {} valid number(s)'.format(
|
||||||
|
to_print, len(creditcard_set)))
|
||||||
else:
|
else:
|
||||||
publisher.info('{}CreditCard related'.format(to_print))
|
publisher.info('{}CreditCard related'.format(to_print))
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Creditcards"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Creditcards")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
publisher.debug("Script creditcard is idling 1m")
|
publisher.debug("Script creditcard is idling 1m")
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -1,44 +1,18 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format("creditcard_categ"))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Creditcards_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Creditcards_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Categ'
|
||||||
|
config_channel = 'channel_0'
|
||||||
|
subscriber_name = 'creditcard_categ'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -6,7 +6,8 @@ The ZMQ_Sub_Curve Module
|
||||||
|
|
||||||
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
|
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
|
||||||
|
|
||||||
This modules update a .csv file used to draw curves representing selected words and their occurency per day.
|
This modules update a .csv file used to draw curves representing selected
|
||||||
|
words and their occurency per day.
|
||||||
|
|
||||||
..note:: The channel will have the name of the file created.
|
..note:: The channel will have the name of the file created.
|
||||||
|
|
||||||
|
@ -22,72 +23,60 @@ Requirements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
import redis
|
||||||
import ConfigParser
|
|
||||||
import time
|
import time
|
||||||
from packages import Paste as P
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import lib_words
|
from packages import lib_words
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Level_DB", "host"),
|
|
||||||
port=cfg.get("Redis_Level_DB", "port"),
|
|
||||||
db=0)
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
config_section = 'PubSub_Words'
|
||||||
channel = cfg.get("PubSub_Words", "channel_0")
|
config_channel = 'channel_0'
|
||||||
subscriber_name = "curve"
|
subscriber_name = "curve"
|
||||||
subscriber_config_section = "PubSub_Words"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
|
# REDIS #
|
||||||
|
r_serv1 = redis.StrictRedis(
|
||||||
|
host=h.config.get("Redis_Level_DB", "host"),
|
||||||
|
port=h.config.get("Redis_Level_DB", "port"),
|
||||||
|
db=h.config.get("Redis_Level_DB", "db"))
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script Curve subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0")))
|
publisher.info("Script Curve subscribed to {}".format(h.sub_channel))
|
||||||
|
|
||||||
# FILE CURVE SECTION #
|
# FILE CURVE SECTION #
|
||||||
csv_path = cfg.get("Directories", "wordtrending_csv")
|
csv_path = h.config.get("Directories", "wordtrending_csv")
|
||||||
wordfile_path = cfg.get("Directories", "wordsfile")
|
wordfile_path = h.config.get("Directories", "wordsfile")
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
while True:
|
while True:
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = P.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, int(PST.p_date.year), int(PST.p_date.month))
|
lib_words.create_curve_with_word_file(
|
||||||
|
r_serv1, csv_path, wordfile_path, int(PST.p_date.year),
|
||||||
|
int(PST.p_date.month))
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
prev_score = r_serv1.hget(word.lower(), PST.p_date)
|
prev_score = r_serv1.hget(word.lower(), PST.p_date)
|
||||||
print prev_score
|
print prev_score
|
||||||
if prev_score is not None:
|
if prev_score is not None:
|
||||||
r_serv1.hset(word.lower(), PST.p_date, int(prev_score) + int(score))
|
r_serv1.hset(word.lower(), PST.p_date,
|
||||||
|
int(prev_score) + int(score))
|
||||||
else:
|
else:
|
||||||
r_serv1.hset(word.lower(), PST.p_date, score)
|
r_serv1.hset(word.lower(), PST.p_date, score)
|
||||||
# r_serv.expire(word,86400) #1day
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Curve"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Curve")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
|
@ -95,8 +84,4 @@ def main():
|
||||||
print "sleepin"
|
print "sleepin"
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -17,47 +17,19 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Words", "channel_0")
|
|
||||||
subscriber_name = "curve"
|
|
||||||
subscriber_config_section = "PubSub_Words"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Words'
|
||||||
|
config_channel = 'channel_0'
|
||||||
|
subscriber_name = 'curve'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -13,61 +13,51 @@ Requirements:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
import redis
|
||||||
import ConfigParser
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from pybloomfilter import BloomFilter
|
from pybloomfilter import BloomFilter
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
publisher.channel = "Script"
|
||||||
|
|
||||||
def main():
|
config_section = 'PubSub_Global'
|
||||||
"""Main Function"""
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'duplicate'
|
||||||
|
|
||||||
# CONFIG #
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
# DB QUEUE ( MEMORY )
|
|
||||||
r_Q_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
r_serv_merge = redis.StrictRedis(
|
r_serv_merge = redis.StrictRedis(
|
||||||
host=cfg.get("Redis_Data_Merging", "host"),
|
host=h.config.get("Redis_Data_Merging", "host"),
|
||||||
port=cfg.getint("Redis_Data_Merging", "port"),
|
port=h.config.getint("Redis_Data_Merging", "port"),
|
||||||
db=cfg.getint("Redis_Data_Merging", "db"))
|
db=h.config.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
# DB OBJECT & HASHS ( DISK )
|
# DB OBJECT & HASHS ( DISK )
|
||||||
|
# FIXME increase flexibility
|
||||||
dico_redis = {}
|
dico_redis = {}
|
||||||
for year in xrange(2013, 2015):
|
for year in xrange(2013, 2015):
|
||||||
for month in xrange(0, 16):
|
for month in xrange(0, 16):
|
||||||
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
||||||
host=cfg.get("Redis_Level_DB", "host"),
|
host=h.config.get("Redis_Level_DB", "host"), port=year,
|
||||||
port=year,
|
|
||||||
db=month)
|
db=month)
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
subscriber_name = "duplicate"
|
|
||||||
subscriber_config_section = "PubSub_Global"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Script duplicate subscribed to channel {0}""".format(cfg.get("PubSub_Global", "channel")))
|
publisher.info("""Script duplicate subscribed to channel {0}""".format(
|
||||||
|
h.config.get("PubSub_Global", "channel")))
|
||||||
|
|
||||||
set_limit = 100
|
set_limit = 100
|
||||||
|
bloompath = os.path.join(os.environ('AIL_BIN'),
|
||||||
|
h.config.get("Directories", "bloomfilters"))
|
||||||
|
|
||||||
|
bloop_path_set = set()
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
super_dico = {}
|
super_dico = {}
|
||||||
|
@ -77,15 +67,14 @@ def main():
|
||||||
|
|
||||||
x = time.time()
|
x = time.time()
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_Q_serv)
|
message = h.redis_rpop()
|
||||||
if message is not None:
|
if message is not None:
|
||||||
path = message.split(" ", -1)[-1]
|
path = message.split(" ", -1)[-1]
|
||||||
PST = Paste.Paste(path)
|
PST = Paste.Paste(path)
|
||||||
else:
|
else:
|
||||||
publisher.debug("Script Attribute is idling 10s")
|
publisher.debug("Script Attribute is idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
if r_Q_serv.sismember("SHUTDOWN_FLAGS", "Duplicate"):
|
if h.redis_queue_shutdown():
|
||||||
r_Q_serv.srem("SHUTDOWN_FLAGS", "Duplicate")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
|
@ -97,19 +86,14 @@ def main():
|
||||||
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
|
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
|
||||||
|
|
||||||
# Creating the bloom filter name: bloomyyyymm
|
# Creating the bloom filter name: bloomyyyymm
|
||||||
bloomname = 'bloom' + PST.p_date.year + PST.p_date.month
|
filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year +
|
||||||
|
PST.p_date.month)
|
||||||
bloompath = cfg.get("Directories", "bloomfilters")
|
|
||||||
|
|
||||||
filebloompath = bloompath + bloomname
|
|
||||||
|
|
||||||
# datetime.date(int(PST.p_date.year),int(PST.p_date.month),int(PST.p_date.day)).timetuple().tm_yday % 7
|
|
||||||
|
|
||||||
if os.path.exists(filebloompath):
|
if os.path.exists(filebloompath):
|
||||||
bloom = BloomFilter.open(filebloompath)
|
bloom = BloomFilter.open(filebloompath)
|
||||||
else:
|
else:
|
||||||
bloom = BloomFilter(100000000, 0.01, filebloompath)
|
bloom = BloomFilter(100000000, 0.01, filebloompath)
|
||||||
r_Q_serv.sadd("bloomlist", filebloompath)
|
bloop_path_set.add(filebloompath)
|
||||||
|
|
||||||
# UNIQUE INDEX HASHS TABLE
|
# UNIQUE INDEX HASHS TABLE
|
||||||
r_serv0 = dico_redis["201300"]
|
r_serv0 = dico_redis["201300"]
|
||||||
|
@ -121,45 +105,43 @@ def main():
|
||||||
|
|
||||||
# For each bloom filter
|
# For each bloom filter
|
||||||
opened_bloom = []
|
opened_bloom = []
|
||||||
for bloo in r_Q_serv.smembers("bloomlist"):
|
for bloo in bloop_path_set:
|
||||||
# Opening blooms
|
# Opening blooms
|
||||||
opened_bloom.append(BloomFilter.open(bloo))
|
opened_bloom.append(BloomFilter.open(bloo))
|
||||||
|
|
||||||
# For each hash of the paste
|
# For each hash of the paste
|
||||||
for hash in PST._get_hash_lines(min=5, start=1, jump=0):
|
for line_hash in PST._get_hash_lines(min=5, start=1, jump=0):
|
||||||
nb_hash_current += 1
|
nb_hash_current += 1
|
||||||
|
|
||||||
# Adding the hash in Redis & limiting the set
|
# Adding the hash in Redis & limiting the set
|
||||||
if r_serv1.scard(hash) <= set_limit:
|
if r_serv1.scard(line_hash) <= set_limit:
|
||||||
r_serv1.sadd(hash, index)
|
r_serv1.sadd(line_hash, index)
|
||||||
r_serv1.sadd("HASHS", hash)
|
r_serv1.sadd("HASHS", line_hash)
|
||||||
# Adding the hash in the bloom of the month
|
# Adding the hash in the bloom of the month
|
||||||
bloom.add(hash)
|
bloom.add(line_hash)
|
||||||
|
|
||||||
# Go throught the Database of the bloom filter (of the month)
|
# Go throught the Database of the bloom filter (of the month)
|
||||||
for bloo in opened_bloom:
|
for bloo in opened_bloom:
|
||||||
if hash in bloo:
|
if line_hash in bloo:
|
||||||
db = bloo.name[-6:]
|
db = bloo.name[-6:]
|
||||||
# Go throught the Database of the bloom filter (of the month)
|
# Go throught the Database of the bloom filter (month)
|
||||||
r_serv_bloom = dico_redis[db]
|
r_serv_bloom = dico_redis[db]
|
||||||
|
|
||||||
# set of index paste: set([1,2,4,65])
|
# set of index paste: set([1,2,4,65])
|
||||||
hash_current = r_serv_bloom.smembers(hash)
|
hash_current = r_serv_bloom.smembers(line_hash)
|
||||||
# removing itself from the list
|
# removing itself from the list
|
||||||
hash_current = hash_current - set([index])
|
hash_current = hash_current - set([index])
|
||||||
|
|
||||||
# if the hash is present at least in 1 files (already processed)
|
# if the hash is present at least in 1 files
|
||||||
|
# (already processed)
|
||||||
if len(hash_current) != 0:
|
if len(hash_current) != 0:
|
||||||
hash_dico[hash] = hash_current
|
hash_dico[line_hash] = hash_current
|
||||||
|
|
||||||
# if there is data in this dictionnary
|
# if there is data in this dictionnary
|
||||||
if len(hash_dico) != 0:
|
if len(hash_dico) != 0:
|
||||||
super_dico[index] = hash_dico
|
super_dico[index] = hash_dico
|
||||||
else:
|
|
||||||
# The hash is not in this bloom
|
|
||||||
pass
|
|
||||||
|
|
||||||
###########################################################################################
|
###########################################################################
|
||||||
|
|
||||||
# if there is data in this dictionnary
|
# if there is data in this dictionnary
|
||||||
if len(super_dico) != 0:
|
if len(super_dico) != 0:
|
||||||
|
@ -171,7 +153,8 @@ def main():
|
||||||
|
|
||||||
for p_fname in pset:
|
for p_fname in pset:
|
||||||
occur_dico.setdefault(p_fname, 0)
|
occur_dico.setdefault(p_fname, 0)
|
||||||
# Count how much hash is similar per file occuring in the dictionnary
|
# Count how much hash is similar per file occuring
|
||||||
|
# in the dictionnary
|
||||||
if occur_dico[p_fname] >= 0:
|
if occur_dico[p_fname] >= 0:
|
||||||
occur_dico[p_fname] = occur_dico[p_fname] + 1
|
occur_dico[p_fname] = occur_dico[p_fname] + 1
|
||||||
|
|
||||||
|
@ -181,7 +164,8 @@ def main():
|
||||||
dupl.append((paste, percentage))
|
dupl.append((paste, percentage))
|
||||||
|
|
||||||
# Creating the object attribute and save it.
|
# Creating the object attribute and save it.
|
||||||
to_print = 'Duplicate;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
|
to_print = 'Duplicate;{};{};{};'.format(
|
||||||
|
PST.p_source, PST.p_date, PST.p_name)
|
||||||
if dupl != []:
|
if dupl != []:
|
||||||
PST.__setattr__("p_duplicate", dupl)
|
PST.__setattr__("p_duplicate", dupl)
|
||||||
PST.save_attribute_redis(r_serv_merge, "p_duplicate", dupl)
|
PST.save_attribute_redis(r_serv_merge, "p_duplicate", dupl)
|
||||||
|
@ -193,7 +177,3 @@ def main():
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Failed on :", PST.p_path
|
print "CRC Checksum Failed on :", PST.p_path
|
||||||
publisher.error('{}CRC Checksum Failed'.format(to_print))
|
publisher.error('{}CRC Checksum Failed'.format(to_print))
|
||||||
pass
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -1,45 +1,17 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, "duplicate")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Duplicate_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Duplicate_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = 'Queuing'
|
||||||
|
|
||||||
|
config_section = 'PubSub_Global'
|
||||||
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'duplicate'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -9,38 +9,37 @@ The ZMQ_Sub_Indexer modules is fetching the list of files to be processed
|
||||||
and index each file with a full-text indexer (Whoosh until now).
|
and index each file with a full-text indexer (Whoosh until now).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
from whoosh.index import create_in, exists_in, open_dir
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
import os
|
import os
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
if __name__ == "__main__":
|
||||||
"""Main Function"""
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# CONFIG #
|
# Subscriber
|
||||||
cfg = ConfigParser.ConfigParser()
|
sub_config_section = 'PubSub_Global'
|
||||||
cfg.read(configfile)
|
sub_name = 'indexer'
|
||||||
|
|
||||||
# Redis
|
config_section = 'PubSub_Global'
|
||||||
r_serv1 = redis.StrictRedis(
|
config_channel = 'channel'
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
subscriber_name = 'indexer'
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
# Indexer configuration - index dir and schema setup
|
# Indexer configuration - index dir and schema setup
|
||||||
indexpath = cfg.get("Indexer", "path")
|
indexpath = h.config.get("Indexer", "path")
|
||||||
indexertype = cfg.get("Indexer", "type")
|
indexertype = h.config.get("Indexer", "type")
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
||||||
|
unique=True),
|
||||||
|
content=TEXT)
|
||||||
if not os.path.exists(indexpath):
|
if not os.path.exists(indexpath):
|
||||||
os.mkdir(indexpath)
|
os.mkdir(indexpath)
|
||||||
if not exists_in(indexpath):
|
if not exists_in(indexpath):
|
||||||
|
@ -49,29 +48,16 @@ def main():
|
||||||
ix = open_dir(indexpath)
|
ix = open_dir(indexpath)
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.channel = "Script"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
# Subscriber
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
subscriber_name = "indexer"
|
|
||||||
subscriber_config_section = "PubSub_Global"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""ZMQ Indexer is Running""")
|
publisher.info("""ZMQ Indexer is Running""")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = sub.get_msg_from_queue(r_serv1)
|
message = h.redis_rpop()
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
PST = Paste.Paste(message.split(" ", -1)[-1])
|
PST = Paste.Paste(message.split(" ", -1)[-1])
|
||||||
else:
|
else:
|
||||||
if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
break
|
||||||
publisher.debug("Script Indexer is idling 10s")
|
publisher.debug("Script Indexer is idling 10s")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
@ -88,9 +74,5 @@ def main():
|
||||||
indexwriter.commit()
|
indexwriter.commit()
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Failed on :", PST.p_path
|
print "CRC Checksum Failed on :", PST.p_path
|
||||||
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name))
|
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
||||||
pass
|
PST.p_source, PST.p_date, PST.p_name))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -12,49 +12,18 @@ handling the indexing process of the files seen.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
subscriber_name = "indexer"
|
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name)
|
|
||||||
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format(channel))
|
|
||||||
|
|
||||||
# Until the service is requested to be shutdown, the service
|
|
||||||
# will get the data from the global ZMQ queue and buffer it in Redis.
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Indexer_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Indexer_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Global'
|
||||||
|
config_channel = 'channel'
|
||||||
|
subscriber_name = 'indexer'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -2,53 +2,47 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis
|
import redis
|
||||||
import ConfigParser
|
|
||||||
import pprint
|
import pprint
|
||||||
import time
|
import time
|
||||||
import dns.exception
|
import dns.exception
|
||||||
from packages import Paste as P
|
from packages import Paste
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Data_Merging", "host"),
|
|
||||||
port=cfg.getint("Redis_Data_Merging", "port"),
|
|
||||||
db=cfg.getint("Redis_Data_Merging", "db"))
|
|
||||||
|
|
||||||
r_serv2 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Cache", "host"),
|
|
||||||
port=cfg.getint("Redis_Cache", "port"),
|
|
||||||
db=cfg.getint("Redis_Cache", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
config_section = 'PubSub_Categ'
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails")
|
config_channel = 'channel_1'
|
||||||
|
subscriber_name = 'emails'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
|
# REDIS #
|
||||||
|
r_serv1 = redis.StrictRedis(
|
||||||
|
host=h.config.get("Redis_Data_Merging", "host"),
|
||||||
|
port=h.config.getint("Redis_Data_Merging", "port"),
|
||||||
|
db=h.config.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
|
r_serv2 = redis.StrictRedis(
|
||||||
|
host=h.config.get("Redis_Cache", "host"),
|
||||||
|
port=h.config.getint("Redis_Cache", "port"),
|
||||||
|
db=h.config.getint("Redis_Cache", "db"))
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Suscribed to channel mails_categ")
|
publisher.info("Suscribed to channel mails_categ")
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
|
# Log as critical if there are more that that amout of valid emails
|
||||||
|
is_critical = 10
|
||||||
|
|
||||||
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
@ -57,36 +51,36 @@ def main():
|
||||||
channel, filename, word, score = message.split()
|
channel, filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = P.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
MX_values = lib_refine.checking_MX_record(r_serv2, PST.get_regex(email_regex))
|
MX_values = lib_refine.checking_MX_record(
|
||||||
|
r_serv2, PST.get_regex(email_regex))
|
||||||
|
|
||||||
if MX_values[0] >= 1:
|
if MX_values[0] >= 1:
|
||||||
|
|
||||||
PST.__setattr__(channel, MX_values)
|
PST.__setattr__(channel, MX_values)
|
||||||
PST.save_attribute_redis(r_serv1, channel, (MX_values[0], list(MX_values[1])))
|
PST.save_attribute_redis(r_serv1, channel,
|
||||||
|
(MX_values[0],
|
||||||
|
list(MX_values[1])))
|
||||||
|
|
||||||
pprint.pprint(MX_values)
|
pprint.pprint(MX_values)
|
||||||
to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.format(PST.p_source, PST.p_date, PST.p_name, MX_values[0])
|
to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\
|
||||||
if MX_values[0] > 10:
|
format(PST.p_source, PST.p_date, PST.p_name,
|
||||||
|
MX_values[0])
|
||||||
|
if MX_values[0] > is_critical:
|
||||||
publisher.warning(to_print)
|
publisher.warning(to_print)
|
||||||
else:
|
else:
|
||||||
publisher.info(to_print)
|
publisher.info(to_print)
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Mails"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Mails")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
publisher.debug("Script Mails is Idling 10s")
|
publisher.debug("Script Mails is Idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
except dns.exception.Timeout:
|
except dns.exception.Timeout:
|
||||||
|
# FIXME retry!
|
||||||
print "dns.exception.Timeout"
|
print "dns.exception.Timeout"
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -1,44 +1,17 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format("mails_categ"))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Mails_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Mails_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Categ'
|
||||||
|
config_channel = 'channel_1'
|
||||||
|
subscriber_name = 'mails_categ'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -6,8 +6,8 @@ The ZMQ_Sub_Onion Module
|
||||||
|
|
||||||
This module is consuming the Redis-list created by the ZMQ_Sub_Onion_Q Module.
|
This module is consuming the Redis-list created by the ZMQ_Sub_Onion_Q Module.
|
||||||
|
|
||||||
It trying to extract url from paste and returning only ones which are tor related
|
It trying to extract url from paste and returning only ones which are tor
|
||||||
(.onion)
|
related (.onion)
|
||||||
|
|
||||||
..seealso:: Paste method (get_regex)
|
..seealso:: Paste method (get_regex)
|
||||||
|
|
||||||
|
@ -22,45 +22,37 @@ Requirements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
import redis
|
||||||
import ConfigParser
|
|
||||||
import pprint
|
import pprint
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
|
||||||
|
|
||||||
|
import Helper
|
||||||
|
|
||||||
def main():
|
if __name__ == "__main__":
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Data_Merging", "host"),
|
|
||||||
port=cfg.getint("Redis_Data_Merging", "port"),
|
|
||||||
db=cfg.getint("Redis_Data_Merging", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
config_section = 'PubSub_Categ'
|
||||||
Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "onion_categ", "tor")
|
config_channel = 'channel_2'
|
||||||
|
subscriber_name = 'tor'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
|
# REDIS #
|
||||||
|
r_serv1 = redis.StrictRedis(
|
||||||
|
host=h.config.get("Redis_Data_Merging", "host"),
|
||||||
|
port=h.config.getint("Redis_Data_Merging", "port"),
|
||||||
|
db=h.config.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script subscribed to channel onion_categ")
|
publisher.info("Script subscribed to channel onion_categ")
|
||||||
|
|
||||||
# Getting the first message from redis.
|
# Getting the first message from redis.
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
# Thanks to Faup project for this regex
|
# Thanks to Faup project for this regex
|
||||||
|
@ -78,7 +70,8 @@ def main():
|
||||||
|
|
||||||
for x in PST.get_regex(url_regex):
|
for x in PST.get_regex(url_regex):
|
||||||
# Extracting url with regex
|
# Extracting url with regex
|
||||||
credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x
|
credential, subdomain, domain, host, tld, port, \
|
||||||
|
resource_path, query_string, f1, f2, f3, f4 = x
|
||||||
|
|
||||||
if f1 == "onion":
|
if f1 == "onion":
|
||||||
domains_list.append(domain)
|
domains_list.append(domain)
|
||||||
|
@ -88,25 +81,22 @@ def main():
|
||||||
PST.save_attribute_redis(r_serv1, channel, domains_list)
|
PST.save_attribute_redis(r_serv1, channel, domains_list)
|
||||||
pprint.pprint(domains_list)
|
pprint.pprint(domains_list)
|
||||||
print PST.p_path
|
print PST.p_path
|
||||||
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)
|
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
|
||||||
|
PST.p_name)
|
||||||
if len(domains_list) > 0:
|
if len(domains_list) > 0:
|
||||||
publisher.warning('{}Detected {} .onion(s)'.format(to_print, len(domains_list)))
|
publisher.warning('{}Detected {} .onion(s)'.format(
|
||||||
|
to_print, len(domains_list)))
|
||||||
else:
|
else:
|
||||||
publisher.info('{}Onion related'.format(to_print))
|
publisher.info('{}Onion related'.format(to_print))
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Onion"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Onion")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
publisher.debug("Script url is Idling 10s")
|
publisher.debug("Script url is Idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = Sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -17,44 +17,18 @@ Requirements
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Categ"
|
*Should register to the Publisher "ZMQ_PubSub_Categ"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "onion_categ", "tor")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format("onion_categ"))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Categ'
|
||||||
|
config_channel = 'channel_2'
|
||||||
|
subscriber_name = 'onion_categ'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -1,13 +1,11 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
import redis
|
import redis
|
||||||
import ConfigParser
|
|
||||||
import pprint
|
import pprint
|
||||||
import time
|
import time
|
||||||
import dns.exception
|
import dns.exception
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
# Country and ASN lookup
|
# Country and ASN lookup
|
||||||
|
@ -16,55 +14,43 @@ import socket
|
||||||
import pycountry
|
import pycountry
|
||||||
import ipaddress
|
import ipaddress
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
r_serv1 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Data_Merging", "host"),
|
|
||||||
port=cfg.getint("Redis_Data_Merging", "port"),
|
|
||||||
db=cfg.getint("Redis_Data_Merging", "db"))
|
|
||||||
|
|
||||||
r_serv2 = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Cache", "host"),
|
|
||||||
port=cfg.getint("Redis_Cache", "port"),
|
|
||||||
db=cfg.getint("Redis_Cache", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# ZMQ #
|
config_section = 'PubSub_Categ'
|
||||||
# Subscriber
|
config_channel = 'channel_3'
|
||||||
subscriber_name = "urls"
|
subscriber_name = "urls"
|
||||||
subscriber_config_section = "PubSub_Categ"
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
|
||||||
# Publisher
|
# Publisher
|
||||||
publisher_config_section = "PubSub_Url"
|
pub_config_section = "PubSub_Url"
|
||||||
publisher_name = "adress"
|
pub_config_channel = 'channel'
|
||||||
pubchannel = cfg.get("PubSub_Url", "channel")
|
h.zmq_pub(pub_config_section, pub_config_channel)
|
||||||
|
|
||||||
|
# Subscriber
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
|
||||||
|
# REDIS #
|
||||||
|
r_serv1 = redis.StrictRedis(
|
||||||
|
host=h.config.get("Redis_Data_Merging", "host"),
|
||||||
|
port=h.config.getint("Redis_Data_Merging", "port"),
|
||||||
|
db=h.config.getint("Redis_Data_Merging", "db"))
|
||||||
|
|
||||||
|
r_serv2 = redis.StrictRedis(
|
||||||
|
host=h.config.get("Redis_Cache", "host"),
|
||||||
|
port=h.config.getint("Redis_Cache", "port"),
|
||||||
|
db=h.config.getint("Redis_Cache", "db"))
|
||||||
|
|
||||||
# Country to log as critical
|
# Country to log as critical
|
||||||
cc_critical = cfg.get("PubSub_Url", "cc_critical")
|
cc_critical = h.config.get("PubSub_Url", "cc_critical")
|
||||||
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, "web_categ", subscriber_name)
|
|
||||||
pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script URL subscribed to channel web_categ")
|
publisher.info("Script URL subscribed to channel web_categ")
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
||||||
|
@ -79,11 +65,12 @@ def main():
|
||||||
PST = Paste.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
client = ip2asn()
|
client = ip2asn()
|
||||||
for x in PST.get_regex(url_regex):
|
for x in PST.get_regex(url_regex):
|
||||||
scheme, credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x
|
scheme, credential, subdomain, domain, host, tld, \
|
||||||
|
port, resource_path, query_string, f1, f2, f3, \
|
||||||
|
f4 = x
|
||||||
domains_list.append(domain)
|
domains_list.append(domain)
|
||||||
msg = pubchannel + " " + str(x)
|
h.zmq_pub_send(str(x))
|
||||||
pub.send_message(msg)
|
publisher.debug('{} Published'.format(x))
|
||||||
publisher.debug('{0} Published'.format(x))
|
|
||||||
|
|
||||||
if f1 == "onion":
|
if f1 == "onion":
|
||||||
print domain
|
print domain
|
||||||
|
@ -107,35 +94,38 @@ def main():
|
||||||
# EU is not an official ISO 3166 code (but used by RIPE
|
# EU is not an official ISO 3166 code (but used by RIPE
|
||||||
# IP allocation)
|
# IP allocation)
|
||||||
if cc is not None and cc != "EU":
|
if cc is not None and cc != "EU":
|
||||||
print hostl, asn, cc, pycountry.countries.get(alpha2=cc).name
|
print hostl, asn, cc, \
|
||||||
|
pycountry.countries.get(alpha2=cc).name
|
||||||
if cc == cc_critical:
|
if cc == cc_critical:
|
||||||
# FIXME: That's going to fail.
|
# FIXME: That's going to fail.
|
||||||
publisher.warning('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Detected " + str(A_values[0]) + " " + hostl + " " + cc))
|
publisher.warning(
|
||||||
|
'Url;{};{};{};Detected {} {} {}'.format(
|
||||||
|
PST.p_source, PST.p_date, PST.p_name,
|
||||||
|
A_values[0], hostl, cc))
|
||||||
else:
|
else:
|
||||||
print hostl, asn, cc
|
print hostl, asn, cc
|
||||||
A_values = lib_refine.checking_A_record(r_serv2, domains_list)
|
A_values = lib_refine.checking_A_record(r_serv2,
|
||||||
|
domains_list)
|
||||||
|
|
||||||
if A_values[0] >= 1:
|
if A_values[0] >= 1:
|
||||||
PST.__setattr__(channel, A_values)
|
PST.__setattr__(channel, A_values)
|
||||||
PST.save_attribute_redis(r_serv1, channel, (A_values[0], list(A_values[1])))
|
PST.save_attribute_redis(r_serv1, channel,
|
||||||
|
(A_values[0],
|
||||||
|
list(A_values[1])))
|
||||||
|
|
||||||
pprint.pprint(A_values)
|
pprint.pprint(A_values)
|
||||||
publisher.info('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Checked " + str(A_values[0]) + " URL"))
|
publisher.info('Url;{};{};{};Checked {} URL'.format(
|
||||||
|
PST.p_source, PST.p_date, PST.p_name, A_values[0]))
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Urls"):
|
if h.redis_queue_shutdown():
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Urls")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
print "Shutdown Flag Up: Terminating"
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
publisher.warning("Shutdown Flag Up: Terminating.")
|
||||||
break
|
break
|
||||||
publisher.debug("Script url is Idling 10s")
|
publisher.debug("Script url is Idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = sub.get_msg_from_queue(r_serv)
|
message = h.redis_rpop()
|
||||||
except dns.exception.Timeout:
|
except dns.exception.Timeout:
|
||||||
print "dns.exception.Timeout", A_values
|
print "dns.exception.Timeout", A_values
|
||||||
pass
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
@ -1,44 +1,18 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import redis
|
|
||||||
import ConfigParser
|
|
||||||
from packages import ZMQ_PubSub
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
import Helper
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main Function"""
|
|
||||||
|
|
||||||
# CONFIG #
|
|
||||||
cfg = ConfigParser.ConfigParser()
|
|
||||||
cfg.read(configfile)
|
|
||||||
|
|
||||||
# REDIS #
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host=cfg.get("Redis_Queues", "host"),
|
|
||||||
port=cfg.getint("Redis_Queues", "port"),
|
|
||||||
db=cfg.getint("Redis_Queues", "db"))
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
# ZMQ #
|
|
||||||
sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "web_categ", "urls")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("""Suscribed to channel {0}""".format("web_categ"))
|
|
||||||
|
|
||||||
while True:
|
|
||||||
sub.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
if r_serv.sismember("SHUTDOWN_FLAGS", "Urls_Q"):
|
|
||||||
r_serv.srem("SHUTDOWN_FLAGS", "Urls_Q")
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
publisher.channel = "Queuing"
|
||||||
|
|
||||||
|
config_section = 'PubSub_Categ'
|
||||||
|
config_channel = 'channel_3'
|
||||||
|
subscriber_name = 'web_categ'
|
||||||
|
|
||||||
|
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
||||||
|
h.zmq_sub(config_section)
|
||||||
|
h.redis_queue_subscribe(publisher)
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
import argparse
|
import argparse
|
||||||
import gzip
|
import gzip
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
def readdoc(path=None):
|
def readdoc(path=None):
|
||||||
|
@ -21,7 +22,7 @@ def readdoc(path=None):
|
||||||
f = gzip.open(path, 'r')
|
f = gzip.open(path, 'r')
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
configfile = './packages/config.cfg'
|
configfile = os.path.join(os.environ('AIL_BIN'), 'packages/config.cfg')
|
||||||
cfg = ConfigParser.ConfigParser()
|
cfg = ConfigParser.ConfigParser()
|
||||||
cfg.read(configfile)
|
cfg.read(configfile)
|
||||||
|
|
||||||
|
|
|
@ -1,160 +0,0 @@
|
||||||
#!/usr/bin/python2.7
|
|
||||||
"""
|
|
||||||
The ``ZMQ PubSub`` Modules
|
|
||||||
==========================
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import zmq
|
|
||||||
import ConfigParser
|
|
||||||
|
|
||||||
|
|
||||||
class PubSub(object):
|
|
||||||
"""
|
|
||||||
The PubSub class is a ``Virtual Class`` which regroup the shared attribute
|
|
||||||
of a Publisher ZeroMQ and a Subcriber ZeroMQ
|
|
||||||
|
|
||||||
:param file_conf: -- (str) The filepath of the configuration file used (.cfg)
|
|
||||||
:param log_channel: -- (str) The channel used as a log channel
|
|
||||||
:param ps_name: -- (str) The "ID" of the Publisher/Subcriber
|
|
||||||
|
|
||||||
:return: PubSub Object
|
|
||||||
|
|
||||||
..note:: The ps_name was implemented to separate subscriber queues in redis
|
|
||||||
when they are listening on a same "stream"
|
|
||||||
..seealso:: Method of the ZMQSub class
|
|
||||||
|
|
||||||
..todo:: Create Implementing a log channel as an attribute of this virtual class.
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self, file_conf, log_channel, ps_name):
|
|
||||||
self._ps_name = ps_name
|
|
||||||
self._config_parser = ConfigParser.ConfigParser()
|
|
||||||
self._config_file = file_conf # "./packages/config.cfg"
|
|
||||||
|
|
||||||
self._config_parser.read(self._config_file)
|
|
||||||
|
|
||||||
self._context_zmq = zmq.Context()
|
|
||||||
|
|
||||||
# self._logging_publisher_channel = log_channel # "Default"
|
|
||||||
# publisher.channel(self._logging_publisher_channel)
|
|
||||||
|
|
||||||
|
|
||||||
class ZMQPub(PubSub):
|
|
||||||
"""
|
|
||||||
This class create a ZMQ Publisher which is able to send_message to a choosen socket.
|
|
||||||
|
|
||||||
:param pub_config_section: -- (str) The name of the section in the config file to get the settings
|
|
||||||
|
|
||||||
:return: ZMQPub Object
|
|
||||||
|
|
||||||
:Example:
|
|
||||||
Extract of the config file:
|
|
||||||
[PubSub_Categ]
|
|
||||||
adress = tcp://127.0.0.1:5003
|
|
||||||
|
|
||||||
Creating the object and sending message:
|
|
||||||
MyPublisher = ZMQPub('./packages/config.cfg', 'PubSub_Categ', 'pubcateg')
|
|
||||||
|
|
||||||
msg = "categ1"+" "+"Im the data sent on the categ1 channel"
|
|
||||||
MyPublisher.send_message(msg)
|
|
||||||
|
|
||||||
..note:: The ps_name attribute for a publisher is "optionnal" but still required to be
|
|
||||||
instantiated correctly.
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self, file_conf, pub_config_section, ps_name):
|
|
||||||
super(ZMQPub, self).__init__(file_conf, "Default", ps_name)
|
|
||||||
|
|
||||||
self._pub_config_section = pub_config_section
|
|
||||||
self._pubsocket = self._context_zmq.socket(zmq.PUB)
|
|
||||||
self._pub_adress = self._config_parser.get(self._pub_config_section, "adress")
|
|
||||||
|
|
||||||
self._pubsocket.bind(self._config_parser.get(self._pub_config_section, "adress"))
|
|
||||||
|
|
||||||
def send_message(self, message):
|
|
||||||
"""Send a message throught the publisher socket"""
|
|
||||||
self._pubsocket.send(message)
|
|
||||||
|
|
||||||
|
|
||||||
class ZMQSub(PubSub):
|
|
||||||
"""
|
|
||||||
This class create a ZMQ Subcriber which is able to receive message directly or
|
|
||||||
throught redis as a buffer.
|
|
||||||
|
|
||||||
The redis buffer is usefull when the subcriber do a time consuming job which is
|
|
||||||
desynchronising it from the stream of data received.
|
|
||||||
The redis buffer ensures that no data will be loss waiting to be processed.
|
|
||||||
|
|
||||||
:param sub_config_section: -- (str) The name of the section in the config file to get the settings
|
|
||||||
:param channel: -- (str) The listening channel of the Subcriber.
|
|
||||||
|
|
||||||
:return: ZMQSub Object
|
|
||||||
|
|
||||||
:Example:
|
|
||||||
Extract of the config file:
|
|
||||||
[PubSub_Global]
|
|
||||||
adress = tcp://127.0.0.1:5000
|
|
||||||
channel = filelist
|
|
||||||
|
|
||||||
Creating the object and receiving data + pushing to redis (redis buffering):
|
|
||||||
|
|
||||||
r_serv = redis.StrictRedis(
|
|
||||||
host = 127.0.0.1,
|
|
||||||
port = 6387,
|
|
||||||
db = 0)
|
|
||||||
|
|
||||||
channel = cfg.get("PubSub_Global", "channel")
|
|
||||||
MySubscriber = ZMQSub('./packages/config.cfg',"PubSub_Global", channel, "duplicate")
|
|
||||||
|
|
||||||
while True:
|
|
||||||
MySubscriber.get_and_lpush(r_serv)
|
|
||||||
|
|
||||||
|
|
||||||
Inside another script use this line to retrive the data from redis.
|
|
||||||
...
|
|
||||||
while True:
|
|
||||||
MySubscriber.get_msg_from_queue(r_serv)
|
|
||||||
...
|
|
||||||
|
|
||||||
..note:: If you don't want any redis buffering simply use the "get_message" method
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self, file_conf, sub_config_section, channel, ps_name):
|
|
||||||
super(ZMQSub, self).__init__(file_conf, "Default", ps_name)
|
|
||||||
|
|
||||||
self._sub_config_section = sub_config_section
|
|
||||||
self._subsocket = self._context_zmq.socket(zmq.SUB)
|
|
||||||
self._sub_adress = self._config_parser.get(self._sub_config_section, "adress")
|
|
||||||
|
|
||||||
self._subsocket.connect(self._config_parser.get(self._sub_config_section, "adress"))
|
|
||||||
|
|
||||||
self._channel = channel
|
|
||||||
self._subsocket.setsockopt(zmq.SUBSCRIBE, self._channel)
|
|
||||||
|
|
||||||
def get_message(self):
|
|
||||||
"""
|
|
||||||
Get the first sent message from a Publisher.
|
|
||||||
:return: (str) Message from Publisher
|
|
||||||
|
|
||||||
"""
|
|
||||||
return self._subsocket.recv()
|
|
||||||
|
|
||||||
def get_and_lpush(self, r_serv):
|
|
||||||
"""
|
|
||||||
Get the first sent message from a Publisher and storing it in redis
|
|
||||||
|
|
||||||
..note:: This function also create a set named "queue" for monitoring needs
|
|
||||||
|
|
||||||
"""
|
|
||||||
r_serv.sadd("queues", self._channel+self._ps_name)
|
|
||||||
r_serv.lpush(self._channel+self._ps_name, self._subsocket.recv())
|
|
||||||
|
|
||||||
def get_msg_from_queue(self, r_serv):
|
|
||||||
"""
|
|
||||||
Get the first sent message from a Redis List
|
|
||||||
|
|
||||||
:return: (str) Message from Publisher
|
|
||||||
|
|
||||||
"""
|
|
||||||
return r_serv.rpop(self._channel+self._ps_name)
|
|
|
@ -1,61 +0,0 @@
|
||||||
[Directories]
|
|
||||||
bloomfilters = /home/user/Blooms/
|
|
||||||
pastes = /home/user/PASTES/
|
|
||||||
|
|
||||||
##### Redis #####
|
|
||||||
[Redis_Cache]
|
|
||||||
host = localhost
|
|
||||||
port = 6379
|
|
||||||
db = 0
|
|
||||||
|
|
||||||
[Redis_Log]
|
|
||||||
host = localhost
|
|
||||||
port = 6380
|
|
||||||
db = 0
|
|
||||||
|
|
||||||
[Redis_Queues]
|
|
||||||
host = localhost
|
|
||||||
port = 6381
|
|
||||||
db = 0
|
|
||||||
|
|
||||||
[Redis_Data_Merging]
|
|
||||||
host = localhost
|
|
||||||
port = 6379
|
|
||||||
db = 1
|
|
||||||
|
|
||||||
##### LevelDB #####
|
|
||||||
[Redis_Level_DB]
|
|
||||||
host = localhost
|
|
||||||
port = 2013
|
|
||||||
db = 0
|
|
||||||
|
|
||||||
[Redis_Level_DB_Hashs]
|
|
||||||
host = localhost
|
|
||||||
port = 2013
|
|
||||||
db = 1
|
|
||||||
|
|
||||||
# PUB / SUB : ZMQ
|
|
||||||
[Feed]
|
|
||||||
adress = tcp://crf.circl.lu:5556
|
|
||||||
topicfilter = 102
|
|
||||||
|
|
||||||
[PubSub_Global]
|
|
||||||
adress = tcp://127.0.0.1:5000
|
|
||||||
channel = filelist
|
|
||||||
|
|
||||||
[PubSub_Longlines]
|
|
||||||
adress = tcp://127.0.0.1:5001
|
|
||||||
channel_0 = Longlines
|
|
||||||
channel_1 = Shortlines
|
|
||||||
|
|
||||||
[PubSub_Words]
|
|
||||||
adress = tcp://127.0.0.1:5002
|
|
||||||
channel_0 = words
|
|
||||||
|
|
||||||
[PubSub_Categ]
|
|
||||||
adress = tcp://127.0.0.1:5003
|
|
||||||
#Channels are dynamic (1 channel per categ)
|
|
||||||
|
|
||||||
[PubSub_Url]
|
|
||||||
adress = tcp://127.0.0.1:5004
|
|
||||||
channel = urls
|
|
|
@ -1,6 +1,6 @@
|
||||||
[Directories]
|
[Directories]
|
||||||
bloomfilters = /home/user/Blooms/
|
bloomfilters = /home/user/Blooms/
|
||||||
pastes = /home/user/PASTES/
|
pastes = PASTES
|
||||||
wordtrending_csv = /home/user/AIL/var/www/static/csv/wordstrendingdata
|
wordtrending_csv = /home/user/AIL/var/www/static/csv/wordstrendingdata
|
||||||
wordsfile = /home/user/AIL/files/wordfile
|
wordsfile = /home/user/AIL/files/wordfile
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ def create_dirfile(r_serv, directory, overwrite):
|
||||||
r_serv.delete("filelist")
|
r_serv.delete("filelist")
|
||||||
|
|
||||||
for x in listdirectory(directory):
|
for x in listdirectory(directory):
|
||||||
r_serv.rpush("filelist", x)
|
r_serv.lpush("filelist", x)
|
||||||
|
|
||||||
publisher.info("The list was overwritten")
|
publisher.info("The list was overwritten")
|
||||||
|
|
||||||
|
@ -53,13 +53,13 @@ def create_dirfile(r_serv, directory, overwrite):
|
||||||
if r_serv.llen("filelist") == 0:
|
if r_serv.llen("filelist") == 0:
|
||||||
|
|
||||||
for x in listdirectory(directory):
|
for x in listdirectory(directory):
|
||||||
r_serv.rpush("filelist", x)
|
r_serv.lpush("filelist", x)
|
||||||
|
|
||||||
publisher.info("New list created")
|
publisher.info("New list created")
|
||||||
else:
|
else:
|
||||||
|
|
||||||
for x in listdirectory(directory):
|
for x in listdirectory(directory):
|
||||||
r_serv.rpush("filelist", x)
|
r_serv.lpush("filelist", x)
|
||||||
|
|
||||||
publisher.info("The list was updated with new elements")
|
publisher.info("The list was updated with new elements")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue