mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-22 22:27:17 +00:00
Big refactoring, make the queues more flexible
This commit is contained in:
parent
623e876f3b
commit
abfe13436b
40 changed files with 412 additions and 734 deletions
|
@ -30,34 +30,28 @@ import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
config_section = 'Attributes'
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'attributes'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""ZMQ Attribute is Running""")
|
publisher.info("Attribute is Running")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
PST = Paste.Paste(message.split(" ", -1)[-1])
|
PST = Paste.Paste(message)
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
publisher.debug("Script Attribute is idling 1s")
|
||||||
print "Shutdown Flag Up: Terminating"
|
time.sleep(1)
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Script Attribute is idling 10s")
|
|
||||||
time.sleep(10)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# FIXME do it directly in the class
|
# FIXME do it directly in the class
|
|
@ -42,21 +42,15 @@ import time
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Words'
|
config_section = 'Categ'
|
||||||
config_channel = 'channel_0'
|
|
||||||
subscriber_name = 'categ'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# Publisher
|
|
||||||
pub_config_section = 'PubSub_Categ'
|
|
||||||
h.zmq_pub(pub_config_section, None)
|
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
# SCRIPT PARSER #
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
|
@ -71,11 +65,11 @@ if __name__ == "__main__":
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info(
|
publisher.info("Script Categ started")
|
||||||
"Script Categ subscribed to channel {}".format(h.sub_channel))
|
|
||||||
|
|
||||||
|
categories = ['CreditCards', 'Mail', 'Onion', 'Web']
|
||||||
tmp_dict = {}
|
tmp_dict = {}
|
||||||
for filename in os.listdir(args.d):
|
for filename in categories:
|
||||||
bname = os.path.basename(filename)
|
bname = os.path.basename(filename)
|
||||||
tmp_dict[bname] = []
|
tmp_dict[bname] = []
|
||||||
with open(os.path.join(args.d, filename), 'r') as f:
|
with open(os.path.join(args.d, filename), 'r') as f:
|
||||||
|
@ -85,9 +79,9 @@ if __name__ == "__main__":
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = Paste.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
|
@ -96,17 +90,14 @@ if __name__ == "__main__":
|
||||||
for categ, words_list in tmp_dict.items():
|
for categ, words_list in tmp_dict.items():
|
||||||
|
|
||||||
if word.lower() in words_list:
|
if word.lower() in words_list:
|
||||||
h.pub_channel = categ
|
msg = '{} {} {}'.format(PST.p_path, word, score)
|
||||||
h.zmq_pub_send('{} {} {}'.format(PST.p_path, word, score))
|
p.populate_set_out(msg, categ)
|
||||||
|
|
||||||
publisher.info(
|
publisher.info(
|
||||||
'Categ;{};{};{};Detected {} "{}"'.format(
|
'Categ;{};{};{};Detected {} "{}"'.format(
|
||||||
PST.p_source, PST.p_date, PST.p_name, score, word))
|
PST.p_source, PST.p_date, PST.p_name, score, word))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Script Categ is Idling 10s")
|
publisher.debug("Script Categ is Idling 10s")
|
||||||
|
print 'Sleeping'
|
||||||
time.sleep(10)
|
time.sleep(10)
|
|
@ -6,26 +6,27 @@ from packages import Paste
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
config_section = 'CreditCards'
|
||||||
config_channel = 'channel_0'
|
|
||||||
subscriber_name = 'cards'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Creditcard script subscribed to channel creditcard_categ")
|
publisher.info("Creditcard script subscribed to channel creditcard_categ")
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
|
||||||
|
|
||||||
|
# FIXME For retro compatibility
|
||||||
|
channel = 'creditcard_categ'
|
||||||
|
|
||||||
# mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
# visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
# visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
|
||||||
# discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|
|
# discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|
|
||||||
|
@ -37,7 +38,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
creditcard_set = set([])
|
creditcard_set = set([])
|
||||||
|
@ -62,11 +63,8 @@ if __name__ == "__main__":
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Script creditcard is idling 1m")
|
publisher.debug("Script creditcard is idling 1m")
|
||||||
|
print 'Sleeping'
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
|
@ -29,38 +29,35 @@ from pubsublogger import publisher
|
||||||
from packages import lib_words
|
from packages import lib_words
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Words'
|
config_section = 'Curve'
|
||||||
config_channel = 'channel_0'
|
p = Process(config_section)
|
||||||
subscriber_name = "curve"
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv1 = redis.StrictRedis(
|
r_serv1 = redis.StrictRedis(
|
||||||
host=h.config.get("Redis_Level_DB", "host"),
|
host=p.config.get("Redis_Level_DB", "host"),
|
||||||
port=h.config.get("Redis_Level_DB", "port"),
|
port=p.config.get("Redis_Level_DB", "port"),
|
||||||
db=h.config.get("Redis_Level_DB", "db"))
|
db=p.config.get("Redis_Level_DB", "db"))
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script Curve subscribed to {}".format(h.sub_channel))
|
publisher.info("Script Curve started")
|
||||||
|
|
||||||
# FILE CURVE SECTION #
|
# FILE CURVE SECTION #
|
||||||
csv_path = os.path.join(os.environ['AIL_HOME'],
|
csv_path = os.path.join(os.environ['AIL_HOME'],
|
||||||
h.config.get("Directories", "wordtrending_csv"))
|
p.config.get("Directories", "wordtrending_csv"))
|
||||||
wordfile_path = os.path.join(os.environ['AIL_HOME'],
|
wordfile_path = os.path.join(os.environ['AIL_HOME'],
|
||||||
h.config.get("Directories", "wordsfile"))
|
p.config.get("Directories", "wordsfile"))
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
while True:
|
while True:
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
filename, word, score = message.split()
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = Paste.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
lib_words.create_curve_with_word_file(
|
lib_words.create_curve_with_word_file(
|
||||||
|
@ -69,7 +66,6 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
prev_score = r_serv1.hget(word.lower(), PST.p_date)
|
prev_score = r_serv1.hget(word.lower(), PST.p_date)
|
||||||
print prev_score
|
|
||||||
if prev_score is not None:
|
if prev_score is not None:
|
||||||
r_serv1.hset(word.lower(), PST.p_date,
|
r_serv1.hset(word.lower(), PST.p_date,
|
||||||
int(prev_score) + int(score))
|
int(prev_score) + int(score))
|
||||||
|
@ -77,12 +73,7 @@ if __name__ == "__main__":
|
||||||
r_serv1.hset(word.lower(), PST.p_date, score)
|
r_serv1.hset(word.lower(), PST.p_date, score)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Script Curve is Idling")
|
publisher.debug("Script Curve is Idling")
|
||||||
print "sleepin"
|
print "sleeping"
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
message = p.get_from_set()
|
||||||
message = h.redis_rpop()
|
|
|
@ -19,17 +19,15 @@ from packages import Paste
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from pybloomfilter import BloomFilter
|
from pybloomfilter import BloomFilter
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
config_section = 'Duplicates'
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'duplicate'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
# DB OBJECT & HASHS ( DISK )
|
# DB OBJECT & HASHS ( DISK )
|
||||||
|
@ -38,16 +36,15 @@ if __name__ == "__main__":
|
||||||
for year in xrange(2013, 2015):
|
for year in xrange(2013, 2015):
|
||||||
for month in xrange(0, 16):
|
for month in xrange(0, 16):
|
||||||
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
|
||||||
host=h.config.get("Redis_Level_DB", "host"), port=year,
|
host=p.config.get("Redis_Level_DB", "host"), port=year,
|
||||||
db=month)
|
db=month)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("""Script duplicate subscribed to channel {0}""".format(
|
publisher.info("Script duplicate started")
|
||||||
h.config.get("PubSub_Global", "channel")))
|
|
||||||
|
|
||||||
set_limit = 100
|
set_limit = 100
|
||||||
bloompath = os.path.join(os.environ['AIL_HOME'],
|
bloompath = os.path.join(os.environ['AIL_HOME'],
|
||||||
h.config.get("Directories", "bloomfilters"))
|
p.config.get("Directories", "bloomfilters"))
|
||||||
|
|
||||||
bloop_path_set = set()
|
bloop_path_set = set()
|
||||||
while True:
|
while True:
|
||||||
|
@ -59,17 +56,13 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
x = time.time()
|
x = time.time()
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
if message is not None:
|
if message is not None:
|
||||||
path = message.split(" ", -1)[-1]
|
path = message
|
||||||
PST = Paste.Paste(path)
|
PST = Paste.Paste(path)
|
||||||
else:
|
else:
|
||||||
publisher.debug("Script Attribute is idling 10s")
|
publisher.debug("Script Attribute is idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
PST._set_p_hash_kind("md5")
|
PST._set_p_hash_kind("md5")
|
|
@ -25,56 +25,44 @@ import os
|
||||||
import time
|
import time
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == '__main__':
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = 'Script'
|
||||||
|
|
||||||
config_section = 'Feed'
|
config_section = 'Global'
|
||||||
config_channel = 'topicfilter'
|
|
||||||
subscriber_name = 'feed'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# Publisher
|
|
||||||
pub_config_section = "PubSub_Global"
|
|
||||||
pub_config_channel = 'channel'
|
|
||||||
h.zmq_pub(pub_config_section, pub_config_channel)
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.info("Feed Script started to receive & publish.")
|
publisher.info("Feed Script started to receive & publish.")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
# Recovering the streamed message informations.
|
# Recovering the streamed message informations.
|
||||||
if message is not None:
|
if message is not None:
|
||||||
if len(message.split()) == 3:
|
splitted = message.split()
|
||||||
topic, paste, gzip64encoded = message.split()
|
if len(splitted) == 2:
|
||||||
print paste
|
paste, gzip64encoded = splitted
|
||||||
else:
|
else:
|
||||||
# TODO Store the name of the empty paste inside a Redis-list.
|
# TODO Store the name of the empty paste inside a Redis-list.
|
||||||
print "Empty Paste: not processed"
|
print "Empty Paste: not processed"
|
||||||
publisher.debug("Empty Paste: {0} not processed".format(paste))
|
publisher.debug("Empty Paste: {0} not processed".format(paste))
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
print "Empty Queues: Waiting..."
|
print "Empty Queues: Waiting..."
|
||||||
time.sleep(10)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
# Creating the full filepath
|
# Creating the full filepath
|
||||||
filename = os.path.join(os.environ['AIL_HOME'],
|
filename = os.path.join(os.environ['AIL_HOME'],
|
||||||
h.config.get("Directories", "pastes"), paste)
|
p.config.get("Directories", "pastes"), paste)
|
||||||
dirname = os.path.dirname(filename)
|
dirname = os.path.dirname(filename)
|
||||||
if not os.path.exists(dirname):
|
if not os.path.exists(dirname):
|
||||||
os.makedirs(dirname)
|
os.makedirs(dirname)
|
||||||
|
|
||||||
with open(filename, 'wb') as f:
|
with open(filename, 'wb') as f:
|
||||||
f.write(base64.standard_b64decode(gzip64encoded))
|
f.write(base64.standard_b64decode(gzip64encoded))
|
||||||
|
p.populate_set_out(filename)
|
||||||
h.zmq_pub_send(filename)
|
|
180
bin/Helper.py
180
bin/Helper.py
|
@ -15,74 +15,144 @@ import redis
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
import os
|
import os
|
||||||
import zmq
|
import zmq
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
class Redis_Queues(object):
|
class PubSub(object):
|
||||||
|
|
||||||
def __init__(self, conf_section, conf_channel, subscriber_name):
|
def __init__(self):
|
||||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||||
print configfile
|
|
||||||
if not os.path.exists(configfile):
|
if not os.path.exists(configfile):
|
||||||
raise Exception('Unable to find the configuration file. \
|
raise Exception('Unable to find the configuration file. \
|
||||||
Did you set environment variables? \
|
Did you set environment variables? \
|
||||||
Or activate the virtualenv.')
|
Or activate the virtualenv.')
|
||||||
self.config = ConfigParser.ConfigParser()
|
self.config = ConfigParser.ConfigParser()
|
||||||
self.config.read(configfile)
|
self.config.read(configfile)
|
||||||
self.subscriber_name = subscriber_name
|
self.redis_sub = False
|
||||||
|
self.zmq_sub = False
|
||||||
|
self.subscriber = None
|
||||||
|
self.publishers = {'Redis': [], 'ZMQ': []}
|
||||||
|
|
||||||
self.sub_channel = self.config.get(conf_section, conf_channel)
|
def setup_subscribe(self, conn_name):
|
||||||
self.sub_address = self.config.get(conf_section, 'address')
|
if self.config.has_section(conn_name):
|
||||||
self.redis_channel = self.sub_channel + self.subscriber_name
|
channel = self.config.get(conn_name, 'channel')
|
||||||
|
|
||||||
# Redis Queue
|
|
||||||
config_section = "Redis_Queues"
|
|
||||||
self.r_queues = redis.StrictRedis(
|
|
||||||
host=self.config.get(config_section, "host"),
|
|
||||||
port=self.config.getint(config_section, "port"),
|
|
||||||
db=self.config.getint(config_section, "db"))
|
|
||||||
|
|
||||||
def zmq_sub(self):
|
|
||||||
context = zmq.Context()
|
|
||||||
self.sub_socket = context.socket(zmq.SUB)
|
|
||||||
self.sub_socket.connect(self.sub_address)
|
|
||||||
self.sub_socket.setsockopt(zmq.SUBSCRIBE, self.sub_channel)
|
|
||||||
|
|
||||||
def zmq_pub(self, config_section, config_channel):
|
|
||||||
context = zmq.Context()
|
|
||||||
self.pub_socket = context.socket(zmq.PUB)
|
|
||||||
self.pub_socket.bind(self.config.get(config_section, 'adress'))
|
|
||||||
if config_channel is not None:
|
|
||||||
self.pub_channel = self.config.get(config_section, config_channel)
|
|
||||||
else:
|
else:
|
||||||
# The publishing channel is defined dynamically
|
channel = conn_name.split('_')[1]
|
||||||
self.pub_channel = None
|
if conn_name.startswith('Redis'):
|
||||||
|
self.redis_sub = True
|
||||||
|
r = redis.StrictRedis(
|
||||||
|
host=self.config.get('RedisPubSub', 'host'),
|
||||||
|
port=self.config.get('RedisPubSub', 'port'),
|
||||||
|
db=self.config.get('RedisPubSub', 'db'))
|
||||||
|
self.subscriber = r.pubsub()
|
||||||
|
self.subscriber.psubscribe(channel)
|
||||||
|
self.subscriber.get_message()
|
||||||
|
elif conn_name.startswith('ZMQ'):
|
||||||
|
self.zmq_sub = True
|
||||||
|
context = zmq.Context()
|
||||||
|
self.subscriber = context.socket(zmq.SUB)
|
||||||
|
self.subscriber.connect(self.config.get(conn_name, 'address'))
|
||||||
|
self.subscriber.setsockopt(zmq.SUBSCRIBE, channel)
|
||||||
|
|
||||||
def zmq_pub_send(self, msg):
|
def setup_publish(self, conn_name):
|
||||||
if self.pub_channel is None:
|
if self.config.has_section(conn_name):
|
||||||
raise Exception('A channel is reqired to send a message.')
|
channel = self.config.get(conn_name, 'channel')
|
||||||
self.pub_socket.send('{} {}'.format(self.pub_channel, msg))
|
|
||||||
|
|
||||||
def redis_rpop(self):
|
|
||||||
return self.r_queues.rpop(self.redis_channel)
|
|
||||||
|
|
||||||
def redis_queue_shutdown(self, is_queue=False):
|
|
||||||
if is_queue:
|
|
||||||
flag = self.subscriber_name + '_Q'
|
|
||||||
else:
|
else:
|
||||||
flag = self.subscriber_name
|
channel = conn_name.split('_')[1]
|
||||||
# srem returns False if the element does not exists
|
if conn_name.startswith('Redis'):
|
||||||
return self.r_queues.srem('SHUTDOWN_FLAGS', flag)
|
r = redis.StrictRedis(host=self.config.get('RedisPubSub', 'host'),
|
||||||
|
port=self.config.get('RedisPubSub', 'port'),
|
||||||
|
db=self.config.get('RedisPubSub', 'db'))
|
||||||
|
self.publishers['Redis'].append((r, channel))
|
||||||
|
elif conn_name.startswith('ZMQ'):
|
||||||
|
context = zmq.Context()
|
||||||
|
p = context.socket(zmq.PUB)
|
||||||
|
p.bind(self.config.get(conn_name, 'address'))
|
||||||
|
self.publishers['ZMQ'].append((p, channel))
|
||||||
|
|
||||||
def redis_queue_subscribe(self, publisher):
|
def publish(self, message):
|
||||||
self.zmq_sub()
|
m = json.loads(message)
|
||||||
publisher.info("Suscribed to channel {}".format(self.sub_channel))
|
channel_message = m.get('channel')
|
||||||
|
for p, channel in self.publishers['Redis']:
|
||||||
|
if channel_message is None or channel_message == channel:
|
||||||
|
p.publish(channel, m['message'])
|
||||||
|
for p, channel in self.publishers['ZMQ']:
|
||||||
|
if channel_message is None or channel_message == channel:
|
||||||
|
p.send('{} {}'.format(channel, m['message']))
|
||||||
|
|
||||||
|
def subscribe(self):
|
||||||
|
if self.redis_sub:
|
||||||
|
for msg in self.subscriber.listen():
|
||||||
|
if msg.get('data', None) is not None:
|
||||||
|
yield msg['data']
|
||||||
|
elif self.zmq_sub:
|
||||||
|
while True:
|
||||||
|
msg = self.subscriber.recv()
|
||||||
|
yield msg.split(' ', 1)[1]
|
||||||
|
else:
|
||||||
|
raise Exception('No subscribe function defined')
|
||||||
|
|
||||||
|
|
||||||
|
class Process(object):
|
||||||
|
|
||||||
|
def __init__(self, conf_section):
|
||||||
|
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||||
|
if not os.path.exists(configfile):
|
||||||
|
raise Exception('Unable to find the configuration file. \
|
||||||
|
Did you set environment variables? \
|
||||||
|
Or activate the virtualenv.')
|
||||||
|
modulesfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg')
|
||||||
|
self.config = ConfigParser.ConfigParser()
|
||||||
|
self.config.read(configfile)
|
||||||
|
self.modules = ConfigParser.ConfigParser()
|
||||||
|
self.modules.read(modulesfile)
|
||||||
|
self.subscriber_name = conf_section
|
||||||
|
self.pubsub = None
|
||||||
|
if self.modules.has_section(conf_section):
|
||||||
|
self.pubsub = PubSub()
|
||||||
|
else:
|
||||||
|
raise Exception('Your process has to listen to at least one feed.')
|
||||||
|
self.r_temp = redis.StrictRedis(
|
||||||
|
host=self.config.get('RedisPubSub', 'host'),
|
||||||
|
port=self.config.get('RedisPubSub', 'port'),
|
||||||
|
db=self.config.get('RedisPubSub', 'db'))
|
||||||
|
|
||||||
|
def populate_set_in(self):
|
||||||
|
# monoproc
|
||||||
|
src = self.modules.get(self.subscriber_name, 'subscribe')
|
||||||
|
self.pubsub.setup_subscribe(src)
|
||||||
|
for msg in self.pubsub.subscribe():
|
||||||
|
in_set = self.subscriber_name + 'in'
|
||||||
|
self.r_temp.sadd(in_set, msg)
|
||||||
|
self.r_temp.hset('queues', self.subscriber_name,
|
||||||
|
int(self.r_temp.scard(in_set)))
|
||||||
|
|
||||||
|
def get_from_set(self):
|
||||||
|
# multiproc
|
||||||
|
in_set = self.subscriber_name + 'in'
|
||||||
|
self.r_temp.hset('queues', self.subscriber_name,
|
||||||
|
int(self.r_temp.scard(in_set)))
|
||||||
|
return self.r_temp.spop(in_set)
|
||||||
|
|
||||||
|
def populate_set_out(self, msg, channel=None):
|
||||||
|
# multiproc
|
||||||
|
msg = {'message': msg}
|
||||||
|
if channel is not None:
|
||||||
|
msg.update({'channel': channel})
|
||||||
|
self.r_temp.sadd(self.subscriber_name + 'out', json.dumps(msg))
|
||||||
|
|
||||||
|
def publish(self):
|
||||||
|
# monoproc
|
||||||
|
if not self.modules.has_option(self.subscriber_name, 'publish'):
|
||||||
|
return False
|
||||||
|
dest = self.modules.get(self.subscriber_name, 'publish')
|
||||||
|
# We can have multiple publisher
|
||||||
|
for name in dest.split(','):
|
||||||
|
self.pubsub.setup_publish(name)
|
||||||
while True:
|
while True:
|
||||||
msg = self.sub_socket.recv()
|
message = self.r_temp.spop(self.subscriber_name + 'out')
|
||||||
p = self.r_queues.pipeline()
|
if message is None:
|
||||||
p.sadd("queues", self.redis_channel)
|
time.sleep(1)
|
||||||
p.lpush(self.redis_channel, msg)
|
continue
|
||||||
p.execute()
|
self.pubsub.publish(message)
|
||||||
if self.redis_queue_shutdown(True):
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
|
|
|
@ -17,26 +17,21 @@ from whoosh.index import create_in, exists_in, open_dir
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
# Subscriber
|
config_section = 'Indexer'
|
||||||
sub_config_section = 'PubSub_Global'
|
|
||||||
sub_name = 'indexer'
|
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
p = Process(config_section)
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'indexer'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
|
|
||||||
# Indexer configuration - index dir and schema setup
|
# Indexer configuration - index dir and schema setup
|
||||||
indexpath = h.config.get("Indexer", "path")
|
indexpath = os.path.join(os.environ['AIL_HOME'],
|
||||||
indexertype = h.config.get("Indexer", "type")
|
p.config.get("Indexer", "path"))
|
||||||
|
indexertype = p.config.get("Indexer", "type")
|
||||||
if indexertype == "whoosh":
|
if indexertype == "whoosh":
|
||||||
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
||||||
unique=True),
|
unique=True),
|
||||||
|
@ -49,18 +44,16 @@ if __name__ == "__main__":
|
||||||
ix = open_dir(indexpath)
|
ix = open_dir(indexpath)
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.info("""ZMQ Indexer is Running""")
|
publisher.info("ZMQ Indexer is Running")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
PST = Paste.Paste(message.split(" ", -1)[-1])
|
PST = Paste.Paste(message)
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
publisher.debug("Script Indexer is idling 1s")
|
||||||
break
|
|
||||||
publisher.debug("Script Indexer is idling 10s")
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
docpath = message.split(" ", -1)[-1]
|
docpath = message.split(" ", -1)[-1]
|
|
@ -32,26 +32,19 @@ import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == '__main__':
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = 'Script'
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
config_section = 'Lines'
|
||||||
config_channel = 'channel'
|
p = Process(config_section)
|
||||||
subscriber_name = 'line'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
|
|
||||||
# Publisher
|
|
||||||
pub_config_section = 'PubSub_Longlines'
|
|
||||||
h.zmq_pub(pub_config_section, None)
|
|
||||||
|
|
||||||
# SCRIPT PARSER #
|
# SCRIPT PARSER #
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='''This script is a part of the Analysis Information \
|
description='This script is a part of the Analysis Information \
|
||||||
Leak framework.''')
|
Leak framework.')
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-max', type=int, default=500,
|
'-max', type=int, default=500,
|
||||||
|
@ -60,24 +53,18 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
channel_0 = h.config.get("PubSub_Longlines", "channel_0")
|
|
||||||
channel_1 = h.config.get("PubSub_Longlines", "channel_1")
|
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
tmp_string = "Lines script Subscribed to channel {} and Start to publish \
|
tmp_string = "Lines script Subscribed to channel {} and Start to publish \
|
||||||
on channel {}, {}"
|
on channel Longlines, Shortlines"
|
||||||
publisher.info(tmp_string.format(h.sub_channel, channel_0, channel_1))
|
publisher.info(tmp_string)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
|
print message
|
||||||
if message is not None:
|
if message is not None:
|
||||||
PST = Paste.Paste(message.split(" ", -1)[-1])
|
PST = Paste.Paste(message)
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Tokeniser is idling 10s")
|
publisher.debug("Tokeniser is idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
continue
|
continue
|
||||||
|
@ -89,10 +76,9 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# FIXME Not used.
|
# FIXME Not used.
|
||||||
PST.store.sadd("Pastes_Objects", PST.p_path)
|
PST.store.sadd("Pastes_Objects", PST.p_path)
|
||||||
if lines_infos[1] >= args.max:
|
if lines_infos[1] < args.max:
|
||||||
h.pub_channel = channel_0
|
p.populate_set_out(PST.p_path, 'LinesShort')
|
||||||
else:
|
else:
|
||||||
h.pub_channel = channel_1
|
p.populate_set_out(PST.p_path, 'LinesLong')
|
||||||
h.zmq_pub_send(PST.p_path)
|
|
||||||
except IOError:
|
except IOError:
|
||||||
print "CRC Checksum Error on : ", PST.p_path
|
print "CRC Checksum Error on : ", PST.p_path
|
|
@ -9,28 +9,29 @@ from packages import Paste
|
||||||
from packages import lib_refine
|
from packages import lib_refine
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
config_section = 'Mail'
|
||||||
config_channel = 'channel_1'
|
|
||||||
subscriber_name = 'emails'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv2 = redis.StrictRedis(
|
r_serv2 = redis.StrictRedis(
|
||||||
host=h.config.get("Redis_Cache", "host"),
|
host=p.config.get("Redis_Cache", "host"),
|
||||||
port=h.config.getint("Redis_Cache", "port"),
|
port=p.config.getint("Redis_Cache", "port"),
|
||||||
db=h.config.getint("Redis_Cache", "db"))
|
db=p.config.getint("Redis_Cache", "db"))
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Suscribed to channel mails_categ")
|
publisher.info("Suscribed to channel mails_categ")
|
||||||
|
|
||||||
message = h.redis_rpop()
|
# FIXME For retro compatibility
|
||||||
|
channel = 'mails_categ'
|
||||||
|
|
||||||
|
message = p.get_from_set()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
# Log as critical if there are more that that amout of valid emails
|
# Log as critical if there are more that that amout of valid emails
|
||||||
|
@ -41,7 +42,8 @@ if __name__ == "__main__":
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
print message
|
||||||
|
filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
PST = Paste.Paste(filename)
|
PST = Paste.Paste(filename)
|
||||||
|
@ -65,14 +67,11 @@ if __name__ == "__main__":
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Script Mails is Idling 10s")
|
publisher.debug("Script Mails is Idling 10s")
|
||||||
|
print 'Sleeping'
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
except dns.exception.Timeout:
|
except dns.exception.Timeout:
|
||||||
# FIXME retry!
|
# FIXME retry!
|
||||||
print "dns.exception.Timeout"
|
print "dns.exception.Timeout"
|
|
@ -27,32 +27,34 @@ from packages import Paste
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
config_section = 'Onion'
|
||||||
config_channel = 'channel_2'
|
|
||||||
subscriber_name = 'tor'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script subscribed to channel onion_categ")
|
publisher.info("Script subscribed to channel onion_categ")
|
||||||
|
|
||||||
|
# FIXME For retro compatibility
|
||||||
|
channel = 'onion_categ'
|
||||||
|
|
||||||
# Getting the first message from redis.
|
# Getting the first message from redis.
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
# Thanks to Faup project for this regex
|
# Thanks to Faup project for this regex
|
||||||
# https://github.com/stricaud/faup
|
# https://github.com/stricaud/faup
|
||||||
url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|onion|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
print message
|
||||||
|
filename, word, score = message.split()
|
||||||
|
|
||||||
# "For each new paste"
|
# "For each new paste"
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
|
@ -64,8 +66,7 @@ if __name__ == "__main__":
|
||||||
credential, subdomain, domain, host, tld, port, \
|
credential, subdomain, domain, host, tld, port, \
|
||||||
resource_path, query_string, f1, f2, f3, f4 = x
|
resource_path, query_string, f1, f2, f3, f4 = x
|
||||||
|
|
||||||
if f1 == "onion":
|
domains_list.append(domain)
|
||||||
domains_list.append(domain)
|
|
||||||
|
|
||||||
# Saving the list of extracted onion domains.
|
# Saving the list of extracted onion domains.
|
||||||
PST.__setattr__(channel, domains_list)
|
PST.__setattr__(channel, domains_list)
|
||||||
|
@ -83,11 +84,8 @@ if __name__ == "__main__":
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Script url is Idling 10s")
|
publisher.debug("Script url is Idling 10s")
|
||||||
|
print 'Sleeping'
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
24
bin/QueueIn.py
Executable file
24
bin/QueueIn.py
Executable file
|
@ -0,0 +1,24 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
|
||||||
|
from pubsublogger import publisher
|
||||||
|
from Helper import Process
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def run(config_section):
|
||||||
|
p = Process(config_section)
|
||||||
|
p.populate_set_in()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
publisher.port = 6380
|
||||||
|
publisher.channel = 'Queuing'
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description='Entry queue for a module.')
|
||||||
|
parser.add_argument("-c", "--config_section", type=str,
|
||||||
|
help="Config section to use in the config file.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
run(args.config_section)
|
24
bin/QueueOut.py
Executable file
24
bin/QueueOut.py
Executable file
|
@ -0,0 +1,24 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
from pubsublogger import publisher
|
||||||
|
from Helper import Process
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def run(config_section):
|
||||||
|
p = Process(config_section)
|
||||||
|
if not p.publish():
|
||||||
|
print(config_section, 'has no publisher.')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
publisher.port = 6380
|
||||||
|
publisher.channel = 'Queuing'
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description='Entry queue for a module.')
|
||||||
|
parser.add_argument("-c", "--config_section", type=str,
|
||||||
|
help="Config section to use in the config file.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
run(args.config_section)
|
|
@ -27,40 +27,28 @@ import time
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Longlines'
|
config_section = 'Tokenize'
|
||||||
config_channel = 'channel_1'
|
p = Process(config_section)
|
||||||
subscriber_name = 'tokenize'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
|
|
||||||
# Publisher
|
|
||||||
pub_config_section = 'PubSub_Words'
|
|
||||||
pub_config_channel = 'channel_0'
|
|
||||||
h.zmq_pub(pub_config_section, pub_config_channel)
|
|
||||||
|
|
||||||
# LOGGING #
|
# LOGGING #
|
||||||
publisher.info("Tokeniser subscribed to channel {}".format(h.sub_channel))
|
publisher.info("Tokeniser started")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
print message
|
print message
|
||||||
if message is not None:
|
if message is not None:
|
||||||
paste = Paste.Paste(message.split(" ", -1)[-1])
|
paste = Paste.Paste(message)
|
||||||
for word, score in paste._get_top_words().items():
|
for word, score in paste._get_top_words().items():
|
||||||
if len(word) >= 4:
|
if len(word) >= 4:
|
||||||
h.zmq_pub_send('{} {} {}'.format(paste.p_path, word,
|
msg = '{} {} {}'.format(paste.p_path, word, score)
|
||||||
score))
|
p.populate_set_out(msg)
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Tokeniser is idling 10s")
|
publisher.debug("Tokeniser is idling 10s")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
print "sleepin"
|
print "sleepin"
|
|
@ -14,36 +14,32 @@ import socket
|
||||||
import pycountry
|
import pycountry
|
||||||
import ipaddress
|
import ipaddress
|
||||||
|
|
||||||
import Helper
|
from Helper import Process
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
publisher.port = 6380
|
publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
config_section = 'Web'
|
||||||
config_channel = 'channel_3'
|
|
||||||
subscriber_name = "urls"
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
p = Process(config_section)
|
||||||
|
|
||||||
# Publisher
|
|
||||||
pub_config_section = "PubSub_Url"
|
|
||||||
pub_config_channel = 'channel'
|
|
||||||
h.zmq_pub(pub_config_section, pub_config_channel)
|
|
||||||
|
|
||||||
# REDIS #
|
# REDIS #
|
||||||
r_serv2 = redis.StrictRedis(
|
r_serv2 = redis.StrictRedis(
|
||||||
host=h.config.get("Redis_Cache", "host"),
|
host=p.config.get("Redis_Cache", "host"),
|
||||||
port=h.config.getint("Redis_Cache", "port"),
|
port=p.config.getint("Redis_Cache", "port"),
|
||||||
db=h.config.getint("Redis_Cache", "db"))
|
db=p.config.getint("Redis_Cache", "db"))
|
||||||
|
|
||||||
# Country to log as critical
|
# Country to log as critical
|
||||||
cc_critical = h.config.get("PubSub_Url", "cc_critical")
|
cc_critical = p.config.get("PubSub_Url", "cc_critical")
|
||||||
|
|
||||||
# FUNCTIONS #
|
# FUNCTIONS #
|
||||||
publisher.info("Script URL subscribed to channel web_categ")
|
publisher.info("Script URL subscribed to channel web_categ")
|
||||||
|
|
||||||
message = h.redis_rpop()
|
# FIXME For retro compatibility
|
||||||
|
channel = 'web_categ'
|
||||||
|
|
||||||
|
message = p.get_from_set()
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
|
|
||||||
url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
|
||||||
|
@ -51,7 +47,7 @@ if __name__ == "__main__":
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
if message is not None:
|
if message is not None:
|
||||||
channel, filename, word, score = message.split()
|
filename, word, score = message.split()
|
||||||
|
|
||||||
if prec_filename is None or filename != prec_filename:
|
if prec_filename is None or filename != prec_filename:
|
||||||
domains_list = []
|
domains_list = []
|
||||||
|
@ -62,7 +58,7 @@ if __name__ == "__main__":
|
||||||
port, resource_path, query_string, f1, f2, f3, \
|
port, resource_path, query_string, f1, f2, f3, \
|
||||||
f4 = x
|
f4 = x
|
||||||
domains_list.append(domain)
|
domains_list.append(domain)
|
||||||
h.zmq_pub_send(str(x))
|
p.populate_set_out(x, 'Url')
|
||||||
publisher.debug('{} Published'.format(x))
|
publisher.debug('{} Published'.format(x))
|
||||||
|
|
||||||
if f1 == "onion":
|
if f1 == "onion":
|
||||||
|
@ -110,13 +106,10 @@ if __name__ == "__main__":
|
||||||
prec_filename = filename
|
prec_filename = filename
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if h.redis_queue_shutdown():
|
|
||||||
print "Shutdown Flag Up: Terminating"
|
|
||||||
publisher.warning("Shutdown Flag Up: Terminating.")
|
|
||||||
break
|
|
||||||
publisher.debug("Script url is Idling 10s")
|
publisher.debug("Script url is Idling 10s")
|
||||||
|
print 'Sleeping'
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
message = h.redis_rpop()
|
message = p.get_from_set()
|
||||||
except dns.exception.Timeout:
|
except dns.exception.Timeout:
|
||||||
print "dns.exception.Timeout", A_values
|
print "dns.exception.Timeout", A_values
|
|
@ -1,37 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
"""
|
|
||||||
The ZMQ_Feed_Q Module
|
|
||||||
=====================
|
|
||||||
|
|
||||||
This module is the first of the ZMQ tree processing.
|
|
||||||
It's subscribe to a data stream and put the received messages
|
|
||||||
into a Redis-list waiting to be popped later by others scripts
|
|
||||||
|
|
||||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
||||||
the same Subscriber name in both of them.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Need running Redis instances.
|
|
||||||
*Messages from the stream should be formated as follow:
|
|
||||||
"channel_name"+" "+/path/to/the/paste.gz+" "base64_data_encoded_paste"
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'Feed'
|
|
||||||
config_channel = 'topicfilter'
|
|
||||||
subscriber_name = 'feed'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,35 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_PubSub_Categ_Q Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module subscribe to a Publisher stream and put the received messages
|
|
||||||
into a Redis-list waiting to be popped later by others scripts.
|
|
||||||
|
|
||||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
||||||
the same Subscriber name in both of them.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Running Redis instances.
|
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = 'Queuing'
|
|
||||||
|
|
||||||
config_section = 'PubSub_Words'
|
|
||||||
config_channel = 'channel_0'
|
|
||||||
subscriber_name = 'categ'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,34 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_PubSub_Line_Q Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module subscribe to a Publisher stream and put the received messages
|
|
||||||
into a Redis-list waiting to be popped later by others scripts.
|
|
||||||
|
|
||||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
||||||
the same Subscriber name in both of them.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Running Redis instances.
|
|
||||||
*Should register to the Publisher "ZMQ_Feed"
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = "PubSub_Global"
|
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'line'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,35 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_PubSub_Tokenize_Q Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module subscribe to a Publisher stream and put the received messages
|
|
||||||
into a Redis-list waiting to be popped later by others scripts.
|
|
||||||
|
|
||||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
||||||
the same Subscriber name in both of them.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Running Redis instances.
|
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Line" channel 1
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Longlines'
|
|
||||||
config_channel = 'channel_1'
|
|
||||||
subscriber_name = 'tokenize'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,54 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_Pub_Global Module
|
|
||||||
=========================
|
|
||||||
|
|
||||||
This module is consuming the Redis-list created by the script ./Dir.py.
|
|
||||||
This module is as the same level of the ZMQ tree than the Module ZMQ_Feed
|
|
||||||
|
|
||||||
Whereas the ZMQ_Feed is poping the list created in redis by ZMQ_Feed_Q which is
|
|
||||||
listening a stream, ZMQ_Pub_Global is poping the list created in redis by
|
|
||||||
./Dir.py.
|
|
||||||
|
|
||||||
Thanks to this Module there is now two way to Feed the ZMQ tree:
|
|
||||||
*By a continuous stream ..seealso:: ZMQ_Feed Module
|
|
||||||
*Manually with this module and ./Dir.py script.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Need running Redis instances. (Redis)
|
|
||||||
|
|
||||||
"""
|
|
||||||
import time
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Global"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'global'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
|
|
||||||
# Publisher
|
|
||||||
pub_config_section = 'PubSub_Global'
|
|
||||||
pub_config_channel = 'channel'
|
|
||||||
h.zmq_pub(pub_config_section, pub_config_channel)
|
|
||||||
|
|
||||||
# LOGGING #
|
|
||||||
publisher.info("Starting to publish.")
|
|
||||||
|
|
||||||
while True:
|
|
||||||
filename = h.redis_rpop()
|
|
||||||
|
|
||||||
if filename is not None:
|
|
||||||
h.zmq_pub_send(filename)
|
|
||||||
else:
|
|
||||||
time.sleep(10)
|
|
||||||
publisher.debug("Nothing to publish")
|
|
|
@ -1,35 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_Sub_Attributes_Q Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module subscribe to a Publisher stream and put the received messages
|
|
||||||
into a Redis-list waiting to be popped later by others scripts.
|
|
||||||
|
|
||||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
||||||
the same Subscriber name in both of them.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Running Redis instances.
|
|
||||||
*Should register to the Publisher "ZMQ_Feed"
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'attributes'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,18 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
|
||||||
config_channel = 'channel_0'
|
|
||||||
subscriber_name = 'cards'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,35 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_Sub_Curve_Q Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module subscribe to a Publisher stream and put the received messages
|
|
||||||
into a Redis-list waiting to be popped later by others scripts.
|
|
||||||
|
|
||||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
||||||
the same Subscriber name in both of them.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Running Redis instances.
|
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Tokenize"
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Words'
|
|
||||||
config_channel = 'channel_0'
|
|
||||||
subscriber_name = 'curve'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,17 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = 'Queuing'
|
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'duplicate'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,29 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_Sub_Indexer_Q Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
The ZMQ_Sub_Indexer_Q module subscribes to PubSub_Global ZMQ channel
|
|
||||||
and bufferizes the data in a Redis FIFO.
|
|
||||||
|
|
||||||
The FIFO will be then processed by the Indexer scripts (ZMQ_Sub_Indexer)
|
|
||||||
handling the indexing process of the files seen.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Global'
|
|
||||||
config_channel = 'channel'
|
|
||||||
subscriber_name = 'indexer'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,17 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
|
||||||
config_channel = 'channel_1'
|
|
||||||
subscriber_name = 'emails'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,34 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
The ZMQ_Sub_Onion_Q Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module subscribe to a Publisher stream and put the received messages
|
|
||||||
into a Redis-list waiting to be popped later by ZMQ_Sub_Onion.
|
|
||||||
|
|
||||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
||||||
the same Subscriber name in both of them (here "tor")
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
*Running Redis instances.
|
|
||||||
*Should register to the Publisher "ZMQ_PubSub_Categ"
|
|
||||||
|
|
||||||
"""
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
|
||||||
config_channel = 'channel_2'
|
|
||||||
subscriber_name = 'tor'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -1,18 +0,0 @@
|
||||||
#!/usr/bin/env python2
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import Helper
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
publisher.port = 6380
|
|
||||||
publisher.channel = "Queuing"
|
|
||||||
|
|
||||||
config_section = 'PubSub_Categ'
|
|
||||||
config_channel = 'channel_3'
|
|
||||||
subscriber_name = 'urls'
|
|
||||||
|
|
||||||
h = Helper.Redis_Queues(config_section, config_channel, subscriber_name)
|
|
||||||
h.redis_queue_subscribe(publisher)
|
|
|
@ -18,7 +18,8 @@ db = 0
|
||||||
[Redis_Queues]
|
[Redis_Queues]
|
||||||
host = localhost
|
host = localhost
|
||||||
port = 6381
|
port = 6381
|
||||||
db = 0
|
db_sub = 0
|
||||||
|
db_pub = 1
|
||||||
|
|
||||||
[Redis_Data_Merging]
|
[Redis_Data_Merging]
|
||||||
host = localhost
|
host = localhost
|
||||||
|
@ -71,3 +72,18 @@ cc_critical = DE
|
||||||
[Indexer]
|
[Indexer]
|
||||||
type = whoosh
|
type = whoosh
|
||||||
path = indexdir
|
path = indexdir
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
[ZMQ_Global]
|
||||||
|
address = tcp://crf.circl.lu:5556
|
||||||
|
channel = 102
|
||||||
|
|
||||||
|
[ZMQ_Url]
|
||||||
|
address = tcp://127.0.0.1:5004
|
||||||
|
channel = urls
|
||||||
|
|
||||||
|
[RedisPubSub]
|
||||||
|
host = localhost
|
||||||
|
port = 6381
|
||||||
|
db = 0
|
||||||
|
|
41
bin/packages/modules.cfg
Normal file
41
bin/packages/modules.cfg
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
[Global]
|
||||||
|
subscribe = ZMQ_Global
|
||||||
|
publish = Redis_Global
|
||||||
|
|
||||||
|
[Duplicates]
|
||||||
|
subscribe = Redis_Global
|
||||||
|
|
||||||
|
[Indexer]
|
||||||
|
subscribe = Redis_Global
|
||||||
|
|
||||||
|
[Attributes]
|
||||||
|
subscribe = Redis_Global
|
||||||
|
|
||||||
|
[Lines]
|
||||||
|
subscribe = Redis_Global
|
||||||
|
publish = Redis_LinesShort,Redis_LinesLong
|
||||||
|
|
||||||
|
[Tokenize]
|
||||||
|
subscribe = Redis_LinesShort
|
||||||
|
publish = Redis_Words
|
||||||
|
|
||||||
|
[Curve]
|
||||||
|
subscribe = Redis_Words
|
||||||
|
|
||||||
|
[Categ]
|
||||||
|
subscribe = Redis_Words
|
||||||
|
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web
|
||||||
|
|
||||||
|
[CreditCards]
|
||||||
|
subscribe = Redis_CreditCards
|
||||||
|
|
||||||
|
[Mail]
|
||||||
|
subscribe = Redis_Mail
|
||||||
|
|
||||||
|
[Onion]
|
||||||
|
subscribe = Redis_Onion
|
||||||
|
#publish = Redis_Global
|
||||||
|
|
||||||
|
[Web]
|
||||||
|
subscribe = Redis_Web
|
||||||
|
publish = Redis_Url,ZMQ_Url
|
22
bin/run_modules.py
Executable file
22
bin/run_modules.py
Executable file
|
@ -0,0 +1,22 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
|
||||||
|
import ConfigParser
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg')
|
||||||
|
if not os.path.exists(configfile):
|
||||||
|
raise Exception('Unable to find the configuration file. \
|
||||||
|
Did you set environment variables? \
|
||||||
|
Or activate the virtualenv.')
|
||||||
|
config = ConfigParser.ConfigParser()
|
||||||
|
config.read(configfile)
|
||||||
|
|
||||||
|
modules = config.sections()
|
||||||
|
for module in modules:
|
||||||
|
subprocess.Popen(["python", './QueueIn.py', '-c', module])
|
||||||
|
subprocess.Popen(["python", './QueueOut.py', '-c', module])
|
||||||
|
#subprocess.Popen(["python", './{}.py'.format(module)])
|
|
@ -1,6 +0,0 @@
|
||||||
isFinite
|
|
||||||
isNaN
|
|
||||||
eval
|
|
||||||
var
|
|
||||||
parseFloat
|
|
||||||
javascript
|
|
|
@ -1,6 +0,0 @@
|
||||||
../files/mails_categ
|
|
||||||
../files/creditcard_categ
|
|
||||||
../files/pass_categ
|
|
||||||
../files/web_categ
|
|
||||||
../files/javascript_categ
|
|
||||||
../files/onion_categ
|
|
|
@ -1,23 +0,0 @@
|
||||||
123456
|
|
||||||
password
|
|
||||||
12345678
|
|
||||||
qwerty
|
|
||||||
abc123
|
|
||||||
123456789
|
|
||||||
1234567
|
|
||||||
iloveyou
|
|
||||||
adobe123
|
|
||||||
123123
|
|
||||||
admin
|
|
||||||
1234567890
|
|
||||||
letmein
|
|
||||||
photoshop
|
|
||||||
1234
|
|
||||||
monkey
|
|
||||||
shadow
|
|
||||||
sunshine
|
|
||||||
12345
|
|
||||||
password1
|
|
||||||
princess
|
|
||||||
azerty
|
|
||||||
trustno1
|
|
|
@ -44,7 +44,8 @@ def event_stream():
|
||||||
|
|
||||||
def get_queues(r):
|
def get_queues(r):
|
||||||
# We may want to put the llen in a pipeline to do only one query.
|
# We may want to put the llen in a pipeline to do only one query.
|
||||||
return [(queue, r.llen(queue)) for queue in r.smembers("queues")]
|
return [(queue, int(card)) for queue, card in
|
||||||
|
r.hgetall("queues").iteritems()]
|
||||||
|
|
||||||
|
|
||||||
@app.route("/_logs")
|
@app.route("/_logs")
|
||||||
|
@ -54,6 +55,7 @@ def logs():
|
||||||
|
|
||||||
@app.route("/_stuff", methods=['GET'])
|
@app.route("/_stuff", methods=['GET'])
|
||||||
def stuff():
|
def stuff():
|
||||||
|
print get_queues(r_serv)
|
||||||
return jsonify(row1=get_queues(r_serv))
|
return jsonify(row1=get_queues(r_serv))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -158,14 +158,14 @@ $(document).ready(function () {
|
||||||
var x = new Date();
|
var x = new Date();
|
||||||
|
|
||||||
for (i = 0; i < glob_tabvar.row1.length; i++){
|
for (i = 0; i < glob_tabvar.row1.length; i++){
|
||||||
if (glob_tabvar.row1[i][0].substring(0,4) != "word"){
|
if (glob_tabvar.row1[i][0] == 'Categ' || glob_tabvar.row1[i][0] == 'Curve'){
|
||||||
tmp_tab.push(0);
|
|
||||||
curves_labels.push(glob_tabvar.row1[i][0]);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
tmp_tab2.push(0);
|
tmp_tab2.push(0);
|
||||||
curves_labels2.push(glob_tabvar.row1[i][0]);
|
curves_labels2.push(glob_tabvar.row1[i][0]);
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
tmp_tab.push(0);
|
||||||
|
curves_labels.push(glob_tabvar.row1[i][0]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
tmp_tab.unshift(x);
|
tmp_tab.unshift(x);
|
||||||
tmp_tab2.unshift(x);
|
tmp_tab2.unshift(x);
|
||||||
|
@ -225,11 +225,11 @@ $(document).ready(function () {
|
||||||
|
|
||||||
|
|
||||||
for (i = 0; i < (glob_tabvar.row1).length; i++){
|
for (i = 0; i < (glob_tabvar.row1).length; i++){
|
||||||
if (glob_tabvar.row1[i][0].substring(0,4) != "word"){
|
if (glob_tabvar.row1[i][0] == 'Categ' || glob_tabvar.row1[i][0] == 'Curve'){
|
||||||
tmp_values.push(glob_tabvar.row1[i][1]);
|
tmp_values2.push(glob_tabvar.row1[i][1]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
tmp_values2.push(glob_tabvar.row1[i][1]);
|
tmp_values.push(glob_tabvar.row1[i][1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tmp_values.unshift(x);
|
tmp_values.unshift(x);
|
||||||
|
|
Loading…
Reference in a new issue