2018-05-04 11:53:29 +00:00
|
|
|
#!/usr/bin/env python3
|
2014-08-06 09:43:40 +00:00
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
|
|
The ZMQ_PubSub_Categ Module
|
|
|
|
============================
|
|
|
|
|
2018-09-11 11:51:57 +00:00
|
|
|
This module consumes the Redis list created by the ZMQ_PubSub_Tokenize_Q
|
|
|
|
Module.
|
|
|
|
|
2014-08-06 09:43:40 +00:00
|
|
|
Each word file created under /files/ represents a category.
|
|
|
|
This module takes these files and compares them to
|
2018-09-11 11:51:57 +00:00
|
|
|
the stream of data given by the ZMQ_PubSub_Tokenize_Q Module.
|
2014-08-06 09:43:40 +00:00
|
|
|
|
|
|
|
When a word from a paste matches one or more of these word files, the filename of
|
|
|
|
the paste is published/forwarded to the next modules.
|
|
|
|
|
|
|
|
Each category (each file) represents a dynamic channel.
|
|
|
|
This means that if you create 1000 files under /files/ you'll have 1000 channels
|
|
|
|
where every time there is a matching word to a category, the paste containing
|
|
|
|
this word will be pushed to this specific channel.
|
|
|
|
|
|
|
|
.. note:: The channel will have the name of the file created.
|
|
|
|
|
|
|
|
Implementing modules can start here, create your own category file,
|
2014-08-14 12:11:07 +00:00
|
|
|
and then create your own module to treat the specific paste matching this
|
|
|
|
category.
|
2014-08-06 09:43:40 +00:00
|
|
|
|
2018-09-11 11:51:57 +00:00
|
|
|
.. note:: Modules ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
|
|
|
the same Subscriber name in both of them.
|
|
|
|
|
2014-08-06 09:43:40 +00:00
|
|
|
Requirements
|
|
|
|
------------
|
|
|
|
|
2018-09-11 11:51:57 +00:00
|
|
|
* Needs running Redis instances. (Redis)
|
2014-08-06 09:43:40 +00:00
|
|
|
* Category files of words in /files/ need to be created
|
2018-09-11 11:51:57 +00:00
|
|
|
* Needs the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
2014-08-06 09:43:40 +00:00
|
|
|
|
|
|
|
"""
|
2014-08-19 17:07:07 +00:00
|
|
|
import os
|
2014-08-14 12:11:07 +00:00
|
|
|
import argparse
|
|
|
|
import time
|
2014-09-05 15:05:45 +00:00
|
|
|
import re
|
2014-08-06 09:43:40 +00:00
|
|
|
from pubsublogger import publisher
|
2014-08-14 12:11:07 +00:00
|
|
|
from packages import Paste
|
2014-08-06 09:43:40 +00:00
|
|
|
|
2014-08-29 17:37:56 +00:00
|
|
|
from Helper import Process
|
2014-08-06 09:43:40 +00:00
|
|
|
|
2014-08-19 17:07:07 +00:00
|
|
|
def compile_category_pattern(lines):
    """Build one case-insensitive regex matching any word of a category.

    Each item of *lines* is one entry of a category file. Entries are
    stripped of surrounding whitespace and escaped, so they are matched as
    literal text rather than as regular expressions.

    Blank entries are skipped: an empty alternative would make the regex
    match the empty string at every position, so every paste would appear to
    contain one match for the category.

    :param lines: iterable of strings (e.g. an open text file).
    :return: a compiled pattern, or ``None`` when *lines* holds no words.
    """
    stripped = (line.strip() for line in lines)
    escaped = [re.escape(word) for word in stripped if word]
    if not escaped:
        return None
    return re.compile('|'.join(escaped), re.IGNORECASE)


def find_category_matches(pattern, content):
    """Return the set of distinct strings of *content* matched by *pattern*.

    Matching is case-insensitive but the matched text is kept verbatim, so
    two spellings that differ only by case count as two distinct matches.

    :param pattern: value returned by :func:`compile_category_pattern`;
        ``None`` (category without any word) never matches.
    :param content: text to search.
    :return: a (possibly empty) set of matched substrings.
    """
    if pattern is None:
        return set()
    return set(pattern.findall(content))


if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Categ'

    p = Process(config_section)
    # Minimum number of distinct matches required to tag a paste.
    matchingThreshold = p.config.getint("Categ", "matchingThreshold")

    # SCRIPT PARSER #
    parser = argparse.ArgumentParser(description='Start Categ module on files.')

    parser.add_argument(
        '-d', type=str, default="../files/",
        help='Path to the directory containing the category files.',
        action='store')

    args = parser.parse_args()

    # FUNCTIONS #
    publisher.info("Script Categ started")

    categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']

    # Map each category name to the compiled regex built from its word file.
    tmp_dict = {}
    for filename in categories:
        bname = os.path.basename(filename)
        with open(os.path.join(args.d, filename), 'r') as f:
            tmp_dict[bname] = compile_category_pattern(f)
        if tmp_dict[bname] is None:
            # Keep the category registered, but make the situation visible:
            # a word-less file can never produce a match.
            print('Category file {} contains no word'.format(filename))

    while True:
        filename = p.get_from_set()
        if filename is None:
            # Nothing queued: wait before polling again.
            publisher.debug("Script Categ is Idling 10s")
            print('Sleeping')
            time.sleep(10)
            continue

        paste = Paste.Paste(filename)
        content = paste.get_p_content()

        for categ, pattern in tmp_dict.items():
            found = find_category_matches(pattern, content)
            if len(found) >= matchingThreshold:
                # Message format: "<paste path> <number of distinct matches>".
                msg = '{} {}'.format(paste.p_path, len(found))

                print(msg, categ)
                p.populate_set_out(msg, categ)

                publisher.info(
                    'Categ;{};{};{};Detected {} as {};{}'.format(
                        paste.p_source, paste.p_date, paste.p_name,
                        len(found), categ, paste.p_path))
|