diff --git a/OVERVIEW.md b/OVERVIEW.md index 5d6dbc99..2b357f9f 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -152,6 +152,56 @@ Redis and ARDB overview | binary_hash:**hash** | **item** | **nb_seen** | | hexadecimal_hash:**hash** | **item** | **nb_seen** | +#### PgpDump + +##### Hset: +| Key | Field | Value | +| ------ | ------ | ------ | +| pgp_metadata_key:*key id* | first_seen | **date** | +| | last_seen | **date** | +| | | +| pgp_metadata_name:*name* | first_seen | **date** | +| | last_seen | **date** | +| | | +| pgp_metadata_mail:*mail* | first_seen | **date** | +| | last_seen | **date** | + +##### set: +| Key | Value | +| ------ | ------ | +| pgp_key:*key id* | *item_path* | +| | | +| pgp_name:*name* | *item_path* | +| | | +| pgp_mail:*mail* | *item_path* | + +##### Hset date: +| Key | Field | Value | +| ------ | ------ | ------ | +| pgp:key:*date* | *key* | *nb seen* | +| | | +| pgp:name:*date* | *name* | *nb seen* | +| | | +| pgp:mail:*date* | *mail* | *nb seen* | + +##### zset: +| Key | Field | Value | +| ------ | ------ | ------ | +| pgp_all:key | *key* | *nb seen* | +| | | +| pgp_all:name | *name* | *nb seen* | +| | | +| pgp_all:mail | *mail* | *nb seen* | + +##### set: +| Key | Value | +| ------ | ------ | +| item_pgp_key:*item_path* | *key* | +| | | +| item_pgp_name:*item_path* | *name* | +| | | +| item_pgp_mail:*item_path* | *mail* | + ## DB9 - Crawler: ##### Hset: diff --git a/bin/Keys.py b/bin/Keys.py index 7fcc7a1e..6d6e6166 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -108,6 +108,15 @@ def search_key(paste): p.populate_set_out(msg, 'Tags') find = True + if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in content: + p.populate_set_out(message, 'PgpDump') + + if '-----BEGIN PGP SIGNATURE-----' in content: + p.populate_set_out(message, 'PgpDump') + + if '-----BEGIN PGP MESSAGE-----' in content: + p.populate_set_out(message, 'PgpDump') + if find : #Send to duplicate diff --git a/bin/PgpDump.py b/bin/PgpDump.py new file mode 100755 index 00000000..6cf754d0 --- 
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
    PgpDump module

    Extract IDs (key IDs, user names and e-mail addresses) from PGP blocks
    found in items, by running each armored block through ``pgpdump``.

    Configuration read or required by this module:

    * config.cfg, section ``[PgpDump]``: ``max_execution_time`` (60 in
      config.cfg.sample) -- passed to ``signal.alarm`` as the time budget,
      in seconds, of each regex search.
    * config.cfg, section ``[ARDB_Metadata]``: ``host``, ``port``, ``db``.
    * modules.cfg, section ``[PgpDump]``: ``subscribe = Redis_PgpDump`` and
      ``publish = Redis_Duplicate,Redis_Tags``; the ``[Keys]`` section
      publishes to ``Redis_Duplicate,Redis_PgpDump,Redis_Tags``.
"""

import datetime
import os
import re
import signal
import subprocess
import time

import redis
from bs4 import BeautifulSoup
from pubsublogger import publisher

from Helper import Process
from packages import Paste


class TimeoutException(Exception):
    """Raised by the SIGALRM handler when the time budget is exhausted."""


def timeout_handler(signum, frame):
    """SIGALRM handler: interrupt the running code with TimeoutException."""
    raise TimeoutException


signal.signal(signal.SIGALRM, timeout_handler)


def remove_html(item_content):
    """Return *item_content* as plain text.

    When the parser finds at least one element, ``<script>`` and ``<style>``
    elements are dropped and the remaining text is returned; otherwise the
    content is returned untouched.
    """
    # Parse once and reuse the tree for both the detection and the cleanup.
    soup = BeautifulSoup(item_content, 'html.parser')
    if not soup.find():
        return item_content

    # kill all script and style elements
    for element in soup(["script", "style"]):
        element.extract()
    return soup.get_text()


def extract_all_id(item_content, regex):
    """Find every PGP block matching *regex* and harvest the IDs it holds.

    The regex search is bounded by ``max_execution_time`` seconds through
    SIGALRM.  On timeout nothing is extracted, the module timeout statistic
    is incremented and the item path is printed.  Results are accumulated in
    the module-level ``set_key`` / ``set_name`` / ``set_mail`` sets.
    """
    # max execution time on regex
    signal.alarm(max_execution_time)
    try:
        pgp_extracted_block = re.findall(regex, item_content)
    except TimeoutException:
        pgp_extracted_block = []
        p.incr_module_timeout_statistic()
        print("{0} processing timeout".format(paste.p_rel_path))
    finally:
        # Always disarm the alarm, whatever way the search ended, so that it
        # cannot fire later in unrelated code.
        signal.alarm(0)

    for pgp_to_dump in pgp_extracted_block:
        pgp_packet = get_pgp_packet(pgp_to_dump)
        extract_id_from_output(pgp_packet)


def get_pgp_packet(save_path):
    """Return the ``pgpdump`` output for one armored PGP block.

    :param save_path: the armored block itself (despite the name, this is
        the block text, not a file path).
    :return: decoded standard output of ``pgpdump``.
    """
    save_path = '{}'.format(save_path)
    # NOTE(review): `echo -e` interprets backslash escapes in the block
    # (presumably to unfold literal "\n" sequences -- confirm) and passes
    # the whole block as a single argv entry, which is subject to the OS
    # argument size limit for very large blocks.
    process1 = subprocess.Popen(['echo', '-e', save_path], stdout=subprocess.PIPE)
    process2 = subprocess.Popen(['pgpdump'], stdin=process1.stdout, stdout=subprocess.PIPE)
    # Close our copy of the pipe so that pgpdump is the only reader.
    process1.stdout.close()
    raw_output = process2.communicate()[0]
    # Reap the echo child instead of leaving it to be collected later.
    process1.wait()
    # User IDs are arbitrary bytes; do not crash on non UTF-8 output.
    return raw_output.decode(errors='replace')


def extract_id_from_output(pgp_dump_outpout):
    """Collect user names, e-mail addresses and key IDs from pgpdump output.

    ``User ID - name <mail>`` lines feed ``set_name`` and ``set_mail``; user
    IDs without `` <`` are stored as a name only.  ``Key ID - ...`` lines
    feed ``set_key``.
    """
    for user_id in set(re.findall(regex_user_id, pgp_dump_outpout)):
        user_id = user_id.replace(user_id_str, '', 1)
        if ' <' in user_id:
            name, mail = user_id.rsplit(' <', 1)
            # Drop the closing bracket, but never eat a real character when
            # the bracket is missing.
            if mail.endswith('>'):
                mail = mail[:-1]
            set_name.add(name)
            set_mail.add(mail)
        else:
            set_name.add(user_id)

    for key_id in set(re.findall(regex_key_id, pgp_dump_outpout)):
        set_key.add(key_id.replace(key_id_str, '', 1))


def save_pgp_data(type_pgp, date, item_path, data):
    """Store one extracted identifier and update its statistics.

    :param type_pgp: kind of identifier, used in the key names
        (the caller passes 'key', 'name' or 'mail').
    :param date: item date; compared through ``int()``.
    :param item_path: item in which the identifier was seen.
    :param data: the identifier itself.
    """
    metadata_key = 'pgp_metadata_{}:{}'.format(type_pgp, data)
    items_key = 'pgp_{}:{}'.format(type_pgp, data)

    # create basic metadata
    # The set of items only exists once the identifier has been saved, so its
    # absence means this is the first sighting.
    if not serv_metadata.exists(items_key):
        serv_metadata.hset(metadata_key, 'first_seen', date)
        serv_metadata.hset(metadata_key, 'last_seen', date)
    else:
        last_seen = serv_metadata.hget(metadata_key, 'last_seen')
        if not last_seen or int(last_seen) < int(date):
            serv_metadata.hset(metadata_key, 'last_seen', date)

    # global set
    serv_metadata.sadd(items_key, item_path)

    # daily
    serv_metadata.hincrby('pgp:{}:{}'.format(type_pgp, date), data, 1)

    # all type
    # NOTE(review): (name, value, amount) is the redis-py 2.x StrictRedis
    # argument order; redis-py >= 3.0 expects (name, amount, value) --
    # confirm against the pinned client version.
    serv_metadata.zincrby('pgp_all:{}'.format(type_pgp), data, 1)

    # item_metadata
    serv_metadata.sadd('item_pgp_{}:{}'.format(type_pgp, item_path), data)


if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a
    # subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'PgpDump'

    # Setup the I/O queues
    p = Process(config_section)

    serv_metadata = redis.StrictRedis(
        host=p.config.get("ARDB_Metadata", "host"),
        port=p.config.getint("ARDB_Metadata", "port"),
        db=p.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    # Send a description of the module to the logging
    publisher.info("PgpDump started")

    # Patterns are compiled once and kept; the functions above read these
    # module-level names.
    user_id_str = 'User ID - '
    regex_user_id = re.compile('{}.+'.format(user_id_str))

    key_id_str = 'Key ID - '
    regex_key_id = re.compile('{}.+'.format(key_id_str))

    regex_pgp_public_blocs = re.compile(
        r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----')
    regex_pgp_signature = re.compile(
        r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----')
    regex_pgp_message = re.compile(
        r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----')

    max_execution_time = p.config.getint("PgpDump", "max_execution_time")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Fresh accumulators for every item; filled by extract_all_id().
        set_key = set()
        set_name = set()
        set_mail = set()
        paste = Paste.Paste(message)

        # Do something with the message from the queue
        date = str(paste._get_p_date())
        content = paste.get_p_content()
        content = remove_html(content)

        extract_all_id(content, regex_pgp_public_blocs)
        extract_all_id(content, regex_pgp_signature)
        extract_all_id(content, regex_pgp_message)

        for key_id in set_key:
            print(key_id)
            save_pgp_data('key', date, message, key_id)

        for name_id in set_name:
            print(name_id)
            save_pgp_data('name', date, message, name_id)

        for mail_id in set_mail:
            print(mail_id)
            save_pgp_data('mail', date, message, mail_id)