chg: [Domain] add domain object: tag + correlation (decoded items + tags + pgp + cryptocurrency)

This commit is contained in:
Terrtia 2019-10-17 16:39:43 +02:00
parent e759b560db
commit 48abb89d28
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
10 changed files with 287 additions and 43 deletions

View file

@ -261,6 +261,9 @@ Redis and ARDB overview
| set_pgpdump_name:*name* | *item_path* | | set_pgpdump_name:*name* | *item_path* |
| | | | | |
| set_pgpdump_mail:*mail* | *item_path* | | set_pgpdump_mail:*mail* | *item_path* |
| | |
| | |
| set_domain_pgpdump_**pgp_type**:**key** | **domain** |
##### Hset date: ##### Hset date:
| Key | Field | Value | | Key | Field | Value |
@ -288,11 +291,20 @@ Redis and ARDB overview
| item_pgpdump_name:*item_path* | *name* | | item_pgpdump_name:*item_path* | *name* |
| | | | | |
| item_pgpdump_mail:*item_path* | *mail* | | item_pgpdump_mail:*item_path* | *mail* |
| | |
| | |
| domain_pgpdump_**pgp_type**:**domain** | **key** |
#### Cryptocurrency #### Cryptocurrency
Supported cryptocurrency: Supported cryptocurrency:
- bitcoin - bitcoin
- bitcoin-cash
- dash
- etherum
- litecoin
- monero
- zcash
##### Hset: ##### Hset:
| Key | Field | Value | | Key | Field | Value |
@ -303,7 +315,8 @@ Supported cryptocurrency:
##### set: ##### set:
| Key | Value | | Key | Value |
| ------ | ------ | | ------ | ------ |
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | | set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | PASTE
| set_domain_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **domain** | DOMAIN
##### Hset date: ##### Hset date:
| Key | Field | Value | | Key | Field | Value |
@ -318,8 +331,14 @@ Supported cryptocurrency:
##### set: ##### set:
| Key | Value | | Key | Value |
| ------ | ------ | | ------ | ------ |
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | | item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | PASTE
| domain_cryptocurrency_**cryptocurrency name**:**domain** | **cryptocurrency address** | DOMAIN
#### HASH
| Key | Value |
| ------ | ------ |
| hash_domain:**domain** | **hash** |
| domain_hash:**hash** | **domain** |
## DB9 - Crawler: ## DB9 - Crawler:
@ -362,6 +381,20 @@ Supported cryptocurrency:
} }
``` ```
##### CRAWLER QUEUES:
| SET - Key | Value |
| ------ | ------ |
| onion_crawler_queue | **url**;**item_id** | RE-CRAWL
| regular_crawler_queue | - |
| | |
| onion_crawler_priority_queue | **url**;**item_id** | USER
| regular_crawler_priority_queue | - |
| | |
| onion_crawler_discovery_queue | **url**;**item_id** | DISCOVER
| regular_crawler_discovery_queue | - |
##### TO CHANGE:
ARDB overview ARDB overview
----------------------------------------- SENTIMENT ------------------------------------ ----------------------------------------- SENTIMENT ------------------------------------

View file

@ -18,6 +18,7 @@ from pubsublogger import publisher
from Helper import Process from Helper import Process
from packages import Paste from packages import Paste
from packages import Item
import re import re
import signal import signal
@ -120,6 +121,12 @@ def save_hash(decoder_name, message, date, decoded):
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
# Domain Object
if Item.is_crawled(message):
domain = Item.get_item_domain(message)
serv_metadata.sadd('hash_domain:{}'.format(domain), hash) # domain - hash map
serv_metadata.sadd('domain_hash:{}'.format(hash), domain) # hash - domain map
def save_hash_on_disk(decode, type, hash, json_data): def save_hash_on_disk(decode, type, hash, json_data):

View file

@ -21,6 +21,8 @@ from bs4 import BeautifulSoup
from Helper import Process from Helper import Process
from packages import Paste from packages import Paste
from packages import Pgp
class TimeoutException(Exception): class TimeoutException(Exception):
pass pass
@ -117,31 +119,6 @@ def extract_id_from_output(pgp_dump_outpout):
key_id = key_id.replace(key_id_str, '', 1) key_id = key_id.replace(key_id_str, '', 1)
set_key.add(key_id) set_key.add(key_id)
def save_pgp_data(type_pgp, date, item_path, data):
# create basic medata
if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)):
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
else:
last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
if not last_seen:
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
else:
if int(last_seen) < int(date):
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
# global set
serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
# daily
serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
# all type
serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
# item_metadata
serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@ -236,12 +213,12 @@ if __name__ == '__main__':
for key_id in set_key: for key_id in set_key:
print(key_id) print(key_id)
save_pgp_data('key', date, message, key_id) Pgp.save_pgp_data('key', date, message, key_id)
for name_id in set_name: for name_id in set_name:
print(name_id) print(name_id)
save_pgp_data('name', date, message, name_id) Pgp.save_pgp_data('name', date, message, name_id)
for mail_id in set_mail: for mail_id in set_mail:
print(mail_id) print(mail_id)
save_pgp_data('mail', date, message, mail_id) Pgp.save_pgp_data('mail', date, message, mail_id)

View file

@ -16,6 +16,8 @@ import datetime
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
from packages import Paste from packages import Paste
from packages import Item
def get_item_date(item_filename): def get_item_date(item_filename):
l_directory = item_filename.split('/') l_directory = item_filename.split('/')
@ -84,6 +86,12 @@ if __name__ == '__main__':
set_tag_metadata(tag, item_date) set_tag_metadata(tag, item_date)
server_metadata.sadd('tag:{}'.format(path), tag) server_metadata.sadd('tag:{}'.format(path), tag)
# Domain Object
if Item.is_crawled(path):
domain = Item.get_item_domain(path)
server_metadata.sadd('tag:{}'.format(domain), tag)
server.sadd('domain:{}:{}'.format(tag, item_date), domain)
curr_date = datetime.date.today().strftime("%Y%m%d") curr_date = datetime.date.today().strftime("%Y%m%d")
server.hincrby('daily_tags:{}'.format(item_date), tag, 1) server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
p.populate_set_out(message, 'MISP_The_Hive_feeder') p.populate_set_out(message, 'MISP_The_Hive_feeder')

View file

@ -2,8 +2,10 @@
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import os import os
import sys
import redis import redis
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
import Flask_config import Flask_config
r_serv_metadata = Flask_config.r_serv_metadata r_serv_metadata = Flask_config.r_serv_metadata
@ -14,9 +16,11 @@ class Correlation(object):
def __init__(self, correlation_name): def __init__(self, correlation_name):
self.correlation_name = correlation_name self.correlation_name = correlation_name
def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'):
    """Tell whether a correlation set exists for ``field_name``.

    :param correlation_type: sub-type of the correlation (used in the key).
    :param field_name: correlated value to look up.
    :param item_type: ``'paste'`` checks the ``set_<name>_<type>:<field>``
        key; any other value checks ``set_domain_<name>_<type>:<field>``.
    :return: result of ``r_serv_metadata.exists`` on the selected key.
    """
    # The parameter must be compared here: comparing the builtin ``type``
    # to 'paste' is always False and would force the domain branch.
    if item_type == 'paste':
        key = 'set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
    else:
        key = 'set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
    return r_serv_metadata.exists(key)
def _get_items(self, correlation_type, field_name): def _get_items(self, correlation_type, field_name):
res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
@ -25,6 +29,12 @@ class Correlation(object):
else: else:
return [] return []
def _get_domains(self, correlation_type, field_name):
    """List the members of the ``set_domain_<name>_<type>:<field>`` set.

    Returns an empty list when the set is empty or missing.
    """
    key = 'set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
    domains = r_serv_metadata.smembers(key)
    return list(domains) if domains else []
def _get_metadata(self, correlation_type, field_name): def _get_metadata(self, correlation_type, field_name):
meta_dict = {} meta_dict = {}
@ -35,14 +45,14 @@ class Correlation(object):
def _get_correlation_by_date(self, correlation_type, date): def _get_correlation_by_date(self, correlation_type, date):
return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date)) return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))
def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
    """Validate a correlation lookup request.

    :param request_dict: request payload; must be non-empty and contain a
        truthy value under the ``correlation_type`` key.
    :param correlation_type: name of the field to read from the payload.
    :param item_type: forwarded to ``_exist_corelation_field``.
    :return: ``(error_dict, http_status)`` on failure, ``None`` when valid.
    """
    if not request_dict:
        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

    field_name = request_dict.get(correlation_type, None)
    if not field_name:
        return ({'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400)

    found = self._exist_corelation_field(correlation_type, field_name, item_type=item_type)
    if not found:
        return ({'status': 'error', 'reason': 'Item not found'}, 404)
    return None
def get_correlation(self, request_dict, correlation_type, field_name): def get_correlation(self, request_dict, correlation_type, field_name):
@ -58,7 +68,37 @@ class Correlation(object):
return (dict_resp, 200) return (dict_resp, 200)
def get_correlation_domain(self, request_dict, correlation_type, field_name):
    """Build the response listing the domains correlated with ``field_name``.

    :return: ``(response_dict, 200)`` where the dict holds the domain list
        under ``'domain'`` and ``field_name`` under ``correlation_type``.
    """
    # NOTE(review): optional 'metadata' enrichment (via _get_metadata) was
    # left disabled in the original; request_dict is currently unused.
    response = {
        'domain': self._get_domains(correlation_type, field_name),
        correlation_type: field_name,
    }
    return (response, 200)
######## INTERNAL ########
def _get_domain_correlation_obj(correlation_name, correlation_type, domain):
    """List the members of the ``domain_<name>_<type>:<domain>`` set.

    :param correlation_name: correlation family used in the key.
    :param correlation_type: correlation sub-type used in the key.
    :param domain: domain whose correlated values are requested.
    :return: list of set members, or an empty list when there are none.
    """
    # The debug print of the key was dropped: it wrote to stdout on every
    # lookup and served no functional purpose.
    key = 'domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)
    res = r_serv_metadata.smembers(key)
    return list(res) if res else []
######## ########
######## API EXPOSED ########
def get_domain_correlation_obj(request_dict, correlation_name, correlation_type, domain):
    """Build the response listing the values correlated with ``domain``.

    :return: ``(response_dict, 200)``; the dict holds the correlated values
        under ``correlation_type`` and the queried domain under ``'domain'``.
    """
    # request_dict is accepted but not read by this function.
    correlated = _get_domain_correlation_obj(correlation_name, correlation_type, domain)
    response = {correlation_type: correlated}
    response['domain'] = domain
    return (response, 200)
######## ########

View file

@ -10,11 +10,13 @@ from hashlib import sha256
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config import Flask_config
from Correlation import Correlation from Correlation import Correlation
import Item
r_serv_metadata = Flask_config.r_serv_metadata r_serv_metadata = Flask_config.r_serv_metadata
all_cryptocurrency = ['bitcoin', 'etherum']
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
#address_validation = {'bitcoin': 'base58', 'dash': 'base58'}
cryptocurrency = Correlation('cryptocurrency') cryptocurrency = Correlation('cryptocurrency')
@ -52,6 +54,21 @@ def get_cryptocurrency(request_dict, cryptocurrency_type):
return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name) return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
# # TODO: add get all cryptocurrency option
def get_cryptocurrency_domain(request_dict, cryptocurrency_type):
    """Return the domains correlated with a cryptocurrency address.

    The address is read from ``request_dict[cryptocurrency_type]``.

    :return: ``(response_dict, http_status)``; a validation error from
        ``verify_correlation_field_request`` is returned unchanged, and an
        address rejected by ``verify_cryptocurrency_address`` yields a 400.
    """
    error = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type, item_type='domain')
    if error:
        return error

    address = request_dict.get(cryptocurrency_type)
    if verify_cryptocurrency_address(cryptocurrency_type, address):
        return cryptocurrency.get_correlation_domain(request_dict, cryptocurrency_type, address)
    return ({'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400)
def get_domain_cryptocurrency(request_dict, cryptocurrency_type):
    """Return the cryptocurrency addresses correlated with a domain.

    :param request_dict: request payload.
        NOTE(review): the domain is read from the ``'domain'`` key; the
        original body referenced an undefined ``domain`` name, so the
        intended key should be confirmed against the API caller.
    :param cryptocurrency_type: cryptocurrency name (e.g. ``'bitcoin'``).
    :return: ``(response_dict, http_status)``.
    """
    if not request_dict:
        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)
    domain = request_dict.get('domain')
    if not domain:
        return ({'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400)

    # get_domain_correlation_obj is a function of the Correlation module
    # taking the correlation name explicitly, not a method of the
    # ``cryptocurrency`` Correlation instance; import it locally because
    # this module only imports the Correlation class.
    from Correlation import get_domain_correlation_obj
    return get_domain_correlation_obj(request_dict, 'cryptocurrency', cryptocurrency_type, domain)
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address): def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
# create basic medata # create basic medata
if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)): if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
@ -65,7 +82,8 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
if int(last_seen) < int(date): if int(last_seen) < int(date):
r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date)
# global set ## global set
# item
r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path) r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path)
# daily # daily
@ -74,5 +92,12 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
# all type # all type
r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1) r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1)
# item_metadata ## object_metadata
# item
r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address) r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
# domain
if Item.is_crawled(item_path):
domain = Item.get_item_domain(item_path)
r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address)
r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain)

85
bin/packages/Domain.py Executable file
View file

@ -0,0 +1,85 @@
#!/usr/bin/python3
"""
The ``Domain``
===================
"""
import os
import sys
import time
import redis
import Item
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
import Flask_config
r_serv_onion = Flask_config.r_serv_onion
def get_domain_type(domain):
    """Classify a domain as ``'onion'`` or ``'regular'``.

    The domain is converted with ``str()`` and classified as ``'onion'``
    when it ends with ``.onion``; anything else is ``'regular'``.
    """
    return 'onion' if str(domain).endswith('.onion') else 'regular'
def get_all_domain_up_by_type(domain_type):
    """Return every domain stored in the ``full_<domain_type>_up`` set.

    :param domain_type: ``'onion'`` or ``'regular'``.
    :return: ``({'type': ..., 'domains': [...]}, 200)`` for a known type,
        otherwise an error dict with status 400.
    """
    # The original tested membership in an undefined name (``domains``),
    # raising NameError on every call. The accepted values are the two
    # types produced by get_domain_type().
    if domain_type in ('onion', 'regular'):
        list_domain = list(r_serv_onion.smembers('full_{}_up'.format(domain_type)))
        return ({'type': domain_type, 'domains': list_domain}, 200)
    return ({"status": "error", "reason": "Invalid domain type"}, 400)
def get_domain_items(domain, root_item_id):
    """Return the in-domain descendants of ``root_item_id`` followed by the
    root item itself (the root is always the last element)."""
    return get_domain_item_children(domain, root_item_id) + [root_item_id]
def get_domain_item_children(domain, root_item_id):
    """Recursively collect the children of ``root_item_id`` that belong to
    ``domain`` according to ``Item.is_item_in_domain``.

    Each matching child is listed before its own matching descendants.
    """
    # NOTE(review): the rendered source lost its indentation; the recursion
    # is assumed to happen only for children inside the domain — confirm.
    collected = []
    for child_id in Item.get_item_children(root_item_id):
        if not Item.is_item_in_domain(domain, child_id):
            continue
        collected.append(child_id)
        collected += get_domain_item_children(domain, child_id)
    return collected
def get_link_tree():
    """Placeholder: not implemented yet, always returns ``None``."""
    return None
###
### correlation
###
def _get_domain_correlation(domain, correlation_name=None, correlation_type=None):
    """List the members of the ``domain_<name>_<type>:<domain>`` set.

    :param domain: domain whose correlated values are requested.
    :param correlation_name: correlation family used in the key.
    :param correlation_type: correlation sub-type used in the key.
    :return: list of set members, or an empty list when there are none.
    """
    # The original body was copied from the item variant: it used the
    # undefined names ``item_id`` and ``r_serv_metadata`` and the ``item_``
    # key prefix. The metadata connection is taken from Flask_config, which
    # this module already imports.
    key = 'domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)
    res = Flask_config.r_serv_metadata.smembers(key)
    return list(res) if res else []
def get_item_bitcoin(item_id):
    """Return the bitcoin addresses correlated with ``item_id``."""
    # _get_item_correlation is defined in the Item module, not in this one;
    # the unqualified call raised NameError.
    return Item._get_item_correlation('cryptocurrency', 'bitcoin', item_id)
def get_item_pgp_key(item_id):
    """Return the PGP key ids correlated with ``item_id``."""
    # _get_item_correlation is defined in the Item module, not in this one;
    # the unqualified call raised NameError.
    return Item._get_item_correlation('pgpdump', 'key', item_id)
def get_item_pgp_name(item_id):
    """Return the PGP names correlated with ``item_id``."""
    # _get_item_correlation is defined in the Item module, not in this one;
    # the unqualified call raised NameError.
    return Item._get_item_correlation('pgpdump', 'name', item_id)
def get_item_pgp_mail(item_id):
    """Return the PGP mail addresses correlated with ``item_id``."""
    # _get_item_correlation is defined in the Item module, not in this one;
    # the unqualified call raised NameError.
    return Item._get_item_correlation('pgpdump', 'mail', item_id)
def get_item_pgp_correlation(item_id):
    """Placeholder: not implemented yet, always returns ``None``."""
    return None
class Domain(object):
    """Domain object holding the domain name and its type.

    Attributes:
        domain: the domain converted to ``str``.
        type: value returned by ``get_domain_type`` for the domain.
    """

    def __init__(self, domain, port=80):
        # ``port`` is accepted but not stored yet.
        ## TODO: handle none port
        domain_name = str(domain)
        self.domain = domain_name
        self.type = get_domain_type(domain)

View file

@ -125,7 +125,6 @@ def get_item(request_dict):
### ###
def _get_item_correlation(correlation_name, correlation_type, item_id): def _get_item_correlation(correlation_name, correlation_type, item_id):
print('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
if res: if res:
return list(res) return list(res)
@ -144,6 +143,8 @@ def get_item_pgp_name(item_id):
def get_item_pgp_mail(item_id): def get_item_pgp_mail(item_id):
return _get_item_correlation('pgpdump', 'mail', item_id) return _get_item_correlation('pgpdump', 'mail', item_id)
def get_item_pgp_correlation(item_id):
    """Placeholder: not implemented yet, always returns ``None``."""
    return None
### ###
### GET Internal Module DESC ### GET Internal Module DESC
@ -153,3 +154,29 @@ def get_item_list_desc(list_item_id):
for item_id in list_item_id: for item_id in list_item_id:
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} ) desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
return desc_list return desc_list
# # TODO: add an option to check the tag
def is_crawled(item_id):
    """Return True when ``item_id`` starts with the ``crawled`` prefix."""
    crawled_prefix = 'crawled'
    return item_id.startswith(crawled_prefix)
def is_onion(item_id):
    """Return True when ``item_id`` is a crawled item of a ``.onion`` domain.

    The id must be longer than 62 characters, satisfy ``is_crawled`` and
    carry ``.onion`` immediately before its last 36 characters
    (presumably a trailing UUID — TODO confirm).
    """
    # The length check must apply to the id: the original measured the
    # boolean result variable, which raises TypeError.
    return (len(item_id) > 62
            and is_crawled(item_id)
            and item_id[-42:-36] == '.onion')
def is_item_in_domain(domain, item_id):
    """Return True when ``domain`` sits right before the last 36 characters
    of ``item_id`` and the id is longer than ``len(domain) + 48``."""
    domain_length = len(domain)
    long_enough = len(item_id) > domain_length + 48
    return long_enough and item_id[-36 - domain_length:-36] == domain
def get_item_domain(item_id):
    """Extract the domain part of a crawled item id.

    Drops the first 19 and the last 36 characters of the id (presumably
    the ``crawled/YYYY/MM/DD/`` prefix and a UUID suffix — TODO confirm).
    """
    prefix_length = 19
    suffix_length = 36
    return item_id[prefix_length:-suffix_length]
def get_item_children(item_id):
    """Return the members of the ``paste_children:<item_id>`` set as a list."""
    children_key = 'paste_children:{}'.format(item_id)
    children = r_serv_metadata.smembers(children_key)
    return list(children)

View file

@ -2,14 +2,18 @@
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import os import os
import sys
import redis import redis
from hashlib import sha256 from hashlib import sha256
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config import Flask_config
from Correlation import Correlation
r_serv_metadata = Flask_config.r_serv_metadata from Correlation import Correlation
import Item
serv_metadata = Flask_config.r_serv_metadata
pgpdump = Correlation('pgpdump') pgpdump = Correlation('pgpdump')
@ -23,3 +27,36 @@ def get_pgp(request_dict, pgp_type):
field_name = request_dict.get(pgp_type) field_name = request_dict.get(pgp_type)
return pgpdump.get_correlation(request_dict, pgp_type, field_name) return pgpdump.get_correlation(request_dict, pgp_type, field_name)
def save_pgp_data(type_pgp, date, item_path, data):
    """Record a PGP value (``data``) found in ``item_path`` on ``date``.

    Updates the first/last seen metadata of the value, the value -> item
    and item -> value sets, the daily and global counters and, when the
    item is a crawled one, the domain <-> value sets.

    :param type_pgp: PGP field type used in the key names.
    :param date: date of the item; compared to ``last_seen`` via ``int()``.
    :param item_path: path/id of the item containing the value.
    :param data: the PGP value itself.
    """
    metadata_key = 'pgpdump_metadata_{}:{}'.format(type_pgp, data)

    # create basic metadata
    if not serv_metadata.exists(metadata_key):
        serv_metadata.hset(metadata_key, 'first_seen', date)
        serv_metadata.hset(metadata_key, 'last_seen', date)
    else:
        last_seen = serv_metadata.hget(metadata_key, 'last_seen')
        # only move last_seen forward, or set it when it is missing
        if not last_seen or int(last_seen) < int(date):
            serv_metadata.hset(metadata_key, 'last_seen', date)

    # global set
    serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)

    # daily
    serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)

    # all type
    serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)

    ## object_metadata
    # paste
    serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)

    # domain object
    if not Item.is_crawled(item_path):
        return
    domain = Item.get_item_domain(item_path)
    serv_metadata.sadd('domain_pgpdump_{}:{}'.format(type_pgp, domain), data)
    serv_metadata.sadd('set_domain_pgpdump_{}:{}'.format(type_pgp, data), domain)

View file

@ -121,6 +121,11 @@ def add_item_tag(tag, item_path):
r_serv_metadata.sadd('tag:{}'.format(item_path), tag) r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path) r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path)
if Item.is_crawled(item_path):
domain = Item.get_item_domain(item_path)
r_serv_metadata.sadd('tag:{}'.format(domain), tag)
r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain)
r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')