mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 00:07:16 +00:00
chg: [Domain] add domain object: tag + correlation (decoded items + tags + pgp + cryptocurrency)
This commit is contained in:
parent
e759b560db
commit
48abb89d28
10 changed files with 287 additions and 43 deletions
37
OVERVIEW.md
37
OVERVIEW.md
|
@ -261,6 +261,9 @@ Redis and ARDB overview
|
||||||
| set_pgpdump_name:*name* | *item_path* |
|
| set_pgpdump_name:*name* | *item_path* |
|
||||||
| | |
|
| | |
|
||||||
| set_pgpdump_mail:*mail* | *item_path* |
|
| set_pgpdump_mail:*mail* | *item_path* |
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
| set_domain_pgpdump_**pgp_type**:**key** | **domain** |
|
||||||
|
|
||||||
##### Hset date:
|
##### Hset date:
|
||||||
| Key | Field | Value |
|
| Key | Field | Value |
|
||||||
|
@ -288,11 +291,20 @@ Redis and ARDB overview
|
||||||
| item_pgpdump_name:*item_path* | *name* |
|
| item_pgpdump_name:*item_path* | *name* |
|
||||||
| | |
|
| | |
|
||||||
| item_pgpdump_mail:*item_path* | *mail* |
|
| item_pgpdump_mail:*item_path* | *mail* |
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
| domain_pgpdump_**pgp_type**:**domain** | **key** |
|
||||||
|
|
||||||
#### Cryptocurrency
|
#### Cryptocurrency
|
||||||
|
|
||||||
Supported cryptocurrency:
|
Supported cryptocurrency:
|
||||||
- bitcoin
|
- bitcoin
|
||||||
|
- bitcoin-cash
|
||||||
|
- dash
|
||||||
|
- etherum
|
||||||
|
- litecoin
|
||||||
|
- monero
|
||||||
|
- zcash
|
||||||
|
|
||||||
##### Hset:
|
##### Hset:
|
||||||
| Key | Field | Value |
|
| Key | Field | Value |
|
||||||
|
@ -303,7 +315,8 @@ Supported cryptocurrency:
|
||||||
##### set:
|
##### set:
|
||||||
| Key | Value |
|
| Key | Value |
|
||||||
| ------ | ------ |
|
| ------ | ------ |
|
||||||
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** |
|
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | PASTE
|
||||||
|
| set_domain_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **domain** | DOMAIN
|
||||||
|
|
||||||
##### Hset date:
|
##### Hset date:
|
||||||
| Key | Field | Value |
|
| Key | Field | Value |
|
||||||
|
@ -318,8 +331,14 @@ Supported cryptocurrency:
|
||||||
##### set:
|
##### set:
|
||||||
| Key | Value |
|
| Key | Value |
|
||||||
| ------ | ------ |
|
| ------ | ------ |
|
||||||
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** |
|
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | PASTE
|
||||||
|
| domain_cryptocurrency_**cryptocurrency name**:**domain** | **cryptocurrency address** | DOMAIN
|
||||||
|
|
||||||
|
#### HASH
|
||||||
|
| Key | Value |
|
||||||
|
| ------ | ------ |
|
||||||
|
| hash_domain:**domain** | **hash** |
|
||||||
|
| domain_hash:**hash** | **domain** |
|
||||||
|
|
||||||
## DB9 - Crawler:
|
## DB9 - Crawler:
|
||||||
|
|
||||||
|
@ -362,6 +381,20 @@ Supported cryptocurrency:
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
##### CRAWLER QUEUES:
|
||||||
|
| SET - Key | Value |
|
||||||
|
| ------ | ------ |
|
||||||
|
| onion_crawler_queue | **url**;**item_id** | RE-CRAWL
|
||||||
|
| regular_crawler_queue | - |
|
||||||
|
| | |
|
||||||
|
| onion_crawler_priority_queue | **url**;**item_id** | USER
|
||||||
|
| regular_crawler_priority_queue | - |
|
||||||
|
| | |
|
||||||
|
| onion_crawler_discovery_queue | **url**;**item_id** | DISCOVER
|
||||||
|
| regular_crawler_discovery_queue | - |
|
||||||
|
|
||||||
|
##### TO CHANGE:
|
||||||
|
|
||||||
ARDB overview
|
ARDB overview
|
||||||
|
|
||||||
----------------------------------------- SENTIMENT ------------------------------------
|
----------------------------------------- SENTIMENT ------------------------------------
|
||||||
|
|
|
@ -18,6 +18,7 @@ from pubsublogger import publisher
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
from packages import Item
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import signal
|
import signal
|
||||||
|
@ -120,6 +121,12 @@ def save_hash(decoder_name, message, date, decoded):
|
||||||
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
|
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
|
||||||
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
|
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
|
||||||
|
|
||||||
|
# Domain Object
|
||||||
|
if Item.is_crawled(message):
|
||||||
|
domain = Item.get_item_domain(message)
|
||||||
|
serv_metadata.sadd('hash_domain:{}'.format(domain), hash) # domain - hash map
|
||||||
|
serv_metadata.sadd('domain_hash:{}'.format(hash), domain) # hash - domain map
|
||||||
|
|
||||||
|
|
||||||
def save_hash_on_disk(decode, type, hash, json_data):
|
def save_hash_on_disk(decode, type, hash, json_data):
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,8 @@ from bs4 import BeautifulSoup
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
|
||||||
|
from packages import Pgp
|
||||||
|
|
||||||
class TimeoutException(Exception):
|
class TimeoutException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -117,31 +119,6 @@ def extract_id_from_output(pgp_dump_outpout):
|
||||||
key_id = key_id.replace(key_id_str, '', 1)
|
key_id = key_id.replace(key_id_str, '', 1)
|
||||||
set_key.add(key_id)
|
set_key.add(key_id)
|
||||||
|
|
||||||
def save_pgp_data(type_pgp, date, item_path, data):
|
|
||||||
# create basic medata
|
|
||||||
if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)):
|
|
||||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
|
|
||||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
|
||||||
else:
|
|
||||||
last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
|
|
||||||
if not last_seen:
|
|
||||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
|
||||||
else:
|
|
||||||
if int(last_seen) < int(date):
|
|
||||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
|
||||||
|
|
||||||
# global set
|
|
||||||
serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
|
|
||||||
|
|
||||||
# daily
|
|
||||||
serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
|
|
||||||
|
|
||||||
# all type
|
|
||||||
serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
|
|
||||||
|
|
||||||
# item_metadata
|
|
||||||
serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||||
|
@ -236,12 +213,12 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
for key_id in set_key:
|
for key_id in set_key:
|
||||||
print(key_id)
|
print(key_id)
|
||||||
save_pgp_data('key', date, message, key_id)
|
Pgp.save_pgp_data('key', date, message, key_id)
|
||||||
|
|
||||||
for name_id in set_name:
|
for name_id in set_name:
|
||||||
print(name_id)
|
print(name_id)
|
||||||
save_pgp_data('name', date, message, name_id)
|
Pgp.save_pgp_data('name', date, message, name_id)
|
||||||
|
|
||||||
for mail_id in set_mail:
|
for mail_id in set_mail:
|
||||||
print(mail_id)
|
print(mail_id)
|
||||||
save_pgp_data('mail', date, message, mail_id)
|
Pgp.save_pgp_data('mail', date, message, mail_id)
|
||||||
|
|
|
@ -16,6 +16,8 @@ import datetime
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
from packages import Item
|
||||||
|
|
||||||
|
|
||||||
def get_item_date(item_filename):
|
def get_item_date(item_filename):
|
||||||
l_directory = item_filename.split('/')
|
l_directory = item_filename.split('/')
|
||||||
|
@ -84,6 +86,12 @@ if __name__ == '__main__':
|
||||||
set_tag_metadata(tag, item_date)
|
set_tag_metadata(tag, item_date)
|
||||||
server_metadata.sadd('tag:{}'.format(path), tag)
|
server_metadata.sadd('tag:{}'.format(path), tag)
|
||||||
|
|
||||||
|
# Domain Object
|
||||||
|
if Item.is_crawled(path):
|
||||||
|
domain = Item.get_item_domain(path)
|
||||||
|
server_metadata.sadd('tag:{}'.format(domain), tag)
|
||||||
|
server.sadd('domain:{}:{}'.format(tag, item_date), domain)
|
||||||
|
|
||||||
curr_date = datetime.date.today().strftime("%Y%m%d")
|
curr_date = datetime.date.today().strftime("%Y%m%d")
|
||||||
server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
||||||
p.populate_set_out(message, 'MISP_The_Hive_feeder')
|
p.populate_set_out(message, 'MISP_The_Hive_feeder')
|
||||||
|
|
|
@ -2,8 +2,10 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import redis
|
import redis
|
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
|
||||||
import Flask_config
|
import Flask_config
|
||||||
|
|
||||||
r_serv_metadata = Flask_config.r_serv_metadata
|
r_serv_metadata = Flask_config.r_serv_metadata
|
||||||
|
@ -14,9 +16,11 @@ class Correlation(object):
|
||||||
def __init__(self, correlation_name):
|
def __init__(self, correlation_name):
|
||||||
self.correlation_name = correlation_name
|
self.correlation_name = correlation_name
|
||||||
|
|
||||||
def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'):
    """Return whether a correlation set exists for *field_name*.

    Args:
        correlation_type: Sub-type of the correlation (used as the second
            component of the Redis key).
        field_name: Correlated value to look up.
        item_type: ``'paste'`` checks the item-level set
            (``set_<name>_<type>:<field>``); any other value checks the
            domain-level set (``set_domain_<name>_<type>:<field>``).

    Returns:
        The result of ``r_serv_metadata.exists`` on the selected key.
    """
    # The original compared the builtin ``type`` to 'paste', which is never
    # equal, so the paste branch was unreachable; compare ``item_type``.
    if item_type == 'paste':
        key_template = 'set_{}_{}:{}'
    else:
        key_template = 'set_domain_{}_{}:{}'
    return r_serv_metadata.exists(key_template.format(self.correlation_name, correlation_type, field_name))
|
||||||
|
|
||||||
def _get_items(self, correlation_type, field_name):
|
def _get_items(self, correlation_type, field_name):
|
||||||
res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
|
res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
|
||||||
|
@ -25,6 +29,12 @@ class Correlation(object):
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def _get_domains(self, correlation_type, field_name):
    """Return the members of the domain-level correlation set as a list.

    Reads ``set_domain_<name>_<type>:<field>`` from ``r_serv_metadata`` and
    returns an empty list when the set has no members.
    """
    key = 'set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
    domains = r_serv_metadata.smembers(key)
    if not domains:
        return []
    return list(domains)
|
||||||
|
|
||||||
def _get_metadata(self, correlation_type, field_name):
|
def _get_metadata(self, correlation_type, field_name):
|
||||||
meta_dict = {}
|
meta_dict = {}
|
||||||
|
@ -35,14 +45,14 @@ class Correlation(object):
|
||||||
def _get_correlation_by_date(self, correlation_type, date):
|
def _get_correlation_by_date(self, correlation_type, date):
|
||||||
return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))
|
return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))
|
||||||
|
|
||||||
def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
    """Validate a correlation lookup request.

    Args:
        request_dict: Request payload; must be non-empty and contain a
            truthy value under the ``correlation_type`` key.
        correlation_type: Key to read from ``request_dict`` and sub-type
            used for the existence check.
        item_type: Forwarded to ``_exist_corelation_field`` to select the
            paste-level or domain-level set.

    Returns:
        ``None`` when the request is valid, otherwise an
        ``(error_dict, http_status)`` tuple.
    """
    if not request_dict:
        # Plain tuple, like every other error path here (the pre-change
        # line called an undefined ``Response``).
        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

    field_name = request_dict.get(correlation_type, None)
    if not field_name:
        return ({'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400)

    if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
        return ({'status': 'error', 'reason': 'Item not found'}, 404)

    return None
|
||||||
|
|
||||||
def get_correlation(self, request_dict, correlation_type, field_name):
|
def get_correlation(self, request_dict, correlation_type, field_name):
|
||||||
|
@ -58,7 +68,37 @@ class Correlation(object):
|
||||||
|
|
||||||
return (dict_resp, 200)
|
return (dict_resp, 200)
|
||||||
|
|
||||||
|
def get_correlation_domain(self, request_dict, correlation_type, field_name):
    """Build the API response listing the domains correlated with a value.

    Args:
        request_dict: Request payload (currently unused).
        correlation_type: Sub-type of the correlation; also used as the
            response key that echoes ``field_name``.
        field_name: Correlated value being looked up.

    Returns:
        ``(response_dict, 200)`` where ``response_dict`` holds the domain
        list under ``'domain'`` and ``field_name`` under
        ``correlation_type``.
    """
    dict_resp = {
        'domain': self._get_domains(correlation_type, field_name),
    }

    # TODO: when request_dict.get('metadata') is set, also return
    # self._get_metadata(correlation_type, field_name) under 'metadata'.

    dict_resp[correlation_type] = field_name

    return (dict_resp, 200)
|
||||||
|
|
||||||
|
######## INTERNAL ########
|
||||||
|
|
||||||
|
def _get_domain_correlation_obj(correlation_name, correlation_type, domain):
    """Return the correlated values stored for *domain* as a list.

    Reads the set ``domain_<correlation_name>_<correlation_type>:<domain>``
    from ``r_serv_metadata``; returns an empty list when it has no members.
    """
    # The leftover debug print of the key was dropped.
    key = 'domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)
    values = r_serv_metadata.smembers(key)
    if not values:
        return []
    return list(values)
|
||||||
|
|
||||||
|
######## ########
|
||||||
|
|
||||||
|
######## API EXPOSED ########
|
||||||
|
|
||||||
|
def get_domain_correlation_obj(request_dict, correlation_name, correlation_type, domain):
    """Build the API response listing the values correlated with a domain.

    Args:
        request_dict: Request payload (currently unused).
        correlation_name: Correlation family, first component of the key.
        correlation_type: Sub-type; also the response key for the values.
        domain: Domain whose correlated values are returned.

    Returns:
        ``(response_dict, 200)`` with the value list under
        ``correlation_type`` and the domain under ``'domain'``.
    """
    dict_resp = {
        correlation_type: _get_domain_correlation_obj(correlation_name, correlation_type, domain),
    }
    # Assigned after the lookup so 'domain' wins if correlation_type is
    # itself the string 'domain', as in the original statement order.
    dict_resp['domain'] = domain

    return (dict_resp, 200)
|
||||||
|
|
||||||
|
######## ########
|
||||||
|
|
|
@ -10,11 +10,13 @@ from hashlib import sha256
|
||||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||||
import Flask_config
|
import Flask_config
|
||||||
from Correlation import Correlation
|
from Correlation import Correlation
|
||||||
|
import Item
|
||||||
|
|
||||||
r_serv_metadata = Flask_config.r_serv_metadata
|
r_serv_metadata = Flask_config.r_serv_metadata
|
||||||
|
|
||||||
|
all_cryptocurrency = ['bitcoin', 'etherum']
|
||||||
|
|
||||||
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||||
#address_validation = {'bitcoin': 'base58', 'dash': 'base58'}
|
|
||||||
|
|
||||||
cryptocurrency = Correlation('cryptocurrency')
|
cryptocurrency = Correlation('cryptocurrency')
|
||||||
|
|
||||||
|
@ -52,6 +54,21 @@ def get_cryptocurrency(request_dict, cryptocurrency_type):
|
||||||
|
|
||||||
return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
|
return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
|
||||||
|
|
||||||
|
# # TODO: add get all cryptocurrency option
|
||||||
|
def get_cryptocurrency_domain(request_dict, cryptocurrency_type):
    """Return the domains on which a cryptocurrency address was seen.

    Args:
        request_dict: Request payload; the address is read from the
            ``cryptocurrency_type`` key.
        cryptocurrency_type: Cryptocurrency name and payload key.

    Returns:
        An ``(dict, http_status)`` tuple: the validation error if the
        request is rejected, a 400 error if the address fails
        ``verify_cryptocurrency_address``, otherwise the domain
        correlation response.
    """
    error = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type, item_type='domain')
    if error:
        return error

    field_name = request_dict.get(cryptocurrency_type)
    if not verify_cryptocurrency_address(cryptocurrency_type, field_name):
        return ({'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400)

    return cryptocurrency.get_correlation_domain(request_dict, cryptocurrency_type, field_name)
|
||||||
|
|
||||||
|
def get_domain_cryptocurrency(request_dict, cryptocurrency_type):
    """Return the cryptocurrency addresses correlated with a domain.

    Args:
        request_dict: Request payload.
        cryptocurrency_type: Cryptocurrency name (correlation sub-type).

    Returns:
        An ``(dict, http_status)`` tuple: a 400 error when the payload is
        empty or has no domain, otherwise the result of
        ``Correlation.get_domain_correlation_obj``.
    """
    if not request_dict:
        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

    # NOTE(review): the original referenced the undefined names ``self`` and
    # ``domain`` and so always raised NameError. The domain is assumed to be
    # supplied under the 'domain' key - confirm against the API caller.
    domain = request_dict.get('domain')
    if not domain:
        return ({'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400)

    # get_domain_correlation_obj is a module-level function of Correlation,
    # not a method of the Correlation class; only the class is imported at
    # file level, hence the local import.
    from Correlation import get_domain_correlation_obj
    return get_domain_correlation_obj(request_dict, 'cryptocurrency', cryptocurrency_type, domain)
|
||||||
|
|
||||||
|
|
||||||
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
|
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
|
||||||
# create basic medata
|
# create basic medata
|
||||||
if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
|
if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
|
||||||
|
@ -65,7 +82,8 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
|
||||||
if int(last_seen) < int(date):
|
if int(last_seen) < int(date):
|
||||||
r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date)
|
r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date)
|
||||||
|
|
||||||
# global set
|
## global set
|
||||||
|
# item
|
||||||
r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path)
|
r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path)
|
||||||
|
|
||||||
# daily
|
# daily
|
||||||
|
@ -74,5 +92,12 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
|
||||||
# all type
|
# all type
|
||||||
r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1)
|
r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1)
|
||||||
|
|
||||||
# item_metadata
|
## object_metadata
|
||||||
|
# item
|
||||||
r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
|
r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
|
||||||
|
|
||||||
|
# domain
|
||||||
|
if Item.is_crawled(item_path):
|
||||||
|
domain = Item.get_item_domain(item_path)
|
||||||
|
r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address)
|
||||||
|
r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain)
|
||||||
|
|
85
bin/packages/Domain.py
Executable file
85
bin/packages/Domain.py
Executable file
|
@ -0,0 +1,85 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
"""
|
||||||
|
The ``Domain``
|
||||||
|
===================
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import redis
|
||||||
|
|
||||||
|
import Item
|
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
|
||||||
|
import Flask_config
|
||||||
|
|
||||||
|
r_serv_onion = Flask_config.r_serv_onion
|
||||||
|
|
||||||
|
def get_domain_type(domain):
    """Return ``'onion'`` if *domain* ends with ``.onion``, else ``'regular'``.

    The argument is converted with ``str()`` first, so non-string inputs
    are accepted.
    """
    return 'onion' if str(domain).endswith('.onion') else 'regular'
|
||||||
|
|
||||||
|
def get_all_domain_up_by_type(domain_type):
    """Return all domains recorded as up for the given domain type.

    Args:
        domain_type: ``'onion'`` or ``'regular'`` (the two values produced
            by ``get_domain_type``).

    Returns:
        ``({'type': ..., 'domains': [...]}, 200)`` with the members of the
        ``full_<domain_type>_up`` set, or a 400 error tuple for any other
        type.
    """
    # The original tested membership in an undefined name ``domains``;
    # validate against the two known domain types instead.
    if domain_type not in ('onion', 'regular'):
        return ({"status": "error", "reason": "Invalid domain type"}, 400)

    list_domain = list(r_serv_onion.smembers('full_{}_up'.format(domain_type)))
    return ({'type': domain_type, 'domains': list_domain}, 200)
|
||||||
|
|
||||||
|
def get_domain_items(domain, root_item_id):
    """Return the item ids collected by ``get_domain_item_children`` plus the root.

    The root item id is appended last.
    """
    dom_item = get_domain_item_children(domain, root_item_id)
    dom_item.append(root_item_id)
    return dom_item
|
||||||
|
|
||||||
|
def get_domain_item_children(domain, root_item_id):
    """Recursively collect the children of an item that belong to *domain*.

    Args:
        domain: Domain the collected items must belong to, as decided by
            ``Item.is_item_in_domain``.
        root_item_id: Item whose children are walked.

    Returns:
        A list of item ids; each matching child is followed by its own
        matching descendants.
    """
    all_items = []
    for item_id in Item.get_item_children(root_item_id):
        # NOTE(review): indentation was lost in this view; the recursion is
        # assumed to sit inside the domain check, so branches leaving the
        # domain are not followed - confirm.
        if Item.is_item_in_domain(domain, item_id):
            all_items.append(item_id)
            all_items.extend(get_domain_item_children(domain, item_id))
    return all_items
|
||||||
|
|
||||||
|
def get_link_tree():
    """Placeholder: not implemented yet; returns ``None``."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
###
|
||||||
|
### correlation
|
||||||
|
###
|
||||||
|
|
||||||
|
def _get_domain_correlation(domain, correlation_name=None, correlation_type=None):
    """Return the values correlated with *domain* as a list.

    Reads the set ``domain_<correlation_name>_<correlation_type>:<domain>``,
    the key layout written by the save functions for crawled items.
    """
    # The original was a copy of the item helper: it read an 'item_' key
    # using the undefined names ``item_id`` and ``r_serv_metadata``.
    key = 'domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)
    values = Flask_config.r_serv_metadata.smembers(key)
    if not values:
        return []
    return list(values)


# The item-level getters below were copied from the Item module and called
# ``_get_item_correlation``, which is not defined in this module; they now
# delegate to the Item module's helper.

def get_item_bitcoin(item_id):
    """Return the bitcoin addresses correlated with an item."""
    return Item._get_item_correlation('cryptocurrency', 'bitcoin', item_id)


def get_item_pgp_key(item_id):
    """Return the PGP key ids correlated with an item."""
    return Item._get_item_correlation('pgpdump', 'key', item_id)


def get_item_pgp_name(item_id):
    """Return the PGP names correlated with an item."""
    return Item._get_item_correlation('pgpdump', 'name', item_id)


def get_item_pgp_mail(item_id):
    """Return the PGP mail addresses correlated with an item."""
    return Item._get_item_correlation('pgpdump', 'mail', item_id)


def get_item_pgp_correlation(item_id):
    """Placeholder: not implemented yet; returns ``None``."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class Domain(object):
    """A crawled domain, with its name and its type."""

    def __init__(self, domain, port=80):
        """Store the domain name and derive its type.

        Args:
            domain: Domain name; stored as ``str(domain)``.
            port: Accepted but not stored yet.
        """
        self.domain = str(domain)
        ## TODO: handle none port
        # 'onion' or 'regular', as returned by get_domain_type.
        self.type = get_domain_type(domain)
|
|
@ -125,7 +125,6 @@ def get_item(request_dict):
|
||||||
###
|
###
|
||||||
|
|
||||||
def _get_item_correlation(correlation_name, correlation_type, item_id):
|
def _get_item_correlation(correlation_name, correlation_type, item_id):
|
||||||
print('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
|
|
||||||
res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
|
res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
|
||||||
if res:
|
if res:
|
||||||
return list(res)
|
return list(res)
|
||||||
|
@ -144,6 +143,8 @@ def get_item_pgp_name(item_id):
|
||||||
def get_item_pgp_mail(item_id):
|
def get_item_pgp_mail(item_id):
|
||||||
return _get_item_correlation('pgpdump', 'mail', item_id)
|
return _get_item_correlation('pgpdump', 'mail', item_id)
|
||||||
|
|
||||||
|
def get_item_pgp_correlation(item_id):
    """Placeholder: not implemented yet; returns ``None``."""
    pass
|
||||||
|
|
||||||
###
|
###
|
||||||
### GET Internal Module DESC
|
### GET Internal Module DESC
|
||||||
|
@ -153,3 +154,29 @@ def get_item_list_desc(list_item_id):
|
||||||
for item_id in list_item_id:
|
for item_id in list_item_id:
|
||||||
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
|
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
|
||||||
return desc_list
|
return desc_list
|
||||||
|
|
||||||
|
# # TODO: add an option to check the tag
|
||||||
|
def is_crawled(item_id):
    """Return True when the item id starts with ``'crawled'``."""
    return item_id.startswith('crawled')


def is_onion(item_id):
    """Return True when a crawled item id carries a ``.onion`` domain.

    The check looks at the six characters that precede the last 36
    characters of the id, and only runs for ids longer than 62 characters.
    """
    # The original called len() on the boolean result flag, which raises
    # TypeError; the length of the item id is what must be tested.
    if len(item_id) <= 62:
        return False
    return is_crawled(item_id) and item_id[-42:-36] == '.onion'
|
||||||
|
|
||||||
|
def is_item_in_domain(domain, item_id):
    """Return True when *domain* sits directly before the id's last 36 characters.

    Ids that are not longer than ``len(domain) + 48`` characters are
    rejected without comparing.
    """
    domain_length = len(domain)
    if len(item_id) <= domain_length + 48:
        return False
    return item_id[-36 - domain_length:-36] == domain
|
||||||
|
|
||||||
|
def get_item_domain(item_id):
    """Return the domain part of an item id.

    Slices off the first 19 and the last 36 characters of the id.
    """
    # presumably 19 = len('crawled/YYYY/MM/DD/') and 36 = length of a
    # trailing UUID - confirm against the crawler's id format.
    return item_id[19:-36]
|
||||||
|
|
||||||
|
|
||||||
|
def get_item_children(item_id):
    """Return the members of the ``paste_children:<item_id>`` set as a list."""
    children = r_serv_metadata.smembers('paste_children:{}'.format(item_id))
    return list(children)
|
||||||
|
|
|
@ -2,14 +2,18 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import redis
|
import redis
|
||||||
|
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||||
import Flask_config
|
import Flask_config
|
||||||
from Correlation import Correlation
|
|
||||||
|
|
||||||
r_serv_metadata = Flask_config.r_serv_metadata
|
from Correlation import Correlation
|
||||||
|
import Item
|
||||||
|
|
||||||
|
serv_metadata = Flask_config.r_serv_metadata
|
||||||
|
|
||||||
pgpdump = Correlation('pgpdump')
|
pgpdump = Correlation('pgpdump')
|
||||||
|
|
||||||
|
@ -23,3 +27,36 @@ def get_pgp(request_dict, pgp_type):
|
||||||
field_name = request_dict.get(pgp_type)
|
field_name = request_dict.get(pgp_type)
|
||||||
|
|
||||||
return pgpdump.get_correlation(request_dict, pgp_type, field_name)
|
return pgpdump.get_correlation(request_dict, pgp_type, field_name)
|
||||||
|
|
||||||
|
def save_pgp_data(type_pgp, date, item_path, data):
    """Record one PGP correlation value found in an item.

    Args:
        type_pgp: Kind of PGP value (callers pass 'key', 'name' or 'mail').
        date: Date the item was seen; compared as an integer against the
            stored ``last_seen``.
        item_path: Identifier of the item the value was found in.
        data: The PGP value itself.

    Updates, in ``serv_metadata``: the first/last seen metadata hash, the
    value-to-items set, the daily counter, the global counter and the
    item-to-values set. For crawled items it also updates the
    domain-to-values and value-to-domains sets.
    """
    # create basic metadata
    metadata_key = 'pgpdump_metadata_{}:{}'.format(type_pgp, data)
    if not serv_metadata.exists(metadata_key):
        serv_metadata.hset(metadata_key, 'first_seen', date)
        serv_metadata.hset(metadata_key, 'last_seen', date)
    else:
        last_seen = serv_metadata.hget(metadata_key, 'last_seen')
        # Only move last_seen forward, or set it if it is missing.
        if not last_seen or int(last_seen) < int(date):
            serv_metadata.hset(metadata_key, 'last_seen', date)

    # global set
    serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)

    # daily
    serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)

    # all type
    serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)

    ## object_metadata
    # paste
    serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)

    # domain object
    if Item.is_crawled(item_path):
        domain = Item.get_item_domain(item_path)
        serv_metadata.sadd('domain_pgpdump_{}:{}'.format(type_pgp, domain), data)
        serv_metadata.sadd('set_domain_pgpdump_{}:{}'.format(type_pgp, data), domain)
|
||||||
|
|
|
@ -121,6 +121,11 @@ def add_item_tag(tag, item_path):
|
||||||
r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
|
r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
|
||||||
r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path)
|
r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path)
|
||||||
|
|
||||||
|
if Item.is_crawled(item_path):
|
||||||
|
domain = Item.get_item_domain(item_path)
|
||||||
|
r_serv_metadata.sadd('tag:{}'.format(domain), tag)
|
||||||
|
r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain)
|
||||||
|
|
||||||
r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
||||||
|
|
||||||
tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||||
|
|
Loading…
Reference in a new issue