mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [Domain] add domain object: tag + correlation (decoded items + tags + pgp + cryptocurrency)
This commit is contained in:
parent
e759b560db
commit
48abb89d28
10 changed files with 287 additions and 43 deletions
37
OVERVIEW.md
37
OVERVIEW.md
|
@ -261,6 +261,9 @@ Redis and ARDB overview
|
|||
| set_pgpdump_name:*name* | *item_path* |
|
||||
| | |
|
||||
| set_pgpdump_mail:*mail* | *item_path* |
|
||||
| | |
|
||||
| | |
|
||||
| set_domain_pgpdump_**pgp_type**:**key** | **domain** |
|
||||
|
||||
##### Hset date:
|
||||
| Key | Field | Value |
|
||||
|
@ -288,11 +291,20 @@ Redis and ARDB overview
|
|||
| item_pgpdump_name:*item_path* | *name* |
|
||||
| | |
|
||||
| item_pgpdump_mail:*item_path* | *mail* |
|
||||
| | |
|
||||
| | |
|
||||
| domain_pgpdump_**pgp_type**:**domain** | **key** |
|
||||
|
||||
#### Cryptocurrency
|
||||
|
||||
Supported cryptocurrency:
|
||||
- bitcoin
|
||||
- bitcoin-cash
|
||||
- dash
|
||||
- etherum
|
||||
- litecoin
|
||||
- monero
|
||||
- zcash
|
||||
|
||||
##### Hset:
|
||||
| Key | Field | Value |
|
||||
|
@ -303,7 +315,8 @@ Supported cryptocurrency:
|
|||
##### set:
|
||||
| Key | Value |
|
||||
| ------ | ------ |
|
||||
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** |
|
||||
| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | PASTE
|
||||
| set_domain_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **domain** | DOMAIN
|
||||
|
||||
##### Hset date:
|
||||
| Key | Field | Value |
|
||||
|
@ -318,8 +331,14 @@ Supported cryptocurrency:
|
|||
##### set:
|
||||
| Key | Value |
|
||||
| ------ | ------ |
|
||||
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** |
|
||||
| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | PASTE
|
||||
| domain_cryptocurrency_**cryptocurrency name**:**domain** | **cryptocurrency address** | DOMAIN
|
||||
|
||||
#### HASH
|
||||
| Key | Value |
|
||||
| ------ | ------ |
|
||||
| hash_domain:**domain** | **hash** |
|
||||
| domain_hash:**hash** | **domain** |
|
||||
|
||||
## DB9 - Crawler:
|
||||
|
||||
|
@ -362,6 +381,20 @@ Supported cryptocurrency:
|
|||
}
|
||||
```
|
||||
|
||||
##### CRAWLER QUEUES:
|
||||
| SET - Key | Value |
|
||||
| ------ | ------ |
|
||||
| onion_crawler_queue | **url**;**item_id** | RE-CRAWL
|
||||
| regular_crawler_queue | - |
|
||||
| | |
|
||||
| onion_crawler_priority_queue | **url**;**item_id** | USER
|
||||
| regular_crawler_priority_queue | - |
|
||||
| | |
|
||||
| onion_crawler_discovery_queue | **url**;**item_id** | DISCOVER
|
||||
| regular_crawler_discovery_queue | - |
|
||||
|
||||
##### TO CHANGE:
|
||||
|
||||
ARDB overview
|
||||
|
||||
----------------------------------------- SENTIMENT ------------------------------------
|
||||
|
|
|
@ -18,6 +18,7 @@ from pubsublogger import publisher
|
|||
|
||||
from Helper import Process
|
||||
from packages import Paste
|
||||
from packages import Item
|
||||
|
||||
import re
|
||||
import signal
|
||||
|
@ -120,6 +121,12 @@ def save_hash(decoder_name, message, date, decoded):
|
|||
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
|
||||
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
|
||||
|
||||
# Domain Object
|
||||
if Item.is_crawled(message):
|
||||
domain = Item.get_item_domain(message)
|
||||
serv_metadata.sadd('hash_domain:{}'.format(domain), hash) # domain - hash map
|
||||
serv_metadata.sadd('domain_hash:{}'.format(hash), domain) # hash - domain map
|
||||
|
||||
|
||||
def save_hash_on_disk(decode, type, hash, json_data):
|
||||
|
||||
|
|
|
@ -21,6 +21,8 @@ from bs4 import BeautifulSoup
|
|||
from Helper import Process
|
||||
from packages import Paste
|
||||
|
||||
from packages import Pgp
|
||||
|
||||
class TimeoutException(Exception):
|
||||
pass
|
||||
|
||||
|
@ -117,31 +119,6 @@ def extract_id_from_output(pgp_dump_outpout):
|
|||
key_id = key_id.replace(key_id_str, '', 1)
|
||||
set_key.add(key_id)
|
||||
|
||||
def save_pgp_data(type_pgp, date, item_path, data):
|
||||
# create basic medata
|
||||
if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)):
|
||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date)
|
||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
||||
else:
|
||||
last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen')
|
||||
if not last_seen:
|
||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
||||
else:
|
||||
if int(last_seen) < int(date):
|
||||
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date)
|
||||
|
||||
# global set
|
||||
serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)
|
||||
|
||||
# daily
|
||||
serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)
|
||||
|
||||
# all type
|
||||
serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)
|
||||
|
||||
# item_metadata
|
||||
serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||
|
@ -236,12 +213,12 @@ if __name__ == '__main__':
|
|||
|
||||
for key_id in set_key:
|
||||
print(key_id)
|
||||
save_pgp_data('key', date, message, key_id)
|
||||
Pgp.save_pgp_data('key', date, message, key_id)
|
||||
|
||||
for name_id in set_name:
|
||||
print(name_id)
|
||||
save_pgp_data('name', date, message, name_id)
|
||||
Pgp.save_pgp_data('name', date, message, name_id)
|
||||
|
||||
for mail_id in set_mail:
|
||||
print(mail_id)
|
||||
save_pgp_data('mail', date, message, mail_id)
|
||||
Pgp.save_pgp_data('mail', date, message, mail_id)
|
||||
|
|
|
@ -16,6 +16,8 @@ import datetime
|
|||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
from packages import Paste
|
||||
from packages import Item
|
||||
|
||||
|
||||
def get_item_date(item_filename):
|
||||
l_directory = item_filename.split('/')
|
||||
|
@ -84,6 +86,12 @@ if __name__ == '__main__':
|
|||
set_tag_metadata(tag, item_date)
|
||||
server_metadata.sadd('tag:{}'.format(path), tag)
|
||||
|
||||
# Domain Object
|
||||
if Item.is_crawled(path):
|
||||
domain = Item.get_item_domain(path)
|
||||
server_metadata.sadd('tag:{}'.format(domain), tag)
|
||||
server.sadd('domain:{}:{}'.format(tag, item_date), domain)
|
||||
|
||||
curr_date = datetime.date.today().strftime("%Y%m%d")
|
||||
server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
||||
p.populate_set_out(message, 'MISP_The_Hive_feeder')
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import redis
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
|
||||
import Flask_config
|
||||
|
||||
r_serv_metadata = Flask_config.r_serv_metadata
|
||||
|
@ -14,9 +16,11 @@ class Correlation(object):
|
|||
def __init__(self, correlation_name):
|
||||
self.correlation_name = correlation_name
|
||||
|
||||
def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'):
    """Check whether a correlation value has at least one linked object.

    :param correlation_type: sub-type of the correlation (for instance a
        cryptocurrency name or a pgp field type).
    :param field_name: correlation value to look up.
    :param item_type: ``'paste'`` (default) checks the item set
        ``set_<name>_<type>:<value>``; any other value checks the domain
        set ``set_domain_<name>_<type>:<value>``.
    :return: result of the redis ``EXISTS`` call on the selected key.
    """
    # The selector is the ``item_type`` argument. Comparing the builtin
    # ``type`` with a string is always False and silently forced the
    # domain key for every caller.
    if item_type == 'paste':
        key_template = 'set_{}_{}:{}'
    else:
        key_template = 'set_domain_{}_{}:{}'
    return r_serv_metadata.exists(key_template.format(self.correlation_name, correlation_type, field_name))
|
||||
|
||||
def _get_items(self, correlation_type, field_name):
|
||||
res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
|
||||
|
@ -25,6 +29,12 @@ class Correlation(object):
|
|||
else:
|
||||
return []
|
||||
|
||||
def _get_domains(self, correlation_type, field_name):
    """Return, as a list, the domains linked to a correlation value.

    Reads the set ``set_domain_<name>_<type>:<value>``; an empty or
    missing set yields an empty list.
    """
    domain_key = 'set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
    members = r_serv_metadata.smembers(domain_key)
    return list(members) if members else []
|
||||
|
||||
def _get_metadata(self, correlation_type, field_name):
|
||||
meta_dict = {}
|
||||
|
@ -35,14 +45,14 @@ class Correlation(object):
|
|||
def _get_correlation_by_date(self, correlation_type, date):
    """Return the field names of the per-day hash ``<name>:<type>:<date>``."""
    daily_key = '{}:{}:{}'.format(self.correlation_name, correlation_type, date)
    return r_serv_metadata.hkeys(daily_key)
|
||||
|
||||
def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
    """Validate an API request that targets one correlation value.

    :param request_dict: decoded request payload (a dict) or a falsy value.
    :param correlation_type: key under which the correlation value is
        expected in ``request_dict``.
    :param item_type: forwarded to ``_exist_corelation_field`` to select
        the paste or the domain index (defaults to ``'paste'``).
    :return: ``None`` when the request is valid, otherwise an
        ``(error_dict, http_status)`` tuple.
    """
    if not request_dict:
        # Plain tuple like every other error path; ``Response`` is not a
        # name available in this module.
        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

    field_name = request_dict.get(correlation_type, None)
    if not field_name:
        return ({'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400)

    # Single existence check, honouring the requested object type.
    if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
        return ({'status': 'error', 'reason': 'Item not found'}, 404)

    return None
|
||||
|
||||
def get_correlation(self, request_dict, correlation_type, field_name):
|
||||
|
@ -58,7 +68,37 @@ class Correlation(object):
|
|||
|
||||
return (dict_resp, 200)
|
||||
|
||||
def get_correlation_domain(self, request_dict, correlation_type, field_name):
    """Build the API response listing the domains tied to a correlation value.

    :param request_dict: request payload; currently not read here.
    :param correlation_type: correlation sub-type, echoed back as a key.
    :param field_name: correlation value, echoed back under that key.
    :return: ``(response_dict, 200)`` where ``response_dict['domain']``
        holds the list returned by ``_get_domains``.
    """
    # TODO: metadata is not returned for domain lookups yet
    # (cryptocurrency_all:cryptocurrency name -> address / nb seen).
    response = {
        'domain': self._get_domains(correlation_type, field_name),
        correlation_type: field_name,
    }
    return (response, 200)
|
||||
|
||||
######## INTERNAL ########
|
||||
|
||||
def _get_domain_correlation_obj(correlation_name, correlation_type, domain):
    """Return, as a list, the correlation values recorded for a domain.

    Reads the set ``domain_<correlation_name>_<correlation_type>:<domain>``;
    an empty or missing set yields an empty list.
    """
    # No debug print of the key here: it wrote to stdout on every lookup.
    members = r_serv_metadata.smembers('domain_{}_{}:{}'.format(correlation_name, correlation_type, domain))
    return list(members) if members else []
|
||||
|
||||
######## ########
|
||||
|
||||
######## API EXPOSED ########
|
||||
|
||||
def get_domain_correlation_obj(request_dict, correlation_name, correlation_type, domain):
    """Build the API response listing one kind of correlation for a domain.

    :param request_dict: request payload; currently not read here.
    :return: ``(response_dict, 200)`` with the correlation values under
        ``correlation_type`` and the queried domain under ``'domain'``.
    """
    response = {
        correlation_type: _get_domain_correlation_obj(correlation_name, correlation_type, domain),
        'domain': domain,
    }
    return (response, 200)
|
||||
|
||||
######## ########
|
||||
|
|
|
@ -10,11 +10,13 @@ from hashlib import sha256
|
|||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||
import Flask_config
|
||||
from Correlation import Correlation
|
||||
import Item
|
||||
|
||||
r_serv_metadata = Flask_config.r_serv_metadata
|
||||
|
||||
all_cryptocurrency = ['bitcoin', 'etherum']
|
||||
|
||||
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
#address_validation = {'bitcoin': 'base58', 'dash': 'base58'}
|
||||
|
||||
cryptocurrency = Correlation('cryptocurrency')
|
||||
|
||||
|
@ -52,6 +54,21 @@ def get_cryptocurrency(request_dict, cryptocurrency_type):
|
|||
|
||||
return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)
|
||||
|
||||
# # TODO: add get all cryptocurrency option
|
||||
def get_cryptocurrency_domain(request_dict, cryptocurrency_type):
    """Return the domains on which a cryptocurrency address was seen.

    The request is first validated against the domain index, then the
    address itself is checked with ``verify_cryptocurrency_address``.

    :return: an ``(dict, http_status)`` tuple, either an error or the
        result of ``Correlation.get_correlation_domain``.
    """
    error = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type, item_type='domain')
    if error:
        return error

    address = request_dict.get(cryptocurrency_type)
    if verify_cryptocurrency_address(cryptocurrency_type, address):
        return cryptocurrency.get_correlation_domain(request_dict, cryptocurrency_type, address)

    return ( {'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400 )
|
||||
|
||||
def get_domain_cryptocurrency(request_dict, cryptocurrency_type):
    """Return the cryptocurrency addresses recorded for a domain.

    :param request_dict: request payload.
    :param cryptocurrency_type: cryptocurrency name used in the redis key.
    :return: ``(response_dict, 200)`` with the addresses under
        ``cryptocurrency_type`` and the domain under ``'domain'``, or an
        ``(error_dict, 400)`` tuple when the request is unusable.
    """
    if not request_dict:
        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

    # NOTE(review): the previous body referenced undefined ``self`` and
    # ``domain`` names. Reading the domain from the 'domain' request key is
    # an assumption -- confirm against the API endpoint that calls this.
    domain = request_dict.get('domain')
    if not domain:
        return ({'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400)

    # Same key that save_cryptocurrency_data() fills for crawled items.
    addresses = r_serv_metadata.smembers('domain_cryptocurrency_{}:{}'.format(cryptocurrency_type, domain))
    response = {
        cryptocurrency_type: list(addresses) if addresses else [],
        'domain': domain,
    }
    return (response, 200)
|
||||
|
||||
|
||||
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
|
||||
# create basic metadata
|
||||
if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
|
||||
|
@ -65,7 +82,8 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
|
|||
if int(last_seen) < int(date):
|
||||
r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date)
|
||||
|
||||
# global set
|
||||
## global set
|
||||
# item
|
||||
r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path)
|
||||
|
||||
# daily
|
||||
|
@ -74,5 +92,12 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
|
|||
# all type
|
||||
r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1)
|
||||
|
||||
# item_metadata
|
||||
## object_metadata
|
||||
# item
|
||||
r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)
|
||||
|
||||
# domain
|
||||
if Item.is_crawled(item_path):
|
||||
domain = Item.get_item_domain(item_path)
|
||||
r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address)
|
||||
r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain)
|
||||
|
|
85
bin/packages/Domain.py
Executable file
85
bin/packages/Domain.py
Executable file
|
@ -0,0 +1,85 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
"""
|
||||
The ``Domain``
|
||||
===================
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import redis
|
||||
|
||||
import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/'))
|
||||
import Flask_config
|
||||
|
||||
r_serv_onion = Flask_config.r_serv_onion
|
||||
|
||||
def get_domain_type(domain):
    """Classify a domain as ``'onion'`` or ``'regular'``.

    The argument is converted with ``str()`` first; anything whose text
    ends with ``.onion`` is an onion domain, everything else is regular.
    """
    return 'onion' if str(domain).endswith('.onion') else 'regular'
|
||||
|
||||
def get_all_domain_up_by_type(domain_type):
    """Return every domain of a given type that is recorded as up.

    :param domain_type: ``'onion'`` or ``'regular'``.
    :return: ``({'type': ..., 'domains': [...]}, 200)`` read from the set
        ``full_<domain_type>_up``, or an ``(error_dict, 400)`` tuple for
        any other type.
    """
    # The accepted types are the two values get_domain_type() can return;
    # the former check used a ``domains`` name that is never defined.
    valid_domain_types = ('onion', 'regular')
    if domain_type not in valid_domain_types:
        return ({"status": "error", "reason": "Invalid domain type"}, 400)

    list_domain = list(r_serv_onion.smembers('full_{}_up'.format(domain_type)))
    return ({'type': domain_type, 'domains': list_domain}, 200)
|
||||
|
||||
def get_domain_items(domain, root_item_id):
    """Return the in-domain descendants of *root_item_id*, followed by the root itself."""
    descendants = get_domain_item_children(domain, root_item_id)
    return [*descendants, root_item_id]
|
||||
|
||||
def get_domain_item_children(domain, root_item_id):
    """Collect, depth first, the descendants of an item that belong to *domain*.

    A child outside the domain is skipped together with its own subtree.
    """
    collected = []
    for child_id in Item.get_item_children(root_item_id):
        if not Item.is_item_in_domain(domain, child_id):
            continue
        collected.append(child_id)
        collected += get_domain_item_children(domain, child_id)
    return collected
|
||||
|
||||
def get_link_tree():
    """Placeholder, not implemented yet: does nothing and returns ``None``."""
    return None
|
||||
|
||||
|
||||
###
|
||||
### correlation
|
||||
###
|
||||
|
||||
def _get_domain_correlation(domain, correlation_name=None, correlation_type=None):
    """Return, as a list, the correlation values recorded for *domain*.

    Reads the set ``domain_<correlation_name>_<correlation_type>:<domain>``
    (the key filled by the ``save_*_data`` helpers for crawled items); an
    empty or missing set yields an empty list.
    """
    # This module only binds r_serv_onion, so the metadata connection is
    # taken from Flask_config directly. The lookup is keyed by the domain:
    # the copied item version used an undefined ``item_id`` and the
    # ``item_`` key prefix.
    members = Flask_config.r_serv_metadata.smembers('domain_{}_{}:{}'.format(correlation_name, correlation_type, domain))
    return list(members) if members else []
|
||||
|
||||
# NOTE(review): these helpers look copied from the Item module, which owns
# _get_item_correlation(); they are qualified with ``Item.`` so they resolve
# from this module instead of raising NameError.

def get_item_bitcoin(item_id):
    """Return the bitcoin addresses correlated with *item_id*."""
    return Item._get_item_correlation('cryptocurrency', 'bitcoin', item_id)


def get_item_pgp_key(item_id):
    """Return the pgp key ids correlated with *item_id*."""
    return Item._get_item_correlation('pgpdump', 'key', item_id)


def get_item_pgp_name(item_id):
    """Return the pgp names correlated with *item_id*."""
    return Item._get_item_correlation('pgpdump', 'name', item_id)


def get_item_pgp_mail(item_id):
    """Return the pgp mail addresses correlated with *item_id*."""
    return Item._get_item_correlation('pgpdump', 'mail', item_id)


def get_item_pgp_correlation(item_id):
    """Placeholder, not implemented yet: returns ``None``."""
    pass
|
||||
|
||||
|
||||
class Domain(object):
    """Handle on a domain: keeps its name and its type."""

    def __init__(self, domain, port=80):
        # 'onion' or 'regular', derived from the raw argument
        self.type = get_domain_type(domain)
        # the name is always stored as text
        self.domain = str(domain)
        ## TODO: handle none port (the port argument is not stored yet)
|
|
@ -125,7 +125,6 @@ def get_item(request_dict):
|
|||
###
|
||||
|
||||
def _get_item_correlation(correlation_name, correlation_type, item_id):
|
||||
print('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
|
||||
res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
|
||||
if res:
|
||||
return list(res)
|
||||
|
@ -144,6 +143,8 @@ def get_item_pgp_name(item_id):
|
|||
def get_item_pgp_mail(item_id):
|
||||
return _get_item_correlation('pgpdump', 'mail', item_id)
|
||||
|
||||
def get_item_pgp_correlation(item_id):
|
||||
pass
|
||||
|
||||
###
|
||||
### GET Internal Module DESC
|
||||
|
@ -153,3 +154,29 @@ def get_item_list_desc(list_item_id):
|
|||
for item_id in list_item_id:
|
||||
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
|
||||
return desc_list
|
||||
|
||||
# # TODO: add an option to check the tag
|
||||
def is_crawled(item_id):
    """Tell whether *item_id* starts with the ``crawled`` prefix."""
    crawled_prefix = 'crawled'
    return item_id.startswith(crawled_prefix)
|
||||
|
||||
def is_onion(item_id):
    """Tell whether *item_id* is a crawled item of a ``.onion`` domain.

    The id must be longer than 62 characters, pass ``is_crawled`` and
    carry ``.onion`` immediately before its last 36 characters.

    :return: ``True`` or ``False``.
    """
    # The length test applies to the id itself; it used to be taken on the
    # boolean result variable, which raised TypeError on every call.
    if len(item_id) <= 62:
        return False
    return is_crawled(item_id) and item_id[-42:-36] == '.onion'
|
||||
|
||||
def is_item_in_domain(domain, item_id):
    """Tell whether *item_id* carries *domain* right before its last 36 characters.

    Ids that are not longer than ``len(domain) + 48`` characters are
    rejected without looking at their content.
    """
    domain_length = len(domain)
    if len(item_id) <= domain_length + 48:
        return False
    return item_id[-36 - domain_length:-36] == domain
|
||||
|
||||
def get_item_domain(item_id):
    """Extract the domain part of an item id.

    Drops the first 19 and the last 36 characters of the id.
    """
    # presumably 'crawled/YYYY/MM/DD/' in front and a fixed-size unique
    # suffix behind -- TODO confirm the id layout with the crawler
    head_length = 19
    tail_length = 36
    return item_id[head_length:-tail_length]
|
||||
|
||||
|
||||
def get_item_children(item_id):
    """Return, as a list, the members of the set ``paste_children:<item_id>``."""
    children_key = 'paste_children:{}'.format(item_id)
    children = r_serv_metadata.smembers(children_key)
    return list(children)
|
||||
|
|
|
@ -2,14 +2,18 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import redis
|
||||
|
||||
from hashlib import sha256
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||
import Flask_config
|
||||
from Correlation import Correlation
|
||||
|
||||
r_serv_metadata = Flask_config.r_serv_metadata
|
||||
from Correlation import Correlation
|
||||
import Item
|
||||
|
||||
serv_metadata = Flask_config.r_serv_metadata
|
||||
|
||||
pgpdump = Correlation('pgpdump')
|
||||
|
||||
|
@ -23,3 +27,36 @@ def get_pgp(request_dict, pgp_type):
|
|||
field_name = request_dict.get(pgp_type)
|
||||
|
||||
return pgpdump.get_correlation(request_dict, pgp_type, field_name)
|
||||
|
||||
def save_pgp_data(type_pgp, date, item_path, data):
    """Record one pgp value found in an item.

    :param type_pgp: kind of pgp value, used inside every key name.
    :param date: date of the item; compared to the stored ``last_seen``
        after conversion with ``int()``.
    :param item_path: item in which the value was found.
    :param data: the pgp value itself.
    """
    metadata_key = 'pgpdump_metadata_{}:{}'.format(type_pgp, data)

    # basic metadata: first_seen / last_seen
    if serv_metadata.exists(metadata_key):
        last_seen = serv_metadata.hget(metadata_key, 'last_seen')
        # only move last_seen forward (or set it when missing)
        if not last_seen or int(last_seen) < int(date):
            serv_metadata.hset(metadata_key, 'last_seen', date)
    else:
        serv_metadata.hset(metadata_key, 'first_seen', date)
        serv_metadata.hset(metadata_key, 'last_seen', date)

    # global set: value -> items
    serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path)

    # daily counter
    serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1)

    # counter over all values of this type
    serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1)

    ## object metadata
    # paste: item -> values
    serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data)

    # domain object: only crawled items belong to a domain
    if not Item.is_crawled(item_path):
        return
    domain = Item.get_item_domain(item_path)
    serv_metadata.sadd('domain_pgpdump_{}:{}'.format(type_pgp, domain), data)
    serv_metadata.sadd('set_domain_pgpdump_{}:{}'.format(type_pgp, data), domain)
|
||||
|
|
|
@ -121,6 +121,11 @@ def add_item_tag(tag, item_path):
|
|||
r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
|
||||
r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path)
|
||||
|
||||
if Item.is_crawled(item_path):
|
||||
domain = Item.get_item_domain(item_path)
|
||||
r_serv_metadata.sadd('tag:{}'.format(domain), tag)
|
||||
r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain)
|
||||
|
||||
r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)
|
||||
|
||||
tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
|
||||
|
|
Loading…
Reference in a new issue