mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 00:07:16 +00:00
chg [telegram + correlation] new module: telegram (username + login code + join_chat) + add simple_correlation backend
This commit is contained in:
parent
0a2d431c74
commit
ef3d145c87
3 changed files with 307 additions and 0 deletions
21
OVERVIEW.md
21
OVERVIEW.md
|
@ -313,6 +313,27 @@ Redis and ARDB overview
|
|||
| | |
|
||||
| domain_pgpdump_**pgp_type**:**domain** | **key** |
|
||||
|
||||
#### SimpleCorrelation:
|
||||
##### zset:
|
||||
| Key | Field | Value |
|
||||
| ------ | ------ | ------ |
|
||||
| s_correl:*correlation name*:all | *object_id* | *nb_seen* |
|
||||
| s_correl:date:*correlation name*:*date_day* | *object_id* | *nb_seen |
|
||||
|
||||
##### set:
|
||||
| Key | Value |
|
||||
| ------ | ------ |
|
||||
| s_correl:set_*object type*_*correlation name*:*object_id* | *item_id* |
|
||||
| *object type*:s_correl:*correlation name*:*object_id* | *correlation_id* |
|
||||
|
||||
object type: item + domain
|
||||
|
||||
##### hset:
|
||||
| Key | Field | Value |
|
||||
| ------ | ------ | ------ |
|
||||
| 's_correl:*correlation name*:metadata:*obj_id* | first_seen | *first_seen* |
|
||||
| 's_correl:*correlation name*:metadata:*obj_id* | last_seen | *last_seen* |
|
||||
|
||||
#### Cryptocurrency
|
||||
|
||||
Supported cryptocurrency:
|
||||
|
|
282
bin/lib/simple_correlation.py
Executable file
282
bin/lib/simple_correlation.py
Executable file
|
@ -0,0 +1,282 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import redis
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
|
||||
import Date
|
||||
import Item
|
||||
#import Tag
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
||||
config_loader = None
|
||||
|
||||
def get_all_correlation_objects():
|
||||
'''
|
||||
Return a list of all correllated objects
|
||||
'''
|
||||
return ['domain', 'paste']
|
||||
|
||||
class SimpleCorrelation(object): #social_name
|
||||
|
||||
def __init__(self, correlation_name):
|
||||
self.correlation_name = correlation_name
|
||||
|
||||
def exist_correlation(self, obj_id):
|
||||
res = r_serv_metadata.zscore('s_correl:{}:all'.format(self.correlation_name), obj_id)
|
||||
if res is not None:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def _get_items(self, obj_id):
|
||||
res = r_serv_metadata.smembers('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id))
|
||||
if res:
|
||||
return list(res)
|
||||
else:
|
||||
return []
|
||||
|
||||
def get_correlation_first_seen(self, obj_id, r_int=False):
|
||||
res = r_serv_metadata.hget('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'first_seen')
|
||||
if r_int:
|
||||
if res:
|
||||
return int(res)
|
||||
else:
|
||||
return 99999999
|
||||
else:
|
||||
return res
|
||||
|
||||
def get_correlation_last_seen(self, obj_id, r_int=False):
|
||||
res = r_serv_metadata.hget('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'last_seen')
|
||||
if r_int:
|
||||
if res:
|
||||
return int(res)
|
||||
else:
|
||||
return 0
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_metadata(self, obj_id):
|
||||
meta_dict = {}
|
||||
meta_dict['first_seen'] = self.get_correlation_first_seen(obj_id)
|
||||
meta_dict['last_seen'] = self.get_correlation_last_seen(obj_id)
|
||||
meta_dict['nb_seen'] = r_serv_metadata.scard('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id))
|
||||
return meta_dict
|
||||
|
||||
def get_metadata(self, correlation_type, field_name, date_format='str_date'):
|
||||
meta_dict = self._get_metadata(obj_id)
|
||||
if date_format == "str_date":
|
||||
if meta_dict['first_seen']:
|
||||
meta_dict['first_seen'] = '{}/{}/{}'.format(meta_dict['first_seen'][0:4], meta_dict['first_seen'][4:6], meta_dict['first_seen'][6:8])
|
||||
if meta_dict['last_seen']:
|
||||
meta_dict['last_seen'] = '{}/{}/{}'.format(meta_dict['last_seen'][0:4], meta_dict['last_seen'][4:6], meta_dict['last_seen'][6:8])
|
||||
return meta_dict
|
||||
|
||||
def get_nb_object_seen_by_date(self, obj_id, date_day):
|
||||
nb = r_serv_metadata.zscore('s_correl:date:{}:{}'.format(self.correlation_name, date_day), obj_id)
|
||||
if nb is None:
|
||||
return 0
|
||||
else:
|
||||
return int(nb)
|
||||
|
||||
def get_list_nb_previous_correlation_object(self, obj_id, numDay):
|
||||
nb_previous_correlation = []
|
||||
for date_day in Date.get_previous_date_list(numDay):
|
||||
nb_previous_correlation.append(self.get_nb_object_seen_by_date(obj_id, date_day))
|
||||
return nb_previous_correlation
|
||||
|
||||
def _get_correlation_by_date(self, date_day):
|
||||
return r_serv_metadata.zrange('s_correl:date:{}:{}'.format(self.correlation_name, date_day), 0, -1)
|
||||
|
||||
# def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
|
||||
# if not request_dict:
|
||||
# return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)
|
||||
#
|
||||
# field_name = request_dict.get(correlation_type, None)
|
||||
# if not field_name:
|
||||
# return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 )
|
||||
# if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
|
||||
# return ( {'status': 'error', 'reason': 'Item not found'}, 404 )
|
||||
|
||||
def get_correlation(self, request_dict, obj_id):
|
||||
dict_resp = {}
|
||||
|
||||
if request_dict.get('items'):
|
||||
dict_resp['items'] = self._get_items(obj_id)
|
||||
|
||||
if request_dict.get('metadata'):
|
||||
dict_resp['metadata'] = self._get_metadata(obj_id)
|
||||
return (dict_resp, 200)
|
||||
|
||||
def _get_domain_correlation_obj(self, domain):
|
||||
'''
|
||||
Return correlation of a given domain.
|
||||
|
||||
:param domain: crawled domain
|
||||
:type domain: str
|
||||
:param correlation_type: correlation type
|
||||
:type correlation_type: str
|
||||
|
||||
:return: a list of correlation
|
||||
:rtype: list
|
||||
'''
|
||||
res = r_serv_metadata.smembers('domain:s_correl:{}:{}'.format(self.correlation_name, domain))
|
||||
if res:
|
||||
return list(res)
|
||||
else:
|
||||
return []
|
||||
|
||||
def _get_correlation_obj_domain(self, correlation_id):
|
||||
'''
|
||||
Return all domains that contain this correlation.
|
||||
|
||||
:param domain: field name
|
||||
:type domain: str
|
||||
:param correlation_type: correlation type
|
||||
:type correlation_type: str
|
||||
|
||||
:return: a list of correlation
|
||||
:rtype: list
|
||||
'''
|
||||
res = r_serv_metadata.smembers('s_correl:set_domain_{}:{}'.format(self.correlation_name, correlation_id))
|
||||
if res:
|
||||
return list(res)
|
||||
else:
|
||||
return []
|
||||
|
||||
def _get_item_correlation_obj(self, item_id):
|
||||
'''
|
||||
Return correlation of a given item id.
|
||||
|
||||
:param item_id: item id
|
||||
:type item_id: str
|
||||
|
||||
:return: a list of correlation
|
||||
:rtype: list
|
||||
'''
|
||||
res = r_serv_metadata.smembers('item:s_correl:{}:{}'.format(self.correlation_name, item_id))
|
||||
if res:
|
||||
return list(res)
|
||||
else:
|
||||
return []
|
||||
|
||||
# def get_correlation_all_object(self, correlation_type, correlation_value, correlation_objects=[]):
|
||||
# if not correlation_objects:
|
||||
# correlation_objects = get_all_correlation_objects()
|
||||
# correlation_obj = {}
|
||||
# for correlation_object in correlation_objects:
|
||||
# if correlation_object == 'paste':
|
||||
# res = self._get_items(correlation_type, correlation_value)
|
||||
# elif correlation_object == 'domain':
|
||||
# res = self.get_correlation_obj_domain(correlation_value, correlation_type=correlation_type)
|
||||
# else:
|
||||
# res = None
|
||||
# if res:
|
||||
# correlation_obj[correlation_object] = res
|
||||
# return correlation_obj
|
||||
|
||||
def update_correlation_daterange(self, obj_id, date):
|
||||
date = int(date)
|
||||
# obj_id don't exit
|
||||
if not r_serv_metadata.exists('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id)):
|
||||
r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'first_seen', date)
|
||||
r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'last_seen', date)
|
||||
else:
|
||||
first_seen = self.get_correlation_last_seen(obj_id, r_int=True)
|
||||
last_seen = self.get_correlation_first_seen(obj_id, r_int=True)
|
||||
if date < first_seen:
|
||||
r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'first_seen', date)
|
||||
if date > last_seen:
|
||||
r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'last_seen', date)
|
||||
|
||||
def save_item_correlation(self, obj_id, item_id, item_date):
|
||||
self.update_correlation_daterange(obj_id, item_date)
|
||||
# global set
|
||||
r_serv_metadata.sadd('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id), item_id)
|
||||
|
||||
# daily
|
||||
r_serv_metadata.zincrby('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id, 1)
|
||||
|
||||
# all correlation
|
||||
r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, 1)
|
||||
|
||||
# item
|
||||
r_serv_metadata.sadd('item:s_correl:{}:{}'.format(self.correlation_name, item_id), obj_id)
|
||||
|
||||
# domain
|
||||
if Item.is_crawled(item_id):
|
||||
domain = Item.get_item_domain(item_id)
|
||||
self.save_domain_correlation(domain, subtype, obj_id)
|
||||
|
||||
def delete_item_correlation(self, subtype, obj_id, item_id, item_date):
|
||||
#self.update_correlation_daterange ! # # TODO:
|
||||
r_serv_metadata.srem('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id), item_id)
|
||||
r_serv_metadata.srem('item:s_correl:{}:{}'.format(self.correlation_name, item_id), obj_id)
|
||||
|
||||
res = r_serv_metadata.zincrby('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id, -1)
|
||||
if int(res) < 0: # remove last
|
||||
r_serv_metadata.zrem('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id)
|
||||
|
||||
res = r_serv_metadata.zscore('s_correl:{}:all'.format(self.correlation_name), obj_id)
|
||||
if int(res) > 0:
|
||||
r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, -1)
|
||||
|
||||
def save_domain_correlation(self, domain, obj_id):
|
||||
r_serv_metadata.sadd('domain:s_correl:{}:{}'.format(self.correlation_name, domain), obj_id)
|
||||
r_serv_metadata.sadd('s_correl:set_domain_{}:{}'.format(self.correlation_name, obj_id), domain)
|
||||
|
||||
def delete_domain_correlation(self, domain, obj_id):
|
||||
r_serv_metadata.srem('domain:s_correl:{}:{}'.format(self.correlation_name, domain), obj_id)
|
||||
r_serv_metadata.srem('s_correl:set_domain_{}:{}'.format(self.correlation_name, obj_id), domain)
|
||||
|
||||
######
|
||||
|
||||
def save_correlation(self, obj_id, date_range):
|
||||
r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, 0)
|
||||
self.update_correlation_daterange(obj_id, date_range['date_from'])
|
||||
if date_range['date_from'] != date_range['date_to']:
|
||||
self.update_correlation_daterange(obj_id, date_range['date_to'])
|
||||
return True
|
||||
|
||||
def save_obj_relationship(self, obj_id, obj2_type, obj2_id):
|
||||
if obj2_type == 'domain':
|
||||
self.save_domain_correlation(obj2_id, obj_id)
|
||||
elif obj2_type == 'item':
|
||||
self.save_item_correlation(obj_id, obj2_id, Item.get_item_date(obj2_id))
|
||||
|
||||
def delete_obj_relationship(self, obj_id, obj2_type, obj2_id):
|
||||
if obj2_type == 'domain':
|
||||
self.delete_domain_correlation(obj2_id, obj_id)
|
||||
elif obj2_type == 'item':
|
||||
self.delete_item_correlation(obj_id, obj2_id, Item.get_item_date(obj2_id))
|
||||
|
||||
# def create_correlation(self, subtype, obj_id, obj_meta):
|
||||
# res = self.sanythise_correlation_types([subtype], r_boolean=True)
|
||||
# if not res:
|
||||
# print('invalid subtype')
|
||||
# return False
|
||||
# first_seen = obj_meta.get('first_seen', None)
|
||||
# last_seen = obj_meta.get('last_seen', None)
|
||||
# date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')
|
||||
# res = self.save_correlation(subtype, obj_id, date_range)
|
||||
# if res and 'tags' in obj_meta:
|
||||
# # # TODO: handle mixed tags: taxonomies and Galaxies
|
||||
# pass
|
||||
# #Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type=self.get_correlation_obj_type())
|
||||
# return True
|
||||
#
|
||||
# # # TODO: handle tags
|
||||
# def delete_correlation(self, obj_id):
|
||||
# pass
|
||||
|
||||
|
||||
######## API EXPOSED ########
|
||||
|
||||
|
||||
######## ########
|
|
@ -34,6 +34,10 @@ publish = Redis_Tags
|
|||
subscribe = Redis_Global
|
||||
publish = Redis_Tags
|
||||
|
||||
[Telegram]
|
||||
subscribe = Redis_Global
|
||||
publish = Redis_Tags
|
||||
|
||||
[Categ]
|
||||
subscribe = Redis_Global
|
||||
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
|
||||
|
|
Loading…
Reference in a new issue