chg: [Kvrocks migration] rewrite obj tags + migration

This commit is contained in:
Terrtia 2022-09-01 14:04:00 +02:00
parent 9c1bfb7073
commit d27d47dc70
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
33 changed files with 1524 additions and 1400 deletions

View file

@ -15,6 +15,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib import Tag
from lib import Users
from lib.objects import Decodeds
from lib.objects import Domains
@ -23,6 +24,7 @@ from lib.objects.CryptoCurrencies import CryptoCurrency
from lib.objects.Pgps import Pgp
from lib.objects.Screenshots import Screenshot, get_all_screenshots
from lib.objects.Usernames import Username
from packages import Date
# # # # CONFIGS # # # #
config_loader = ConfigLoader()
@ -43,13 +45,6 @@ spec.loader.exec_module(old_ail_2_ail)
old_ail_2_ail.r_serv_sync = r_serv_db
from packages import Tag
spec = importlib.util.find_spec('Tag')
old_Tag = importlib.util.module_from_spec(spec)
spec.loader.exec_module(old_Tag)
old_Tag.r_serv_tags = r_serv_tags
from lib import Tracker
spec = importlib.util.find_spec('Tracker')
old_Tracker = importlib.util.module_from_spec(spec)
@ -118,16 +113,32 @@ def core_migration():
crawlers.save_splash_manager_url_api(manager_url, manager_api_key)
crawlers.reload_splash_and_proxies_list()
# ail:misp
# ail:thehive
# hive:auto-alerts
# list_export_tags
# misp:auto-events
# whitelist_hive
# whitelist_misp
# Auto Export Migration
# Auto-export flags: anything other than the literal 'True' (including a
# missing key, which the old DB returns as None) is migrated as 'False'.
# The previous code used `==` here, which only compared and discarded the
# result, so the unvalidated value was written to Kvrocks unchanged.
ail_misp = r_serv_db.get('ail:misp')
if ail_misp != 'True':
    ail_misp = 'False'
r_kvrocks.set('ail:misp', ail_misp)
ail_thehive = r_serv_db.get('ail:thehive')
if ail_thehive != 'True':
    ail_thehive = 'False'
r_kvrocks.set('ail:thehive', ail_thehive)
# # TODO: TO CHECK
misp_auto_events = r_serv_db.get('misp:auto-events')
if misp_auto_events != '1':
misp_auto_events = '0'
r_kvrocks.set('misp:auto-events', misp_auto_events)
hive_auto_alerts = r_serv_db.get('hive:auto-alerts')
if hive_auto_alerts != '1':
hive_auto_alerts = '0'
r_kvrocks.set('hive:auto-alerts', hive_auto_alerts)
for tag in r_serv_db.smembers('whitelist_misp'):
r_kvrocks.sadd('whitelist_misp', tag)
for tag in r_serv_db.smembers('whitelist_hive'):
r_kvrocks.sadd('whitelist_hive', tag)
# # # # # # # # # # # # # # # #
@ -248,36 +259,71 @@ def investigations_migration():
def item_submit_migration():
pass
# /!\ KEY COLLISION
# # TODO: change db -> olds modules + blueprints
# # TODO: HANDLE LOCAL TAGS
# # TODO: HANDLE LOCAL TAGS
# # TODO: HANDLE LOCAL TAGS
# # TODO: HANDLE LOCAL TAGS
###############################
# #
# ITEMS MIGRATION #
# #
###############################
def get_all_items_tags():
    """Return every tag stored in the old DB set 'list_tags:item'."""
    items_tags_key = 'list_tags:item'
    return r_serv_tags.smembers(items_tags_key)
def get_all_items_tags_by_day(tag, date):
    """Return the members of the old DB set '<tag>:<date>'."""
    daily_key = f'{tag}:{date}'
    return r_serv_tags.smembers(daily_key)
def get_tag_first_seen(tag, r_int=False):
    """Return the 'first_seen' field of the old 'tag_metadata:<tag>' hash.

    :param tag: tag name
    :param r_int: when True, return an int and use 99999999 as the value for
                  a tag without a stored first_seen
    :return: the raw stored value (None when missing), or an int if r_int
    """
    stored = r_serv_tags.hget(f'tag_metadata:{tag}', 'first_seen')
    if not r_int:
        return stored
    return 99999999 if stored is None else int(stored)
def get_tags_first_seen():
    """Return, as a string, the smallest first_seen over all item tags.

    Today's date (from Date.get_today_date_str) is the upper bound, so it is
    the result when no tag has an earlier first_seen.
    """
    candidates = [int(Date.get_today_date_str())]
    candidates.extend(get_tag_first_seen(tag, r_int=True) for tag in get_all_items_tags())
    return str(min(candidates))
def get_active_taxonomies():
    """Return the members of the old DB set 'active_taxonomies'."""
    taxonomies_key = 'active_taxonomies'
    return r_serv_tags.smembers(taxonomies_key)
def get_active_galaxies():
    """Return the members of the old DB set 'active_galaxies'."""
    galaxies_key = 'active_galaxies'
    return r_serv_tags.smembers(galaxies_key)
# # TODO: HANDLE LOCAL TAGS
def tags_migration():
for taxonomy in get_active_taxonomies():
Tag.enable_taxonomy(taxonomy)
for galaxy in get_active_galaxies():
Tag.enable_galaxy(galaxy)
# for tag in get_all_items_tags():
# print(tag)
# tag_first = get_tag_first_seen(tag)
# if tag_first:
# for date in Date.get_date_range_today(tag_first):
# print(date)
# for item_id in get_all_items_tags_by_day(tag, date):
# item = Items.Item(item_id)
# item.add_tag(tag)
# HANDLE LOCAL TAGS
print(old_Tag.get_all_tags())
#
# /!\ OBJECTS TAGS ISSUE /!\
# -> only one layer
#
# issue with subtypes + between objects with same ID
#
#
#
#
pass
# # TODO: MIGRATE item_basic.add_map_obj_id_item_id ??????????????????????
# # TODO: BUILD FIRST/LAST object DATE
###############################
# #
# ITEMS MIGRATION #
@ -298,7 +344,7 @@ def items_migration():
# TODO: migrate cookies
# TODO: test cookies migration
# TODO: migrate auto crawlers
###############################
@ -326,7 +372,7 @@ def crawler_migration():
meta = old_crawlers.get_cookiejar_metadata(cookiejar_uuid, level=True)
#print(meta)
#crawlers.create_cookiejar(meta['user_id'], level=meta['level'], description=meta['description'], cookiejar_uuid=cookiejar_uuid)
#_set_cookiejar_date(meta['date'])
#crawlers._set_cookiejar_date(meta['date'])
for meta_cookie, cookie_uuid in old_crawlers.get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True):
print(cookie_uuid)
@ -349,6 +395,9 @@ def crawler_migration():
# BY TYPE - FIRST DATE DOWN / UP
def get_domain_down_by_date(domain_type, date):
    """Return the members of the crawler DB set '<domain_type>_down:<date>'."""
    down_key = f'{domain_type}_down:{date}'
    return r_crawler.smembers(down_key)
def get_item_link(item_id):
    """Return the 'real_link' field of the 'paste_metadata:<item_id>' hash."""
    meta_key = f'paste_metadata:{item_id}'
    return r_serv_metadata.hget(meta_key, 'real_link')
@ -415,66 +464,90 @@ def get_domain_history_by_port(domain_type, domain, port):
history.append(dict_history)
return history
def get_domain_tags(domain):
    """Return the members of the old metadata set 'tag:<domain>'."""
    tags_key = f'tag:{domain}'
    return r_serv_metadata.smembers(tags_key)
def domain_migration():
print('Domains MIGRATION...')
for domain_type in ['onion', 'regular']:
for dom in get_domains_up_by_type(domain_type):
if domain_type == 'onion':
if not crawlers.is_valid_onion_domain(dom):
print(dom)
continue
# ports = get_domain_ports(domain_type, dom)
# first_seen = get_domain_first_seen(domain_type, dom)
# last_check = get_domain_last_check(domain_type, dom)
# last_origin = get_domain_last_origin(domain_type, dom)
# languages = get_domain_languages(dom)
#
# domain = Domains.Domain(dom)
# # domain.update_daterange(first_seen)
# # domain.update_daterange(last_check)
# # domain._set_ports(ports)
# # if last_origin:
# # domain.set_last_origin(last_origin)
# for language in languages:
# print(language)
# # domain.add_language(language)
# for tag in get_domain_tags(domain):
# domain.add_tag(tag)
# #print('------------------')
# #print('------------------')
# #print('------------------')
# #print('------------------')
# #print('------------------')
# print(dom)
# #print(first_seen)
# #print(last_check)
# #print(ports)
#
# # # TODO: FIXME filter invalid hostname
#
# # CREATE DOMAIN HISTORY
# for port in ports:
# for history in get_domain_history_by_port(domain_type, dom, port):
# epoch = history['epoch']
# # DOMAIN DOWN
# if not history.get('status'): # domain DOWN
# # domain.add_history(epoch, port)
# print(f'DOWN {epoch}')
# # DOMAIN UP
# else:
# root_id = history.get('root')
# if root_id:
# # domain.add_history(epoch, port, root_item=root_id)
# #print(f'UP {root_id}')
# crawled_items = get_crawled_items(dom, root_id)
# for item_id in crawled_items:
# url = get_item_link(item_id)
# item_father = get_item_father(item_id)
# if item_father and url:
# #print(f'{url} {item_id}')
# pass
# # domain.add_crawled_item(url, port, item_id, item_father)
#
#
# #print()
ports = get_domain_ports(domain_type, dom)
for domain_type in ['onion', 'regular']:
for date in Date.get_date_range_today('20190101'):
for dom in get_domain_down_by_date(domain_type, date):
if domain_type == 'onion':
if not crawlers.is_valid_onion_domain(dom):
print(dom)
continue
first_seen = get_domain_first_seen(domain_type, dom)
last_check = get_domain_last_check(domain_type, dom)
last_origin = get_domain_last_origin(domain_type, dom)
languages = get_domain_languages(dom)
domain = Domains.Domain(dom)
# domain.update_daterange(first_seen)
# domain.update_daterange(last_check)
# domain._set_ports(ports)
# if last_origin:
# domain.set_last_origin(last_origin)
for language in languages:
print(language)
# domain.add_language(language)
#print('------------------')
#print('------------------')
#print('------------------')
#print('------------------')
#print('------------------')
print(dom)
#print(first_seen)
#print(last_check)
#print(ports)
# # TODO: FIXME filter invalid hostname
# CREATE DOMAIN HISTORY
for port in ports:
for history in get_domain_history_by_port(domain_type, dom, port):
epoch = history['epoch']
# DOMAIN DOWN
if not history.get('status'): # domain DOWN
# domain.add_history(epoch, port)
print(f'DOWN {epoch}')
# DOMAIN UP
else:
root_id = history.get('root')
if root_id:
# domain.add_history(epoch, port, root_item=root_id)
#print(f'UP {root_id}')
crawled_items = get_crawled_items(dom, root_id)
for item_id in crawled_items:
url = get_item_link(item_id)
item_father = get_item_father(item_id)
if item_father and url:
#print(f'{url} {item_id}')
pass
# domain.add_crawled_item(url, port, item_id, item_father)
#print()
# domain.add_history(None, None, date=date)
###############################
@ -489,7 +562,8 @@ def get_decoded_items_list_by_decoder(decoder_type, decoded_id): ###############
#return r_serv_metadata.zrange('nb_seen_hash:{}'.format(sha1_string), 0, -1)
return r_serv_metadata.zrange(f'{decoder_type}_hash:{decoded_id}', 0, -1)
def get_decodeds_tags(decoded_id):
    """Return the members of the old metadata set 'tag:<decoded_id>'."""
    tags_key = f'tag:{decoded_id}'
    return r_serv_metadata.smembers(tags_key)
def decodeds_migration():
print('Decoded MIGRATION...')
@ -508,6 +582,9 @@ def decodeds_migration():
filepath = decoded.get_filepath(mimetype=mimetype)
decoded._save_meta(filepath, mimetype)
for tag in get_decodeds_tags(decoded_id):
decoded.add_tag(tag)
for decoder_type in decoder_names:
for item_id in get_decoded_items_list_by_decoder(decoder_type, decoded_id):
print(item_id, decoder_type)
@ -530,6 +607,9 @@ def get_screenshot_items_list(screenshot_id): ######################### # TODO:
def get_screenshot_domain(screenshot_id):
return r_crawler.smembers(f'screenshot_domain:{screenshot_id}')
def get_screenshot_tags(screenshot_id):
    """Return the members of the old metadata set 'tag:<screenshot_id>'."""
    tags_key = f'tag:{screenshot_id}'
    return r_serv_metadata.smembers(tags_key)
# Tags + Correlations
# # TODO: save orphan screenshots ?????
def screenshots_migration():
@ -541,14 +621,13 @@ def screenshots_migration():
screenshot = Screenshot(screenshot_id)
tags = old_Tag.get_obj_tag(screenshot_id) ################## # TODO:
if tags:
print(screenshot_id)
print(tags)
for tag in get_screenshot_tags(screenshot_id):
screenshot.add_tag(tag)
# Correlations
for item_id in get_screenshot_items_list(screenshot_id):
print(item_id)
date = get_item_date(item_id)
screenshot.add_correlation('item', '', item_id)
for domain_id in get_screenshot_domain(screenshot_id):
print(domain_id)
@ -615,23 +694,24 @@ def statistics_migration():
if __name__ == '__main__':
#core_migration()
#user_migration()
core_migration()
# user_migration()
# tags_migration()
#items_migration()
#crawler_migration()
#domain_migration()
# domain_migration() # TO TEST
#decodeds_migration()
#screenshots_migration()
# screenshots_migration()
#subtypes_obj_migration()
#ail_2_ail_migration()
#trackers_migration()
#investigations_migration()
# ail_2_ail_migration()
# trackers_migration()
# investigations_migration()
# custom tags
# crawler queues + auto_crawlers
# stats - Cred - Mail - Provider

View file

@ -16,13 +16,13 @@ from pubsublogger import publisher
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'core/'))
import screen
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
from Item import Item
import Tag
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")

View file

@ -11,10 +11,9 @@ from io import BytesIO
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
import Date
import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Tag
import ConfigLoader
@ -113,7 +112,7 @@ def get_decoded_metadata(sha1_string, nb_seen=False, size=False, file_type=False
return metadata_dict
def get_decoded_tag(sha1_string):
return Tag.get_obj_tag(sha1_string)
return Tag.get_object_tags('decoded', sha1_string)
def get_list_nb_previous_hash(sha1_string, num_day):
nb_previous_hash = []
@ -351,7 +350,7 @@ def delete_decoded_file(obj_id):
if not os.path.isfile(filepath):
return False
Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id))
Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id)) #############
os.remove(filepath)
return True

View file

@ -16,16 +16,14 @@ import random
import time
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Cryptocurrency
import Pgp
import Date
import Decoded
import Item
import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Correlate_object
import Tag
import Language
import Screenshot
import Username
@ -154,6 +152,11 @@ def get_domains_up_by_daterange(date_from, date_to, domain_type):
domains_up = []
return domains_up
# Return the last crawled domains by type
# entries are formatted as: domain;epoch
def get_last_crawled_domains(domain_type):
    """Return the whole 'last_<domain_type>' list."""
    last_key = 'last_{}'.format(domain_type)
    return r_serv_onion.lrange(last_key, 0, -1)
def paginate_iterator(iter_elems, nb_obj=50, page=1):
dict_page = {}
dict_page['nb_all_elem'] = len(iter_elems)
@ -231,15 +234,21 @@ def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[],
def get_domains_by_filters():
pass
def create_domains_metadata_list(list_domains, domain_type):
def create_domains_metadata_list(list_domains, domain_type, tags=True):
# # TODO:
# tags => optional
# last check timestamp
l_domains = []
for domain in list_domains:
if domain_type=='all':
dom_type = get_domain_type(domain)
else:
dom_type = domain_type
l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True,
ports=True, tags=True, languages=True, screenshot=True, tags_safe=True))
ports=True, tags=tags, languages=True, screenshot=True, tags_safe=True))
return l_domains
def sanithyse_domain_name_to_search(name_to_search, domain_type):
@ -653,7 +662,7 @@ def get_domain_tags(domain):
:param domain: crawled domain
'''
return Tag.get_obj_tag(domain)
return Tag.get_object_tags('domain', domain)
def get_domain_random_screenshot(domain):
'''
@ -712,97 +721,6 @@ def get_domain_metadata_basic(domain, domain_type=None):
domain_type = get_domain_type(domain)
return get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=False)
def get_domain_cryptocurrency(domain, currencies_type=None, get_nb=False):
'''
Retun all cryptocurrencies of a given domain.
:param domain: crawled domain
:param currencies_type: list of cryptocurrencies type
:type currencies_type: list, optional
'''
return Cryptocurrency.cryptocurrency.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb)
def get_domain_pgp(domain, currencies_type=None, get_nb=False):
'''
Retun all pgp of a given domain.
:param domain: crawled domain
:param currencies_type: list of pgp type
:type currencies_type: list, optional
'''
return Pgp.pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb)
def get_domain_username(domain, currencies_type=None, get_nb=False):
'''
Retun all pgp of a given domain.
:param domain: crawled domain
:param currencies_type: list of pgp type
:type currencies_type: list, optional
'''
return Username.correlation.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb)
def get_domain_decoded(domain):
'''
Retun all decoded item of a given domain.
:param domain: crawled domain
'''
return Decoded.get_domain_decoded_item(domain)
def get_domain_screenshot(domain):
'''
Retun all decoded item of a given domain.
:param domain: crawled domain
'''
return Screenshot.get_domain_screenshot(domain)
def get_domain_all_correlation(domain, correlation_names=[], get_nb=False):
'''
Retun all correlation of a given domain.
:param domain: crawled domain
:type domain: str
:return: a dict of all correlation for a given domain
:rtype: dict
'''
if not correlation_names:
correlation_names = Correlate_object.get_all_correlation_names()
domain_correl = {}
for correlation_name in correlation_names:
if correlation_name=='cryptocurrency':
res = get_domain_cryptocurrency(domain, get_nb=get_nb)
elif correlation_name=='pgp':
res = get_domain_pgp(domain, get_nb=get_nb)
elif correlation_name=='username':
res = get_domain_username(domain, get_nb=get_nb)
elif correlation_name=='decoded':
res = get_domain_decoded(domain)
elif correlation_name=='screenshot':
res = get_domain_screenshot(domain)
else:
res = None
# add correllation to dict
if res:
domain_correl[correlation_name] = res
return domain_correl
def get_domain_total_nb_correlation(correlation_dict):
total_correlation = 0
if 'decoded' in correlation_dict:
total_correlation += len(correlation_dict['decoded'])
if 'screenshot' in correlation_dict:
total_correlation += len(correlation_dict['screenshot'])
if 'cryptocurrency' in correlation_dict:
total_correlation += correlation_dict['cryptocurrency'].get('nb', 0)
if 'pgp' in correlation_dict:
total_correlation += correlation_dict['pgp'].get('nb', 0)
return total_correlation
# TODO: handle port
def get_domain_history(domain, domain_type, port): # TODO: add date_range: from to + nb_elem
'''
@ -972,12 +890,6 @@ class Domain(object):
'''
return get_domain_languages(self.domain)
def get_domain_correlation(self):
'''
Retun all correlation of a given domain.
'''
return get_domain_all_correlation(self.domain, get_nb=True)
def get_domain_history(self):
'''
Retun the full history of a given domain and port.
@ -998,4 +910,6 @@ class Domain(object):
return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag)
if __name__ == '__main__':
search_domains_by_name('c', 'onion')
#search_domains_by_name('c', 'onion')
res = get_last_crawled_domains('onion')
print(res)

View file

@ -22,10 +22,8 @@ from flask import escape
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from exceptions import UpdateInvestigationError
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Tag
from exceptions import UpdateInvestigationError
config_loader = ConfigLoader.ConfigLoader()
r_tracking = config_loader.get_db_conn("Kvrocks_DB")

954
bin/lib/Tag.py Executable file
View file

@ -0,0 +1,954 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
import datetime
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib import ail_core
from lib import item_basic
from packages import Date
from pytaxonomies import Taxonomies
from pymispgalaxies import Galaxies, Clusters
config_loader = ConfigLoader()
r_tags = config_loader.get_db_conn("Kvrocks_Tags")
config_loader = None
#### CORE FUNCTIONS ####
def build_unsafe_tags():
    '''
    Build the set of tags flagging unsafe content.

    The set always contains the hard-coded child-exploitation dark-web topic
    tag; every machinetag of the 'copine-scale' taxonomy is added when
    pytaxonomies provides that taxonomy.

    :return: unsafe tags
    :rtype: set
    '''
    ## CE content
    tags = {'dark-web:topic="pornography-child-exploitation"'}
    # add copine-scale tags
    copine_scale = Taxonomies().get('copine-scale')
    if copine_scale:
        tags.update(copine_scale.machinetags())
    return tags
# set of unsafe tags
unsafe_tags = build_unsafe_tags()
def is_tags_safe(ltags):
    '''
    Check that a list of tags contains no unsafe tag (CE, ...)

    :param ltags: list of tags
    :type ltags: list

    :return: True when none of the tags is in the unsafe set
    :rtype: boolean
    '''
    no_unsafe_tag = unsafe_tags.isdisjoint(ltags)
    return no_unsafe_tag
# # TODO: verify tags + object_type
# get set_keys: intersection
def get_obj_keys_by_tags(tags, obj_type, subtype='', date=None):
    '''
    Build, for each tag, the name of the set holding the objects with that tag.

    Item keys are per day ('<obj_type>:<subtype>:<tag>:<date>'); keys of every
    other object type carry no date ('<obj_type>:<subtype>:<tag>').

    :param tags: tags to build keys for
    :param obj_type: object type
    :param subtype: object subtype, empty string when there is none
    :param date: day appended to item keys only

    :return: one key per tag, in the order of tags
    :rtype: list
    '''
    if obj_type == 'item':
        return [f'{obj_type}:{subtype}:{tag}:{date}' for tag in tags]
    return [f'{obj_type}:{subtype}:{tag}' for tag in tags]
def get_obj_by_tag(key_tag):
    '''
    Return the members of the set stored at key_tag.

    :param key_tag: set key, as built by get_obj_keys_by_tags
    '''
    members = r_tags.smembers(key_tag)
    return members
##-- CORE FUNCTIONS --##
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
def is_obj_tagged(obj_type, obj_id, subtype=''):
    '''
    Check if an object has at least one tag

    :param obj_type: object type
    :type obj_type: str
    :param obj_id: object id
    :type obj_id: str
    :param subtype: object subtype
    :type subtype: str

    :return: result of EXISTS on the object's tag set
    '''
    obj_tags_key = f'tag:{obj_type}:{subtype}:{obj_id}'
    return r_tags.exists(obj_tags_key)
def is_obj_tagged_by_tag(obj_type, obj_id, tag, subtype=''):
    '''
    Check if an object is tagged with a given tag

    :param obj_type: object type
    :type obj_type: str
    :param obj_id: object id
    :type obj_id: str
    :param tag: tag to look for
    :type tag: str
    :param subtype: object subtype
    :type subtype: str

    :return: result of SISMEMBER on the object's tag set
    '''
    obj_tags_key = f'tag:{obj_type}:{subtype}:{obj_id}'
    return r_tags.sismember(obj_tags_key, tag)
#
# f'tag:{obj_type}:{subtype}:{id}' f'tag:{id}'
#
# f'list_tags:{obj_type}:{subtype}' f'list_tags:{obj_type}'
#
# graph tags by days ???????????????????????????????
#
#
# # TODO: metadata by object type ????????????
# tag_metadata:
# f'{tag}:{date}' -> set of item_id
# # TODO: ADD subtype support
# f'{obj_type}:{tag}' -> set of item_id
def get_tag_first_seen(tag, object_type=None, r_int=False):
    '''
    Return the 'first_seen' field of the 'tag_metadata:<tag>' hash.

    :param tag: tag name
    :param object_type: currently unused
    :param r_int: when True, return an int and use 99999999 when nothing
                  (or an empty value) is stored

    :return: stored value as returned by the DB, or an int if r_int
    '''
    stored = r_tags.hget(f'tag_metadata:{tag}', 'first_seen')
    if not r_int:
        return stored
    return int(stored) if stored else 99999999
# # TODO: LATER ADD object metadata
# if not object_type:
# r_tags.hget(f'tag_metadata:{tag}', 'first_seen')
# else:
# r_tags.hget(f'tag_metadata:{tag}', 'first_seen:{object_type}')
def get_tag_last_seen(tag, object_type=None, r_int=False):
    '''
    Return the 'last_seen' field of the 'tag_metadata:<tag>' hash.

    :param tag: tag name
    :param object_type: currently unused
    :param r_int: when True, return an int and use 0 when nothing
                  (or an empty value) is stored

    :return: stored value as returned by the DB, or an int if r_int
    '''
    stored = r_tags.hget(f'tag_metadata:{tag}', 'last_seen')
    if not r_int:
        return stored
    return int(stored) if stored else 0
def get_tag_metadata_date(tag, r_int=False):
    '''
    Return the first_seen / last_seen dates of a tag.

    :param tag: tag name
    :param r_int: forwarded to get_tag_first_seen and get_tag_last_seen

    :return: dict with the keys 'first_seen' and 'last_seen'
    :rtype: dict
    '''
    first_seen = get_tag_first_seen(tag, r_int=r_int)
    last_seen = get_tag_last_seen(tag, r_int=r_int)
    return {'first_seen': first_seen, 'last_seen': last_seen}
def set_tag_first_seen(tag, date):
    '''Store date as the 'first_seen' field of the 'tag_metadata:<tag>' hash.'''
    metadata_key = f'tag_metadata:{tag}'
    r_tags.hset(metadata_key, 'first_seen', date)
def set_tag_last_seen(tag, date):
    '''Store date as the 'last_seen' field of the 'tag_metadata:<tag>' hash.'''
    metadata_key = f'tag_metadata:{tag}'
    r_tags.hset(metadata_key, 'last_seen', date)
# # TODO: handle others objects date
def _update_tag_first_seen(tag, first_seen, last_seen):
    '''
    Recompute the first_seen date of a tag after an item tag was removed.

    Walks forward one day at a time from first_seen to last_seen (inclusive)
    and stores the first day whose 'item::<tag>:<day>' set is not empty.
    If no day in the range has a tagged item, both 'first_seen' and
    'last_seen' are removed from the tag metadata.

    Implemented as a loop instead of one recursive call per day, so ranges
    longer than the recursion limit no longer fail. Dates are compared as
    ints, so int and str dates (update_tag_metadata passes ints) behave the same.

    :param tag: tag name
    :param first_seen: current first_seen, YYYYMMDD as int or str
    :param last_seen: current last_seen, YYYYMMDD as int or str
    '''
    last_day = int(last_seen)
    # Date helpers are given a str, like _update_tag_last_seen does
    # NOTE(review): assumes Date.date_add_day returns a YYYYMMDD value - confirm
    day = str(first_seen)
    if int(day) > last_day:
        # inverted range: nothing to scan, leave the metadata untouched
        return
    while int(day) <= last_day:
        if r_tags.scard(f'item::{tag}:{day}') > 0:
            r_tags.hset(f'tag_metadata:{tag}', 'first_seen', day)
            return
        day = str(Date.date_add_day(day))
    # no tag in db
    r_tags.hdel(f'tag_metadata:{tag}', 'first_seen')
    r_tags.hdel(f'tag_metadata:{tag}', 'last_seen')
# # TODO:
def _update_tag_last_seen(tag, first_seen, last_seen):
    '''
    Recompute the last_seen date of a tag after an item tag was removed.

    Walks backward one day at a time from last_seen to first_seen (inclusive)
    and stores the first day whose 'item::<tag>:<day>' set is not empty.
    If no day in the range has a tagged item, both 'first_seen' and
    'last_seen' are removed from the tag metadata.

    Implemented as a loop instead of one recursive call per day, so ranges
    longer than the recursion limit no longer fail. Dates are compared as
    ints, so int and str dates (update_tag_metadata passes ints) behave the same.

    :param tag: tag name
    :param first_seen: current first_seen, YYYYMMDD as int or str
    :param last_seen: current last_seen, YYYYMMDD as int or str
    '''
    first_day = int(first_seen)
    # NOTE(review): assumes Date.date_substract_day returns a YYYYMMDD value - confirm
    day = str(last_seen)
    if int(day) < first_day:
        # inverted range: nothing to scan, leave the metadata untouched
        return
    while int(day) >= first_day:
        if r_tags.scard(f'item::{tag}:{day}') > 0:
            r_tags.hset(f'tag_metadata:{tag}', 'last_seen', day)
            return
        day = str(Date.date_substract_day(day))
    # no tag in db
    r_tags.hdel(f'tag_metadata:{tag}', 'first_seen')
    r_tags.hdel(f'tag_metadata:{tag}', 'last_seen')
def update_tag_metadata(tag, date, delete=False): # # TODO: delete Tags
    '''
    Keep the first_seen / last_seen dates of a tag up to date.

    :param tag: tag name
    :param date: day of the tagged object, converted with int()
    :param delete: False when a tag was added, True when it was removed
    '''
    day = int(date)
    bounds = get_tag_metadata_date(tag, r_int=True)
    oldest = bounds['first_seen']
    newest = bounds['last_seen']

    # Delete Tag: a bound is recomputed only when the removed day was that bound
    if delete:
        if day == oldest:
            _update_tag_first_seen(tag, oldest, newest)
        if day == newest:
            _update_tag_last_seen(tag, oldest, newest)
        return

    # Add Tag: widen the date range when needed
    if day < oldest:
        set_tag_first_seen(tag, day)
    if day > newest:
        set_tag_last_seen(tag, day)
# old
# r_tags.smembers(f'{tag}:{date}')
# r_tags.smembers(f'{obj_type}:{tag}')
def get_tag_objects(tag, obj_type, subtype='', date=''):
    '''
    Return the ids of the objects tagged with a given tag.

    The previous signature had no tag parameter although the body used
    `tag`, so every call raised NameError. It is now the first parameter,
    as in add_object_tag / delete_object_tag.

    :param tag: tag name
    :param obj_type: object type
    :param subtype: object subtype, empty string when there is none
    :param date: day, only used for items (their sets are stored per day)

    :return: members of the matching set
    '''
    if obj_type == 'item':
        return r_tags.smembers(f'{obj_type}:{subtype}:{tag}:{date}')
    else:
        return r_tags.smembers(f'{obj_type}:{subtype}:{tag}')
def get_object_tags(obj_type, obj_id, subtype=''):
    '''
    Return all the tags of an object.

    :param obj_type: object type
    :param obj_id: object id
    :param subtype: object subtype, empty string when there is none

    :return: members of the set 'tag:<obj_type>:<subtype>:<obj_id>'
    '''
    obj_tags_key = f'tag:{obj_type}:{subtype}:{obj_id}'
    return r_tags.smembers(obj_tags_key)
def add_object_tag(tag, obj_type, id, subtype=''):
    '''
    Tag an object and update the tag indexes.

    Nothing else is written when the object already had the tag. Tagging a
    crawled item also tags its domain, except for the two
    'infoleak:submission' tags checked below.

    :param tag: tag name
    :param obj_type: object type
    :param id: object id
    :param subtype: object subtype, empty string when there is none
    '''
    # SADD returns 1 only when the tag is new for this object
    newly_tagged = r_tags.sadd(f'tag:{obj_type}:{subtype}:{id}', tag) == 1
    if not newly_tagged:
        return

    # global / per type / per subtype tag lists
    for list_key in ('list_tags', f'list_tags:{obj_type}', f'list_tags:{obj_type}:{subtype}'):
        r_tags.sadd(list_key, tag)

    if obj_type != 'item':
        r_tags.sadd(f'{obj_type}:{subtype}:{tag}', id)
    else:
        # items are indexed by day
        date = item_basic.get_item_date(id)
        r_tags.sadd(f'{obj_type}:{subtype}:{tag}:{date}', id)
        # add domain tag
        if item_basic.is_crawled(id) and tag!='infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"':
            domain = item_basic.get_item_domain(id)
            add_object_tag(tag, "domain", domain)
        update_tag_metadata(tag, date)

    # daily counter of added tags
    today = datetime.date.today().strftime("%Y%m%d")
    r_tags.hincrby(f'daily_tags:{today}', tag, 1)
def update_tag_global_by_obj_type(tag, object_type, subtype=''):
    '''
    Clean the tag lists once a tag is no longer used by an object type.

    Fixes over the previous version:
      - the body read `obj_type`, which is not the parameter name and is
        also the variable of the last loop, so the non-item branch raised
        UnboundLocalError;
      - the subtype loop ignored its loop variable and tested the existence
        of a whole tag list instead of the membership of this tag.

    :param tag: tag name
    :param object_type: object type the tag was removed from
    :param subtype: object subtype, empty string when there is none
    '''
    tag_deleted = False
    if object_type == 'item':
        # item tags are tracked by day: no metadata left means no tagged item
        if not r_tags.exists(f'tag_metadata:{tag}'):
            tag_deleted = True
    else:
        if not r_tags.exists(f'{object_type}:{subtype}:{tag}'):
            r_tags.srem(f'list_tags:{object_type}:{subtype}', tag)
            # Iterate on all subtypes: keep the per-type entry while any
            # subtype list still holds this tag
            delete_global_obj_tag = True
            for obj_subtype in ail_core.get_object_all_subtypes():
                if r_tags.sismember(f'list_tags:{object_type}:{obj_subtype}', tag):
                    delete_global_obj_tag = False
                    break
            if delete_global_obj_tag:
                r_tags.srem(f'list_tags:{object_type}', tag)
                tag_deleted = True
    if tag_deleted:
        # update global tags
        # NOTE(review): this key has no subtype/date part, unlike the keys
        # written by add_object_tag - confirm it can ever exist
        for other_type in ail_core.get_all_objects():
            if r_tags.exists(f'{other_type}:{tag}'):
                tag_deleted = False
        if tag_deleted:
            r_tags.srem('list_tags', tag)
def delete_object_tag(tag, obj_type, id, subtype=''):
    '''
    Remove a tag from an object and update the tag indexes.

    Does nothing when the object does not have the tag.

    :param tag: tag name
    :param obj_type: object type
    :param id: object id
    :param subtype: object subtype, empty string when there is none
    '''
    if not is_obj_tagged_by_tag(obj_type, id, tag, subtype=subtype):
        return

    # NOTE(review): the tag is (re)added to the three lists before the removal;
    # update_tag_global_by_obj_type is then in charge of cleaning them
    for list_key in ('list_tags', f'list_tags:{obj_type}', f'list_tags:{obj_type}:{subtype}'):
        r_tags.sadd(list_key, tag)

    if obj_type != 'item':
        r_tags.srem(f'{obj_type}:{subtype}:{tag}', id)
    else:
        # items are indexed by day
        date = item_basic.get_item_date(id)
        r_tags.srem(f'{obj_type}:{subtype}:{tag}:{date}', id)
        update_tag_metadata(tag, date, delete=True)

    r_tags.srem(f'tag:{obj_type}:{subtype}:{id}', tag)
    update_tag_global_by_obj_type(tag, obj_type, subtype=subtype)
################################################################################################################
# TODO: rewrite me
# TODO: other objects
def get_obj_by_tags(obj_type, l_tags, date_from=None, date_to=None, nb_obj=50, page=1):
    '''
    Return one page of the objects tagged with every tag of l_tags.

    Items are searched day by day over a sanitised date range; the other
    object types are searched without a date range.

    :param obj_type: object type
    :param l_tags: tags the objects must all have
    :param date_from: start of the date range (items only)
    :param date_to: end of the date range (items only)
    :param nb_obj: number of objects per page
    :param page: requested page, lowered to the last page when too high

    :return: dict with the page content ('tagged_obj') and pagination data;
             items also get the 'date' range used. When a non-item search
             finds nothing, only 'tagged_obj', 'page' and 'nb_pages' are set.
    :rtype: dict
    '''
    def _tagged_with_all(set_keys):
        # a single key is read directly, several keys are intersected
        if len(set_keys) < 2:
            return get_obj_by_tag(set_keys[0])
        return r_tags.sinter(set_keys[0], *set_keys[1:])

    def _count_pages(nb_elem):
        # float division, rounded up when the result is not a whole number
        ratio = nb_elem / nb_obj
        if ratio.is_integer():
            return int(ratio)
        return int(ratio) + 1

    # with daterange
    if obj_type == 'item':
        tagged = []
        # sanitise date
        date_range = sanitise_tags_date_range(l_tags, date_from=date_from, date_to=date_to)
        for date_day in Date.substract_date(date_range['date_from'], date_range['date_to']):
            day_keys = get_obj_keys_by_tags(l_tags, obj_type, date=date_day)
            tagged.extend(_tagged_with_all(day_keys))

        # handle pagination
        nb_all_elem = len(tagged)
        nb_pages = _count_pages(nb_all_elem)
        if page > nb_pages:
            page = nb_pages
        start = nb_obj * (page - 1)
        if nb_pages > 1:
            stop = nb_obj * page
            tagged = tagged[start:stop]
        # only one page
        else:
            stop = nb_all_elem
            tagged = tagged[start:]
        # index of the last returned element, capped by the number of elements
        last_elem = min(stop, nb_all_elem)

        return {"tagged_obj":tagged, "date" : date_range,
                "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":last_elem, "nb_all_elem":nb_all_elem}

    # without daterange
    tagged = _tagged_with_all(get_obj_keys_by_tags(l_tags, obj_type))
    if not tagged:
        return {"tagged_obj":tagged, "page":0, "nb_pages":0}

    # handle pagination
    nb_all_elem = len(tagged)
    nb_pages = _count_pages(nb_all_elem)
    if page > nb_pages:
        page = nb_pages

    if nb_pages > 1:
        # multiple pages: keep the elements whose position is in [start, stop]
        start = nb_obj * (page - 1)
        stop = (nb_obj * page) - 1
        tagged = [elem for index, elem in enumerate(tagged) if start <= index <= stop]
        stop = min(stop + 1, nb_all_elem)
    else:
        # only one page
        start = 0
        stop = nb_all_elem
        tagged = list(tagged)

    return {"tagged_obj":tagged, "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop, "nb_all_elem":nb_all_elem}
################################################################################
################################################################################
################################################################################
################################################################################
#### Taxonomies - Galaxies ####
################################################################################
# galaxies = Galaxies()
# clusters = Clusters(skip_duplicates=True)
#
# list_all_tags = {}
# for name, c in clusters.items(): #galaxy name + tags
# list_all_tags[name] = c
#
# list_galaxies = []
# for g in galaxies.values():
# list_galaxies.append(g.to_json())
#
# list_clusters = []
# for c in clusters.values():
# list_clusters.append(c.to_json())
#
# # tags numbers in galaxies
# total_tags = {}
# for name, tags in clusters.items(): #galaxie name + tags
# total_tags[name] = len(tags)
################################################################################
#### Taxonomies ####
def get_taxonomy_tags_from_cluster(taxonomy_name):
    '''
    Return the machinetags of a pytaxonomies taxonomy.

    :param taxonomy_name: taxonomy name, used as index into Taxonomies()
    '''
    return Taxonomies()[taxonomy_name].machinetags()
# TODO: ADD api handler
def enable_taxonomy(taxonomy):
    '''
    Enable a taxonomy and all of its tags.

    :param taxonomy: taxonomy name
    '''
    # loaded first: nothing is written if the taxonomy lookup fails
    taxonomy_tags = get_taxonomy_tags_from_cluster(taxonomy)
    r_tags.sadd('active_taxonomies', taxonomy)
    active_tags_key = f'active_tag_{taxonomy}'
    for taxonomy_tag in taxonomy_tags:
        r_tags.sadd(active_tags_key, taxonomy_tag)
# def enable_taxonomy(taxonomie, enable_tags=True):
# '''
# Enable a taxonomy. (UI)
#
# :param taxonomie: MISP taxonomy
# :type taxonomie: str
# :param enable_tags: crawled domain
# :type enable_tags: boolean
# '''
# taxonomies = Taxonomies()
# if enable_tags:
# taxonomie_info = taxonomies.get(taxonomie)
# if taxonomie_info:
# # activate taxonomie
# r_tags.sadd('active_taxonomies', taxonomie)
# # activate taxonomie tags
# for tag in taxonomie_info.machinetags():
# r_tags.sadd('active_tag_{}'.format(taxonomie), tag)
# #r_tags.sadd('active_taxonomies_tags', tag)
# else:
# print('Error: {}, please update pytaxonomies'.format(taxonomie))
#### Galaxies ####
def get_galaxy_tags_from_cluster(galaxy_name):
    '''
    Return the machinetags of a pymispgalaxies cluster.

    :param galaxy_name: galaxy name, used as index into Clusters()
    '''
    return Clusters(skip_duplicates=True)[galaxy_name].machinetags()
def get_galaxy_tags_with_sysnonym_from_cluster(galaxy_name):
    '''
    Map each tag of a galaxy cluster to its synonyms.

    :param galaxy_name: galaxy name, used as index into Clusters()

    :return: 'misp-galaxy:<cluster type>="<value>"' -> synonyms, an empty
             list when the value has none
    :rtype: dict
    '''
    cluster = Clusters(skip_duplicates=True)[galaxy_name]
    tags_synonyms = {}
    for cluster_value in cluster.to_dict()['values']:
        galaxy_tag = f'misp-galaxy:{cluster.type}="{cluster_value.value}"'
        tags_synonyms[galaxy_tag] = cluster_value.meta.synonyms or []
    return tags_synonyms
def enable_galaxy(galaxy):
    '''
    Enable a galaxy, all of its tags and the synonyms of each tag.

    :param galaxy: galaxy name
    '''
    # loaded first: nothing is written if the galaxy lookup fails
    tags_synonyms = get_galaxy_tags_with_sysnonym_from_cluster(galaxy)
    r_tags.sadd('active_galaxies', galaxy)
    active_tags_key = f'active_tag_galaxies_{galaxy}'
    for galaxy_tag, synonyms in tags_synonyms.items():
        r_tags.sadd(active_tags_key, galaxy_tag)
        # synonyms
        for synonym in synonyms:
            r_tags.sadd(f'synonym_tag_{galaxy_tag}', synonym)
################################################################################
################################################################################
################################################################################
################################################################################
def get_taxonomie_from_tag(tag):
    '''
    Return the part of a tag before its first ':'.

    A tag without ':' is returned unchanged: splitting always yields at least
    one element, so there is no failure case to handle.

    :param tag: tag name
    :type tag: str
    '''
    taxonomie, _, _ = tag.partition(':')
    return taxonomie
def get_galaxy_from_tag(tag):
    '''
    Return the galaxy name of a tag: the text between the first ':' and
    the following '=' (or next ':').

    :return: galaxy name, or None if the tag contains no ':'
    '''
    fields = tag.split(':')
    if len(fields) < 2:
        return None
    return fields[1].split('=')[0]
def get_taxonomies():
    '''
    Return the keys of a freshly built Taxonomies() object.
    '''
    all_taxonomies = Taxonomies()
    return all_taxonomies.keys()
def is_taxonomie(taxonomie, taxonomies=None):
    '''
    Check if a name is a known taxonomy.

    :param taxonomie: taxonomy name to check
    :param taxonomies: optional collection of taxonomy names; when empty or
                       not provided, get_taxonomies() is used
    :rtype: bool
    '''
    # None sentinel instead of a shared mutable default list
    if not taxonomies:
        taxonomies = get_taxonomies()
    return taxonomie in taxonomies
def get_active_taxonomies(r_set=False):
    '''
    Return the members of 'active_taxonomies'.

    :param r_set: if True, wrap the result in set()
    '''
    active = r_tags.smembers('active_taxonomies')
    return set(active) if r_set else active
def get_active_galaxies(r_set=False):
    '''
    Return the members of 'active_galaxies'.

    :param r_set: if True, wrap the result in set()
    '''
    active = r_tags.smembers('active_galaxies')
    return set(active) if r_set else active
def get_all_taxonomies_tags(): # # TODO: add + REMOVE + Update
    '''
    Return the members of 'active_taxonomies_tags'.
    '''
    taxonomies_tags = r_tags.smembers('active_taxonomies_tags')
    return taxonomies_tags
def get_all_galaxies_tags(): # # TODO: add + REMOVE + Update
    '''
    Return the members of 'active_galaxies_tags'.
    '''
    galaxies_tags = r_tags.smembers('active_galaxies_tags')
    return galaxies_tags
def get_all_custom_tags(): # # TODO: add + REMOVE + Update
    '''
    Return the members of 'tags:custom'.
    '''
    custom_tags = r_tags.smembers('tags:custom')
    return custom_tags
def get_taxonomies_enabled_tags(r_list=False):
    '''
    Return the union of the enabled tags of every active taxonomy.

    :param r_list: if True, return a list instead of the set
    :return: enabled tags; empty when no taxonomy is active
    '''
    l_tag_keys = [f'active_tag_{taxonomie}' for taxonomie in get_active_taxonomies()]
    if len(l_tag_keys) > 1:
        res = r_tags.sunion(l_tag_keys[0], *l_tag_keys[1:])
    elif l_tag_keys:
        res = r_tags.smembers(l_tag_keys[0])
    else:
        # no active taxonomy: previously `res` was never bound and the
        # function raised UnboundLocalError
        res = set()
    #### # WARNING: # TODO: DIRTY FIX, REPLACE WITH LOCAL TAGS ####
    if r_list:
        return list(res)
    return res
def get_galaxies_enabled_tags():
    '''
    Return the union of the enabled tags of every active galaxy.

    :return: result of sunion/smembers, or an empty list when no galaxy
             is active
    '''
    galaxy_keys = [f'active_tag_galaxies_{galaxy}' for galaxy in get_active_galaxies()]
    if not galaxy_keys:
        return []
    if len(galaxy_keys) == 1:
        return r_tags.smembers(galaxy_keys[0])
    first_key, *other_keys = galaxy_keys
    return r_tags.sunion(first_key, *other_keys)
def get_custom_enabled_tags(r_list=False):
    '''
    Return the members of 'tags:custom:enabled_tags'.

    :param r_list: if True, return a list
    '''
    enabled_tags = r_tags.smembers('tags:custom:enabled_tags')
    return list(enabled_tags) if r_list else enabled_tags
def get_taxonomies_customs_tags(r_list=False):
    '''
    Return the enabled custom tags merged with the enabled taxonomies tags.

    :param r_list: if True, return a list
    '''
    custom_tags = get_custom_enabled_tags()
    merged = custom_tags.union(get_taxonomies_enabled_tags())
    if not r_list:
        return merged
    return list(merged)
def get_taxonomie_enabled_tags(taxonomie, r_list=False):
    '''
    Return the members of 'active_tag_<taxonomie>'.

    :param r_list: if True, return a list
    '''
    enabled_tags = r_tags.smembers(f'active_tag_{taxonomie}')
    return list(enabled_tags) if r_list else enabled_tags
def get_galaxy_enabled_tags(galaxy, r_list=False):
    '''
    Return the members of 'active_tag_galaxies_<galaxy>'.

    :param r_list: if True, return a list
    '''
    enabled_tags = r_tags.smembers(f'active_tag_galaxies_{galaxy}')
    return list(enabled_tags) if r_list else enabled_tags
def is_taxonomie_tag_enabled(taxonomie, tag):
    '''
    Check if a tag is enabled for a taxonomy.

    :param taxonomie: taxonomy name
    :param tag: tag to check
    :rtype: bool
    '''
    # membership test done server side, instead of fetching the whole set
    return bool(r_tags.sismember('active_tag_' + taxonomie, tag))
def is_galaxy_tag_enabled(galaxy, tag):
    '''
    Check if a tag is enabled for a galaxy.

    :param galaxy: galaxy name
    :param tag: tag to check
    :rtype: bool
    '''
    # membership test done server side, instead of fetching the whole set
    return bool(r_tags.sismember('active_tag_galaxies_' + galaxy, tag))
def is_custom_tag_enabled(tag):
    '''
    Return the result of the membership test of tag in
    'tags:custom:enabled_tags'.
    '''
    is_enabled = r_tags.sismember('tags:custom:enabled_tags', tag)
    return is_enabled
# Check if tags are enabled in AIL
def is_valid_tags_taxonomies_galaxy(list_tags, list_tags_galaxy):
    '''
    Check that every taxonomy tag and every galaxy tag is enabled.

    :param list_tags: taxonomies tags
    :param list_tags_galaxy: galaxies tags
    :return: False on the first tag whose taxonomy/galaxy is not active or
             which is not enabled, True otherwise
    :rtype: bool
    '''
    if list_tags:
        active_taxonomies = get_active_taxonomies()
        for tag in list_tags:
            taxonomie = get_taxonomie_from_tag(tag)
            if (taxonomie is None
                    or taxonomie not in active_taxonomies
                    or not is_taxonomie_tag_enabled(taxonomie, tag)):
                return False

    if list_tags_galaxy:
        active_galaxies = get_active_galaxies()
        for tag in list_tags_galaxy:
            galaxy = get_galaxy_from_tag(tag)
            if (galaxy is None
                    or galaxy not in active_galaxies
                    or not is_galaxy_tag_enabled(galaxy, tag)):
                return False
    return True
def is_taxonomie_tag(tag, namespace=None):
    '''
    Check if a tag belongs to a taxonomy.

    :param tag: tag to check
    :param namespace: optional namespace; defaults to the text before the
                      first ':' of the tag
    :return: False for the 'misp-galaxy' namespace, else is_taxonomie(namespace)
    '''
    if not namespace:
        namespace = tag.split(':')[0]
    if namespace == 'misp-galaxy':
        return False
    return is_taxonomie(namespace)
def is_galaxy_tag(tag, namespace=None):
    '''
    Check if a tag belongs to the 'misp-galaxy' namespace.

    :param tag: tag to check
    :param namespace: optional namespace; defaults to the text before the
                      first ':' of the tag
    :rtype: bool
    '''
    if not namespace:
        namespace = tag.split(':')[0]
    return namespace == 'misp-galaxy'
def is_custom_tag(tag):
    '''
    Return the result of the membership test of tag in 'tags:custom'.
    '''
    is_custom = r_tags.sismember('tags:custom', tag)
    return is_custom
# # TODO:
# def is_valid_tag(tag):
# pass
def is_enabled_tag(tag, enabled_namespace=None):
    '''
    Check if a tag is enabled, as a taxonomy tag or else as a galaxy tag.

    :param enabled_namespace: optional enabled taxonomies or galaxies,
                              forwarded to the matching checker
    '''
    if not is_taxonomie_tag(tag):
        return is_enabled_galaxy_tag(tag, enabled_galaxies=enabled_namespace)
    return is_enabled_taxonomie_tag(tag, enabled_taxonomies=enabled_namespace)
def are_enabled_tags(tags):
    '''
    Check that every tag is enabled.

    The active taxonomies and galaxies are fetched once, then reused for
    each tag.

    :return: False on the first tag that is not enabled, True otherwise
    '''
    enabled_taxonomies = get_active_taxonomies(r_set=True)
    enabled_galaxies = get_active_galaxies(r_set=True)
    for tag in tags:
        if is_taxonomie_tag(tag):
            is_enabled = is_enabled_taxonomie_tag(tag, enabled_taxonomies=enabled_taxonomies)
        else:
            is_enabled = is_enabled_galaxy_tag(tag, enabled_galaxies=enabled_galaxies)
        if is_enabled:
            continue
        return False
    return True
def is_enabled_taxonomie_tag(tag, enabled_taxonomies=None):
    '''
    Check if a taxonomy tag is enabled.

    :param enabled_taxonomies: optional active taxonomies; fetched with
                               get_active_taxonomies() when empty
    :rtype: bool
    '''
    if not enabled_taxonomies:
        enabled_taxonomies = get_active_taxonomies()
    taxonomie = get_taxonomie_from_tag(tag)
    if taxonomie is None or taxonomie not in enabled_taxonomies:
        return False
    if is_taxonomie_tag_enabled(taxonomie, tag):
        return True
    return False
def is_enabled_galaxy_tag(tag, enabled_galaxies=None):
    '''
    Check if a galaxy tag is enabled.

    :param enabled_galaxies: optional active galaxies; fetched with
                             get_active_galaxies() when empty
    :rtype: bool
    '''
    if not enabled_galaxies:
        enabled_galaxies = get_active_galaxies()
    galaxy = get_galaxy_from_tag(tag)
    if galaxy is None or galaxy not in enabled_galaxies:
        return False
    if is_galaxy_tag_enabled(galaxy, tag):
        return True
    return False
def sort_tags_taxonomies_galaxies(tags):
    '''
    Split tags in two lists: taxonomies tags and all the other tags.

    :return: (taxonomies_tags, galaxies_tags)
    :rtype: tuple
    '''
    taxonomies_tags, galaxies_tags = [], []
    for tag in tags:
        bucket = taxonomies_tags if is_taxonomie_tag(tag) else galaxies_tags
        bucket.append(tag)
    return taxonomies_tags, galaxies_tags
##-- Taxonomies - Galaxies --##
def is_tag_in_all_tag(tag):
    '''
    Check if the tag is a member of 'list_tags'.

    :rtype: bool
    '''
    return bool(r_tags.sismember('list_tags', tag))
def get_tag_synonyms(tag):
    '''
    Return the members of 'synonym_tag_<tag>'.
    '''
    synonyms_key = f'synonym_tag_{tag}'
    return r_tags.smembers(synonyms_key)
def get_tag_dislay_name(tag):
    '''
    Build the display name of a tag: the tag followed by its synonyms.

    :return: the tag alone when it has no synonym, else
             'tag, synonym1, synonym2, ...'
    :rtype: str
    '''
    tag_synonyms = get_tag_synonyms(tag)
    if not tag_synonyms:
        return tag
    # The tag used to be glued to the first synonym without any separator.
    # Synonyms are sorted so the name does not depend on set ordering.
    return ', '.join([tag, *sorted(tag_synonyms)])
def get_tags_selector_dict(tags):
    '''
    Return the selector dict of each tag, in order.

    :rtype: list
    '''
    return [get_tag_selector_dict(tag) for tag in tags]
def get_tag_selector_dict(tag):
    '''
    Return {'name': display name of the tag, 'id': tag}.
    '''
    display_name = get_tag_dislay_name(tag)
    return {'name': display_name, 'id': tag}
def get_tags_selector_data():
    '''
    Return the active taxonomies and active galaxies in a dict.
    '''
    return {
        'active_taxonomies': get_active_taxonomies(),
        'active_galaxies': get_active_galaxies(),
    }
def get_min_tag(tag):
    '''
    Return a short form of a tag.

    - tag with a value ('a:b="value"'): second '='-separated field without
      its first and last characters
    - tag ending with '=' (no value): first field without its first and
      last characters
    - tag without '=' (custom tags): the tag itself
    '''
    fields = tag.split('=')
    # custom tags
    if len(fields) == 1:
        return fields[0]
    if fields[1] != '':
        return fields[1][1:-1]
    # no value
    return fields[0][1:-1]
# TODO: ADD object type
def get_obj_tags_minimal(item_id): ####?
    '''
    Return, for each tag of the item, a dict with the tag and its short form.

    :return: list of {"tag": tag, "min_tag": get_min_tag(tag)}
    '''
    minimal_tags = []
    for tag in get_object_tags('item', item_id):
        minimal_tags.append({"tag": tag, "min_tag": get_min_tag(tag)})
    return minimal_tags
def unpack_str_tags_list(str_tags_list):
    '''
    Split a comma separated string of tags.

    :return: list of tags, empty list for an empty string
    '''
    # NOTE(review): '\"' is the same string as '"', so this replace does
    # not change the text; kept as is.
    unpacked = str_tags_list.replace('"','\"')
    return unpacked.split(',') if unpacked else []
# used by modal
def get_modal_add_tags(item_id, object_type='item'):
    '''
    Modal: add tags to domain or Paste

    :return: dict with the active taxonomies, the active galaxies, the
             object id and the object type
    '''
    modal_data = {"active_taxonomies": get_active_taxonomies()}
    modal_data["active_galaxies"] = get_active_galaxies()
    modal_data["object_id"] = item_id
    modal_data["object_type"] = object_type
    return modal_data
######## NEW VERSION ########
def create_custom_tag(tag):
    '''
    Register a custom tag and enable it.
    '''
    for custom_key in ('tags:custom', 'tags:custom:enabled_tags'):
        r_tags.sadd(custom_key, tag)
# # TODO: ADD color
def get_tag_metadata(tag, r_int=False):
    '''
    Get tag metadata (current: item only)

    :param r_int: forwarded to get_tag_first_seen / get_tag_last_seen
    :return: dict with keys 'tag', 'first_seen' and 'last_seen'
    '''
    first_seen = get_tag_first_seen(tag, r_int=r_int)
    last_seen = get_tag_last_seen(tag, r_int=r_int)
    return {"tag": tag, 'first_seen': first_seen, 'last_seen': last_seen}
def get_tags_min_last_seen(l_tags, r_int=False):
    '''
    Get the smallest last seen from a list of tags (current: item only)

    :param r_int: if True return an int, else a str
    :return: min last seen, 99999999 when l_tags is empty
    '''
    min_last_seen = 99999999
    for tag in l_tags:
        tag_last_seen = get_tag_last_seen(tag, r_int=True)
        min_last_seen = min(min_last_seen, tag_last_seen)
    return min_last_seen if r_int else str(min_last_seen)
def get_all_tags():
    '''
    Return the members of 'list_tags' as a list.
    '''
    all_tags = r_tags.smembers('list_tags')
    return list(all_tags)
def get_all_obj_tags(obj_type):
    '''
    Return the members of 'list_tags:<obj_type>' as a list.
    '''
    obj_tags = r_tags.smembers(f'list_tags:{obj_type}')
    return list(obj_tags)
## Objects tags ##
###################################################################################
###################################################################################
###################################################################################
###################################################################################
###################################################################################
###################################################################################
###################################################################################
###################################################################################
###################################################################################
def add_global_tag(tag, object_type=None):
    '''
    Create a set of all tags used in AIL (all + by object)

    :param tag: tag
    :type tag: str
    :param object_type: object type
    :type object_type: str
    '''
    r_tags.sadd('list_tags', tag)
    if not object_type:
        return
    r_tags.sadd('list_tags:{}'.format(object_type), tag)
def add_obj_tags(object_id, object_type, tags=None, galaxy_tags=None):
    '''
    Add taxonomies tags and galaxies tags to an object.

    Empty tags are ignored. Processing stops at the first tag that is not
    enabled; tags added before it are kept.

    :param object_id: object id
    :param object_type: object type
    :param tags: taxonomies tags
    :param galaxy_tags: galaxies tags
    :return: None on success, else an (error dict, 400) tuple
    '''
    # The object date lookup was removed: its result was never used.
    for tag in tags or []:
        if tag:
            taxonomie = get_taxonomie_from_tag(tag)
            if is_taxonomie_tag_enabled(taxonomie, tag):
                add_object_tag(tag, object_type, object_id)
            else:
                return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400)

    for tag in galaxy_tags or []:
        if tag:
            galaxy = get_galaxy_from_tag(tag)
            # galaxy is None when the tag has no ':'; report it as not
            # enabled instead of crashing while building the key
            if galaxy is not None and is_galaxy_tag_enabled(galaxy, tag):
                add_object_tag(tag, object_type, object_id)
            else:
                return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400)
# TEMPLATE + API QUERY
def api_add_obj_tags(tags=None, galaxy_tags=None, object_id=None, object_type="item"):
    '''
    API: add tags to an object.

    :param tags: taxonomies tags
    :param galaxy_tags: galaxies tags
    :param object_id: object id
    :param object_type: one of 'item', 'domain', 'image', 'decoded'
    :return: (dict, HTTP status code); on success the dict contains the
             added tags, the object id and the object type
    '''
    if object_id is None:
        return ({'status': 'error', 'reason': 'object_id id not found'}, 404)
    if not tags and not galaxy_tags:
        return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400)
    if object_type not in ('item', 'domain', 'image', 'decoded'): # # TODO: put me in another file
        return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400)

    # remove empty tags
    tags = list(filter(bool, tags or []))
    galaxy_tags = list(filter(bool, galaxy_tags or []))

    res = add_obj_tags(object_id, object_type, tags=tags, galaxy_tags=galaxy_tags)
    if res:
        return res

    res_dict = {'tags': tags + galaxy_tags, 'id': object_id, 'type': object_type}
    return (res_dict, 200)
# def add_tag(object_type, tag, object_id, obj_date=None):
# # new tag
# if not is_obj_tagged(object_id, tag):
# # # TODO: # FIXME: sanityze object_type
# if obj_date:
# try:
# obj_date = int(obj_date)
# except:
# obj_date = None
# if not obj_date:
# obj_date = get_obj_date(object_type, object_id)
# add_global_tag(tag, object_type=object_type)
# add_obj_tag(object_type, object_id, tag, obj_date=obj_date)
# update_tag_metadata(tag, obj_date, object_type=object_type)
#
# # create tags stats # # TODO: put me in cache
# r_tags.hincrby('daily_tags:{}'.format(datetime.date.today().strftime("%Y%m%d")), tag, 1)
# def delete_obj_tag(object_type, object_id, tag, obj_date):
# if object_type=="item": # # TODO: # FIXME: # REVIEW: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# obj_date = get_obj_date(object_type, object_id)
# r_serv_metadata.srem('tag:{}'.format(object_id), tag)
# r_tags.srem('{}:{}'.format(tag, obj_date), object_id)
# else:
# r_serv_metadata.srem('tag:{}'.format(object_id), tag)
# r_tags.srem('{}:{}'.format(object_type, tag), object_id)
def delete_tag(object_type, tag, object_id, obj_date=None): ################################ # TODO:
    '''
    Remove a tag from an object and update the tag metadata.

    :param obj_date: optional object date; looked up with get_obj_date()
                     when not provided
    :return: None on success, else an (error dict, 400) tuple when the
             object is not tagged with this tag
    '''
    if not is_obj_tagged(object_id, tag):
        return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400)

    # tag exist
    if not obj_date:
        obj_date = get_obj_date(object_type, object_id)
    delete_obj_tag(object_type, object_id, tag, obj_date)
    update_tag_metadata(tag, obj_date, object_type=object_type, add_tag=False)
    update_tag_global_by_obj_type(object_type, tag)
# # TODO: move me
def get_obj_date(object_type, object_id):
    '''
    Return the date of an object.

    :return: item date as int for an 'item', None for any other type
    '''
    if object_type != "item":
        return None
    return int(item_basic.get_item_date(object_id))
# API QUERY
def api_delete_obj_tags(tags=[], object_id=None, object_type="item"):
    '''
    API: delete tags from an object.

    :return: (dict, HTTP status code); stops and returns the first error
             returned by delete_object_tag
    '''
    if not object_id:
        return ({'status': 'error', 'reason': 'object id not found'}, 404)
    if not tags:
        return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400)

    for tag in tags:
        error = delete_object_tag(tag, object_type, object_id, subtype='')
        if error:
            return error

    return ({'tags': tags, 'id': object_id}, 200)
# def delete_obj_tags(object_id, object_type, tags):
# obj_date = get_obj_date(object_type, object_id)
# for tag in tags:
# res = delete_tag(object_type, tag, object_id, obj_date=obj_date)
# if res:
# return res
#
# def delete_obj_all_tags(obj_id, obj_type):
# delete_obj_tags(obj_id, obj_type, get_obj_tag(obj_id))
def sanitise_tags_date_range(l_tags, date_from=None, date_to=None):
    '''
    Sanitise a date range for a list of tags.

    When one of the bounds is missing, both bounds are set to the min
    last seen of the tags.

    :return: result of Date.sanitise_date_range
    '''
    missing_bound = date_from is None or date_to is None
    if missing_bound:
        date_from = date_to = get_tags_min_last_seen(l_tags, r_int=False)
    return Date.sanitise_date_range(date_from, date_to)
#### TAGS EXPORT ####
# # TODO:
def is_updated_tags_to_export(): # by type
    '''
    Not implemented yet: always returns False.
    '''
    updated = False
    return updated
def get_list_of_solo_tags_to_export_by_type(export_type): # by type
    '''
    Return the members of 'whitelist_<export_type>'.

    :param export_type: 'misp' or 'thehive'
    :return: set members, or None for any other export type
    '''
    if export_type not in ['misp', 'thehive']:
        return None
    # NOTE(review): for 'thehive' the key read is 'whitelist_thehive';
    # confirm it matches the key actually written ('whitelist_hive' ?)
    return r_serv_db.smembers('whitelist_{}'.format(export_type))
#r_serv_db.smembers('whitelist_hive')
# if __name__ == '__main__':
# galaxy = 'infoleak'
# get_taxonomy_tags_from_cluster(galaxy)

View file

@ -16,11 +16,11 @@ from flask import escape
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Date
import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import item_basic
import Tag
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")

35
bin/lib/ail_core.py Executable file
View file

@ -0,0 +1,35 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
config_loader = None
def get_ail_uuid():
    # Placeholder: not implemented yet, currently returns None.
    pass
#### AIL OBJECTS ####
# # TODO: check change paste => item
def get_all_objects():
    '''
    Return the list of all AIL object types.
    '''
    object_types = ('domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username')
    return list(object_types)
def get_object_all_subtypes(obj_type):
    '''
    Return the subtypes of an object type.

    :return: list of subtypes for 'cryptocurrency', 'pgp' and 'username',
             None for any other type
    '''
    # built on each call so callers always get a fresh list
    subtypes_by_type = {
        'cryptocurrency': ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash'],
        'pgp': ['key', 'mail', 'name'],
        'username': ['telegram', 'twitter', 'jabber'],
    }
    return subtypes_by_type.get(obj_type)
##-- AIL OBJECTS --##

View file

@ -506,11 +506,11 @@ def reset_all_spash_crawler_status():
r_cache.delete('all_splash_crawlers')
def get_splash_crawler_status(spash_url):
crawler_type = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'type')
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'crawling_domain')
started_time = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'started_time')
status_info = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'status')
crawler_info = '{} - {}'.format(spash_url, started_time)
crawler_type = r_cache.hget(f'metadata_crawler:{spash_url}', 'type')
crawling_domain = r_cache.hget(f'metadata_crawler:{spash_url}', 'crawling_domain')
started_time = r_cache.hget(f'metadata_crawler:{spash_url}', 'started_time')
status_info = r_cache.hget(f'metadata_crawler:{spash_url}', 'status')
crawler_info = f'{spash_url} - {started_time}'
if status_info=='Waiting' or status_info=='Crawling':
status=True
else:
@ -520,13 +520,13 @@ def get_splash_crawler_status(spash_url):
def set_current_crawler_status(splash_url, status, started_time=False, crawled_domain=None, crawler_type=None):
# TODO: get crawler type if None
# Status: ['Waiting', 'Error', ...]
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', status)
r_cache.hset(f'metadata_crawler:{splash_url}', 'status', status)
if started_time:
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'started_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
r_cache.hset(f'metadata_crawler:{splash_url}', 'started_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
if crawler_type:
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'type', crawler_type)
r_cache.hset(f'metadata_crawler:{splash_url}', 'type', crawler_type)
if crawled_domain:
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'crawling_domain', crawled_domain)
r_cache.hset(f'metadata_crawler:{splash_url}', 'crawling_domain', crawled_domain)
#r_cache.sadd('all_splash_crawlers', splash_url) # # TODO: add me in fct: create_ail_crawler

View file

@ -0,0 +1,53 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
def get_first_object_date(object_type, subtype, field=''):
    '''
    Return the score stored in 'objs:first_date' for
    '<object_type>:<subtype>:<field>', as an int.

    :return: 99999999 when no (or a falsy) score is stored
    '''
    score = r_serv_db.zscore('objs:first_date', f'{object_type}:{subtype}:{field}')
    return int(score) if score else 99999999
def get_last_object_date(object_type, subtype, field=''):
    '''
    Return the score stored in 'objs:last_date' for
    '<object_type>:<subtype>:<field>', as an int.

    :return: 0 when no (or a falsy) score is stored
    '''
    score = r_serv_db.zscore('objs:last_date', f'{object_type}:{subtype}:{field}')
    return int(score) if score else 0
def _set_first_object_date(object_type, subtype, date, field=''):
    '''
    Store date in 'objs:first_date' for '<object_type>:<subtype>:<field>'.
    '''
    member = f'{object_type}:{subtype}:{field}'
    return r_serv_db.zadd('objs:first_date', member, date)
def _set_last_object_date(object_type, subtype, date, field=''):
    '''
    Store date in 'objs:last_date' for '<object_type>:<subtype>:<field>'.
    '''
    member = f'{object_type}:{subtype}:{field}'
    return r_serv_db.zadd('objs:last_date', member, date)
def update_first_object_date(object_type, subtype, date, field=''):
    '''
    Update the first date of an object type/subtype if date is older.

    :param date: date, compared as an int
    :return: date (as given) when it became the new first date, else the
             stored first date
    '''
    first_date = get_first_object_date(object_type, subtype, field=field)
    if int(date) < first_date:
        # was `object_typel`: NameError as soon as an older date was seen
        _set_first_object_date(object_type, subtype, date, field=field)
        return date
    return first_date
def update_last_object_date(object_type, subtype, date, field=''):
    '''
    Update the last date of an object type/subtype if date is newer.

    :return: date (as given) when it became the new last date, else the
             stored last date
    '''
    last_date = get_last_object_date(object_type, subtype, field=field)
    if int(date) <= last_date:
        return last_date
    _set_last_object_date(object_type, subtype, date, field=field)
    return date
def update_object_date(object_type, subtype, date, field=''):
    '''
    Update both the first date and the last date with date.
    '''
    for update_date in (update_first_object_date, update_last_object_date):
        update_date(object_type, subtype, date, field=field)
###############################################################

View file

@ -61,9 +61,9 @@ def delete_index_by_name(index_name):
index_path = os.path.realpath(index_path)
# incorrect filename
if not os.path.commonprefix([index_path, INDEX_PATH]) == INDEX_PATH:
raise Exception('Path traversal detected {}'.format(index_path))
raise Exception(f'Path traversal detected {index_path}')
if not os.path.isdir(index_path):
print('Error: The index directory {} doesn\'t exist'.format(index_path))
print(f'Error: The index directory {index_path} doesn\'t exist')
return None
res = rmtree(index_path)
_remove_index_name_from_all_index(index_name)
@ -85,7 +85,7 @@ def delete_older_index_by_time(int_time):
if int(all_index[-1]) > int_time: # make sure to keep one files
for index_name in all_index:
if int(index_name) < int_time:
print('deleting index {} ...'.format(index_name))
print(f'deleting index {index_name} ...')
delete_index_by_name(index_name)
# keep x most recent index
@ -94,7 +94,7 @@ def delete_older_index(number_of_index_to_keep):
all_index = get_all_index()
if len(get_all_index()) > number_of_index_to_keep:
for index_name in all_index[0:-number_of_index_to_keep]:
print('deleting index {} ...'.format(index_name))
print(f'deleting index {index_name} ...')
delete_index_by_name(index_name)
##-- DATA RETENTION --##

View file

@ -7,11 +7,9 @@ import gzip
import magic
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Tag
config_loader = ConfigLoader.ConfigLoader()
# get and sanityze PASTE DIRECTORY
@ -247,7 +245,7 @@ def verify_sources_list(sources):
def get_all_items_metadata_dict(list_id):
list_meta = []
for item_id in list_id:
list_meta.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} )
list_meta.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_object_tags('item', item_id)} )
return list_meta
##-- --##

View file

@ -97,6 +97,7 @@ class CryptoCurrency(AbstractSubtypeObject):
############################################################################
def get_all_subtypes():
    '''
    Return the list of cryptocurrency subtypes.
    '''
    #return ail_core.get_object_all_subtypes(self.type)
    subtypes = ('bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash')
    return list(subtypes)
# def build_crypto_regex(subtype, search_id):

View file

@ -12,6 +12,7 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_object import AbstractObject
from lib.item_basic import get_item_children, get_item_date, get_item_url
from lib import data_retention_engine
config_loader = ConfigLoader()
r_onion = config_loader.get_redis_conn("ARDB_Onion")
@ -48,7 +49,6 @@ class Domain(AbstractObject):
if first_seen:
if separator:
first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}'
first_seen = int(first_seen)
elif r_int==True:
first_seen = int(first_seen)
return first_seen
@ -92,10 +92,17 @@ class Domain(AbstractObject):
res = r_onion.zrevrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, 0, withscores=True)
if res:
item_core, epoch = res[0]
if item_core != str(epoch):
try:
epoch = int(item_core)
except:
print('True')
return True
print('False')
return False
def was_up(self):
return r_onion.hexists(f'{self.domain_type}_metadata:{self.id}', 'ports')
def get_ports(self, r_set=False):
l_ports = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'ports')
if l_ports:
@ -144,18 +151,26 @@ class Domain(AbstractObject):
def get_languages(self):
return r_onion.smembers(f'domain:language:{self.id}')
def get_meta(self):
def get_meta_keys(self):
return ['type', 'first_seen', 'last_check', 'last_origin', 'ports', 'status', 'tags', 'languages']
# options: set of optional meta fields
def get_meta(self, options=set()):
meta = {}
meta['type'] = self.domain_type
meta['first_seen'] = self.get_first_seen()
meta['last_check'] = self.get_last_check()
meta['last_origin'] = self.last_origin()
meta['tags'] = self.get_tags()
meta['ports'] = self.get_ports()
meta['status'] = self.is_up(ports=ports)
meta['tags'] = self.get_last_origin()
#meta['is_tags_safe'] =
meta['status'] = self.is_up(ports=meta['ports'])
if 'last_origin' in options:
meta['last_origin'] = self.get_last_origin()
#meta['is_tags_safe'] = ##################################
if 'languages' in options:
meta['languages'] = self.get_languages()
#meta['screenshot'] =
return meta
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
@ -272,21 +287,32 @@ class Domain(AbstractObject):
r_onion.zadd(f'crawler_history_{self.domain_type}:{self.id}:{port}', epoch, int(root_item))
# if domain down -> root_item = epoch
def add_history(self, epoch, port, root_item=None):
def add_history(self, epoch, port, root_item=None, date=None):
if not date:
date = time.strftime('%Y%m%d', time.gmtime(epoch))
try:
int(root_item)
except ValueError:
root_item = None
data_retention_engine.update_object_date('domain', self.domain_type, date)
update_first_object_date(date, self.domain_type)
update_last_object_date(date, self.domain_type)
# UP
if root_item:
r_onion.srem(f'full_{self.domain_type}_down', self.id)
r_onion.sadd(f'full_{self.domain_type}_up', self.id)
r_onion.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day
r_onion.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month
self._add_history_root_item(root_item, epoch, port)
else:
if port:
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month
self._add_history_root_item(epoch, epoch, port)
else:
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id)
if not self.was_up():
r_onion.sadd(f'full_{self.domain_type}_down', self.id)
def add_crawled_item(self, url, port, item_id, item_father):
r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
@ -310,6 +336,44 @@ def get_all_domains_languages():
def get_domains_up_by_type(domain_type):
    '''
    Return the members of 'full_<domain_type>_up'.
    '''
    up_key = f'full_{domain_type}_up'
    return r_onion.smembers(up_key)
def get_domains_down_by_type(domain_type):
    '''
    Return the members of 'full_<domain_type>_down'.
    '''
    down_key = f'full_{domain_type}_down'
    return r_onion.smembers(down_key)
def get_first_object_date(subtype, field=''):
    '''
    Return the score stored in 'objs:first_date' for
    'domain:<subtype>:<field>', as an int.

    :return: 99999999 when no (or a falsy) score is stored
    '''
    score = r_onion.zscore('objs:first_date', f'domain:{subtype}:{field}')
    return int(score) if score else 99999999
def get_last_object_date(subtype, field=''):
    '''
    Return the score stored in 'objs:last_date' for
    'domain:<subtype>:<field>', as an int.

    :return: 0 when no (or a falsy) score is stored
    '''
    score = r_onion.zscore('objs:last_date', f'domain:{subtype}:{field}')
    return int(score) if score else 0
def _set_first_object_date(date, subtype, field=''):
    '''
    Store date in 'objs:first_date' for 'domain:<subtype>:<field>'.
    '''
    member = f'domain:{subtype}:{field}'
    return r_onion.zadd('objs:first_date', member, date)
def _set_last_object_date(date, subtype, field=''):
    '''
    Store date in 'objs:last_date' for 'domain:<subtype>:<field>'.
    '''
    member = f'domain:{subtype}:{field}'
    return r_onion.zadd('objs:last_date', member, date)
def update_first_object_date(date, subtype, field=''):
    '''
    Update the first date of a domain subtype if date is older.

    :return: date (as given) when it became the new first date, else the
             stored first date
    '''
    first_date = get_first_object_date(subtype, field=field)
    if int(date) >= first_date:
        return first_date
    _set_first_object_date(date, subtype, field=field)
    return date
def update_last_object_date(date, subtype, field=''):
    '''
    Update the last date of a domain subtype if date is newer.

    :return: date (as given) when it became the new last date, else the
             stored last date
    '''
    last_date = get_last_object_date(subtype, field=field)
    if int(date) <= last_date:
        return last_date
    _set_last_object_date(date, subtype, field=field)
    return date
################################################################################
################################################################################

View file

@ -22,8 +22,7 @@ from export.Export import get_ail_uuid # # TODO: REPLACE
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib import item_basic
from packages import Tag
from lib import Tag
from flask import url_for
@ -493,7 +492,7 @@ def get_item(request_dict):
dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
tags = request_dict.get('tags', True)
if tags:
dict_item['tags'] = Tag.get_obj_tag(item_id)
dict_item['tags'] = Tag.get_object_tags('item', item_id)
size = request_dict.get('size', False)
if size:
@ -568,7 +567,7 @@ def api_get_items_sources():
def get_item_list_desc(list_item_id):
desc_list = []
for item_id in list_item_id:
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} )
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_object_tags('item', item_id)} )
return desc_list
def is_crawled(item_id):
@ -579,7 +578,7 @@ def get_crawler_matadata(item_id, tags=None):
if is_crawled(item_id):
dict_crawler['domain'] = get_item_domain(item_id)
if not ltags:
ltags = Tag.get_obj_tag(item_id)
ltags = Tag.get_object_tags('item', item_id)
dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags)
dict_crawler['url'] = get_item_link(item_id)
dict_crawler['screenshot'] = get_item_screenshot(item_id)

View file

@ -80,6 +80,7 @@ class Pgp(AbstractSubtypeObject):
############################################################################
def get_all_subtypes():
    '''
    Return the list of pgp subtypes.
    '''
    #return get_object_all_subtypes(self.type)
    subtypes = ('key', 'mail', 'name')
    return list(subtypes)
def get_all_pgps():

View file

@ -8,6 +8,7 @@ from io import BytesIO
from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
#from lib import Tag
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_object import AbstractObject

View file

@ -86,6 +86,7 @@ class Username(AbstractSubtypeObject):
############################################################################
def get_all_subtypes():
    '''
    Return the list of username subtypes.
    '''
    #return ail_core.get_object_all_subtypes(self.type)
    subtypes = ('telegram', 'twitter', 'jabber')
    return list(subtypes)
def get_all_usernames():

View file

@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from packages import Tag
from lib import Tag
from lib import Duplicate
from lib.correlations_engine import get_correlations, add_obj_correlation, delete_obj_correlation, exists_obj_correlation, is_obj_correlated
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
@ -66,7 +66,7 @@ class AbstractObject(ABC):
## Tags ##
def get_tags(self, r_set=False):
tags = Tag.get_obj_tag(self.id)
tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True))
if r_set:
tags = set(tags)
return tags
@ -75,7 +75,8 @@ class AbstractObject(ABC):
return Duplicate.get_duplicates(self.type, self.get_subtype(r_str=True), self.id)
## ADD TAGS ????
#def add_tags(self):
def add_tag(self, tag):
    '''
    Add a tag to this object (type, id and subtype are taken from self).
    '''
    obj_subtype = self.get_subtype(r_str=True)
    Tag.add_object_tag(tag, self.type, self.id, subtype=obj_subtype)
#- Tags -#
@ -120,7 +121,7 @@ class AbstractObject(ABC):
def _delete(self):
# DELETE TAGS
Tag.delete_obj_all_tags(self.id, self.type)
Tag.delete_obj_all_tags(self.id, self.type) ############ # TODO: # TODO: # FIXME:
# remove from tracker
self.delete_trackers()
# remove from investigations
@ -135,12 +136,12 @@ class AbstractObject(ABC):
"""
pass
# @abstractmethod
# def get_meta(self):
# """
# get Object metadata
# """
# pass
@abstractmethod
def get_meta(self):
    """
    Get the object metadata.

    Declared abstract: this base implementation has an empty body.
    """
    pass
@abstractmethod
def get_link(self, flask_context=False):

View file

@ -9,40 +9,33 @@ import redis
from abc import ABC
from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
from lib.ConfigLoader import ConfigLoader
from lib.ail_core import get_all_objects
from lib import correlations_engine
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from lib.objects.CryptoCurrencies import CryptoCurrency
from lib.objects.Decodeds import Decoded
from lib.objects.Domains import Domain
from lib.objects.Items import Item
from lib.objects.Pgps import Pgp
from lib.objects.Screenshots import Screenshot
from lib.objects.Usernames import Username
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/objects'))
from CryptoCurrencies import CryptoCurrency
from Decodeds import Decoded
from Domains import Domain
from Items import Item
from Pgps import Pgp
from Screenshots import Screenshot
from Usernames import Username
##################################################################
##################################################################
#sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
#sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
##################################################################
##################################################################
config_loader = ConfigLoader.ConfigLoader()
config_loader = ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
class AILObjects(object):
class AILObjects(object): ## ??????????????????????
initial = 0
ongoing = 1
completed = 2
# # TODO: check change paste => item
def get_all_objects():
return ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username']
def is_valid_object_type(obj_type):
    '''
    Check if obj_type is one of the types returned by get_all_objects().
    '''
    valid_types = get_all_objects()
    return obj_type in valid_types
def get_object(obj_type, subtype, id):
if obj_type == 'item':
@ -60,21 +53,40 @@ def get_object(obj_type, subtype, id):
elif obj_type == 'username':
return Username(id, subtype)
def get_object_link(obj_type, subtype, id, flask_context=False):
    '''
    Return the link of an object.

    :param flask_context: forwarded to the object get_link()
    '''
    obj = get_object(obj_type, subtype, id)
    return obj.get_link(flask_context=flask_context)
def get_object_svg(obj_type, subtype, id):
    '''
    Return the svg icon of an object.
    '''
    obj = get_object(obj_type, subtype, id)
    return obj.get_svg_icon()
def get_objects_meta(l_dict_objs, icon=False, url=False, flask_context=False):
l_meta = []
for dict_obj in l_dict_objs:
object = get_object(dict_obj['type'], dict_obj['subtype'], dict_obj['id'])
dict_meta = object.get_default_meta(tags=True)
if icon:
dict_meta['icon'] = object.get_svg_icon()
if url:
dict_meta['link'] = object.get_link(flask_context=flask_context)
l_meta.append(dict_meta)
return l_meta
def get_object_meta(obj_type, subtype, id, flask_context=False):
    '''
    Return the metadata of an object, completed with its icon and link.

    :param flask_context: forwarded to the object get_link()
    :return: object get_meta() result with 'icon' and 'link' keys added
    '''
    obj = get_object(obj_type, subtype, id)
    obj_meta = obj.get_meta()
    obj_meta['icon'] = obj.get_svg_icon()
    obj_meta['link'] = obj.get_link(flask_context=flask_context)
    return obj_meta
def get_ui_obj_tag_table_keys(obj_type):
    '''
    Warning: use only in flask (dynamic templates)

    Return the table column keys for a 'domain' object; any other type
    yields None.
    '''
    if obj_type != "domain":
        return None
    # # TODO: add root screenshot
    return ['id', 'first_seen', 'last_check', 'status']
# # TODO: # FIXME:
# def get_objects_meta(l_dict_objs, icon=False, url=False, flask_context=False):
# l_meta = []
# for dict_obj in l_dict_objs:
# object = get_object(dict_obj['type'], dict_obj['subtype'], dict_obj['id'])
# dict_meta = object.get_default_meta(tags=True)
# if icon:
# dict_meta['icon'] = object.get_svg_icon()
# if url:
# dict_meta['link'] = object.get_link(flask_context=flask_context)
# l_meta.append(dict_meta)
# return l_meta
# # TODO: CHECK IF object already have an UUID
def get_misp_object(obj_type, subtype, id):
@ -126,7 +138,21 @@ def get_objects_relationship(obj_1, obj2):
return relationship
def api_sanitize_object_type(obj_type):
    """Return an (error dict, 400) tuple for an unknown object type, else None."""
    if is_valid_object_type(obj_type):
        return None
    return ({'status': 'error', 'reason': 'Incorrect object type'}, 400)
################################################################################
# DATA RETENTION
# # TODO: TO ADD ??????????????????????
# def get_first_objects_date():
# return r_object.zrange('objs:first_date', 0, -1)
#
# def get_first_object_date(obj_type, subtype):
# return r_object.zscore('objs:first_date', f'{obj_type}:{subtype}')
#
# def set_first_object_date(obj_type, subtype, date):
# return r_object.zadd('objs:first_date', f'{obj_type}:{subtype}', date)
################################################################################
@ -142,3 +168,41 @@ def delete_obj(obj_type, subtype, id):
################################################################################
################################################################################
################################################################################
def create_correlation_graph_links(links_set):
    """Convert (source, target) pairs into a list of {"source", "target"} dicts."""
    return [{"source": link[0], "target": link[1]} for link in links_set]
def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
    """Build the node list of a correlation graph.

    :param nodes_set: iterable of node ids formatted as '<type>;<subtype>;<id>'
    :param obj_str_id: node id of the central object; it is drawn in orange
                       with a larger radius
    :param flask_context: forwarded to get_object_link()
    :return: list of node dicts with 'id', 'style', 'text' and 'url' keys
    """
    graph_nodes_list = []
    for node_id in nodes_set:
        # maxsplit=2: the object id itself may contain ';'
        obj_type, subtype, obj_id = node_id.split(';', 2)
        style = get_object_svg(obj_type, subtype, obj_id)

        # # TODO: # FIXME: in UI
        # the UI expects these aliased keys next to the ones get_object_svg() returns
        style['icon_class'] = style['style']
        style['icon_text'] = style['icon']
        style['node_color'] = style['color']
        style['node_radius'] = style['radius']

        if node_id == obj_str_id:
            style["node_color"] = 'orange'
            style["node_radius"] = 7

        graph_nodes_list.append({
            "id": node_id,
            'style': style,
            'text': obj_id,
            'url': get_object_link(obj_type, subtype, obj_id, flask_context=flask_context),
        })
    return graph_nodes_list
def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
    """Return the correlation graph of an object as {"nodes": [...], "links": [...]}.

    Nodes and links come from correlations_engine.get_correlations_graph_nodes_links()
    and are then formatted by the create_correlation_graph_* helpers.
    """
    obj_str_id, nodes, links = correlations_engine.get_correlations_graph_nodes_links(
        obj_type, subtype, obj_id,
        filter_types=filter_types, max_nodes=max_nodes, level=level,
        flask_context=flask_context)
    graph_nodes = create_correlation_graph_nodes(nodes, obj_str_id, flask_context=flask_context)
    graph_links = create_correlation_graph_links(links)
    return {"nodes": graph_nodes, "links": graph_links}
###############

View file

@ -1,282 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Date
import Item
#import Tag
config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
def get_all_correlation_objects():
    '''
    Return a list of all correlated objects
    '''
    correlated_objects = ['domain', 'paste']
    return correlated_objects
class SimpleCorrelation(object): #social_name
    '''
    Correlation without subtype, stored in the r_serv_metadata database.

    Every key is prefixed with 's_correl' and parameterised by
    self.correlation_name (set once in the constructor).
    '''

    def __init__(self, correlation_name):
        self.correlation_name = correlation_name

    def exist_correlation(self, obj_id):
        '''Return True if obj_id has a score in the 's_correl:<name>:all' sorted set.'''
        res = r_serv_metadata.zscore('s_correl:{}:all'.format(self.correlation_name), obj_id)
        return res is not None

    def _get_items(self, obj_id):
        '''Return the list of item ids stored for obj_id (empty list if none).'''
        res = r_serv_metadata.smembers('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id))
        return list(res) if res else []

    def get_correlation_first_seen(self, obj_id, r_int=False):
        '''
        Return the 'first_seen' metadata field of obj_id.

        With r_int=True the value is cast to int and 99999999 is returned
        when the field is missing; otherwise the raw value (or None).
        '''
        res = r_serv_metadata.hget('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'first_seen')
        if r_int:
            return int(res) if res else 99999999
        return res

    def get_correlation_last_seen(self, obj_id, r_int=False):
        '''
        Return the 'last_seen' metadata field of obj_id.

        With r_int=True the value is cast to int and 0 is returned when the
        field is missing; otherwise the raw value (or None).
        '''
        res = r_serv_metadata.hget('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'last_seen')
        if r_int:
            return int(res) if res else 0
        return res

    def _get_metadata(self, obj_id):
        '''Return a dict with 'first_seen', 'last_seen' and 'nb_seen' for obj_id.'''
        meta_dict = {}
        meta_dict['first_seen'] = self.get_correlation_first_seen(obj_id)
        meta_dict['last_seen'] = self.get_correlation_last_seen(obj_id)
        meta_dict['nb_seen'] = r_serv_metadata.scard('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id))
        return meta_dict

    def get_metadata(self, correlation_type, field_name, date_format='str_date'):
        '''
        Return the metadata of the object identified by field_name.

        :param correlation_type: unused, kept so existing callers keep working
        :param field_name: object id (previously the body read an undefined
                           'obj_id' and always raised NameError)
        :param date_format: 'str_date' reformats YYYYMMDD dates as YYYY/MM/DD
        '''
        meta_dict = self._get_metadata(field_name)
        if date_format == "str_date":
            for date_field in ('first_seen', 'last_seen'):
                date = meta_dict[date_field]
                if date:
                    meta_dict[date_field] = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
        return meta_dict

    def get_nb_object_seen_by_date(self, obj_id, date_day):
        '''Return the daily score of obj_id for date_day (0 if absent).'''
        nb = r_serv_metadata.zscore('s_correl:date:{}:{}'.format(self.correlation_name, date_day), obj_id)
        if nb is None:
            return 0
        return int(nb)

    def get_list_nb_previous_correlation_object(self, obj_id, numDay):
        '''Return the daily scores of obj_id for each date of Date.get_previous_date_list(numDay).'''
        return [self.get_nb_object_seen_by_date(obj_id, date_day)
                for date_day in Date.get_previous_date_list(numDay)]

    def _get_correlation_by_date(self, date_day):
        '''Return every member of the daily sorted set of date_day.'''
        return r_serv_metadata.zrange('s_correl:date:{}:{}'.format(self.correlation_name, date_day), 0, -1)

    # def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
    #     if not request_dict:
    #         return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)
    #
    #     field_name = request_dict.get(correlation_type, None)
    #     if not field_name:
    #         return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 )
    #     if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
    #         return ( {'status': 'error', 'reason': 'Item not found'}, 404 )

    def get_correlation(self, request_dict, obj_id):
        '''
        Return (response dict, 200); 'items' and/or 'metadata' are filled in
        when the matching key of request_dict is truthy.
        '''
        dict_resp = {}
        if request_dict.get('items'):
            dict_resp['items'] = self._get_items(obj_id)
        if request_dict.get('metadata'):
            dict_resp['metadata'] = self._get_metadata(obj_id)
        return (dict_resp, 200)

    def _get_domain_correlation_obj(self, domain):
        '''
        Return correlation of a given domain.

        :param domain: crawled domain
        :type domain: str

        :return: a list of correlation
        :rtype: list
        '''
        res = r_serv_metadata.smembers('domain:s_correl:{}:{}'.format(self.correlation_name, domain))
        return list(res) if res else []

    def _get_correlation_obj_domain(self, correlation_id):
        '''
        Return all domains that contain this correlation.

        :param correlation_id: correlation object id
        :type correlation_id: str

        :return: a list of domains
        :rtype: list
        '''
        res = r_serv_metadata.smembers('s_correl:set_domain_{}:{}'.format(self.correlation_name, correlation_id))
        return list(res) if res else []

    def _get_item_correlation_obj(self, item_id):
        '''
        Return correlation of a given item id.

        :param item_id: item id
        :type item_id: str

        :return: a list of correlation
        :rtype: list
        '''
        res = r_serv_metadata.smembers('item:s_correl:{}:{}'.format(self.correlation_name, item_id))
        return list(res) if res else []

    def get_correlation_all_object(self, correlation_value, correlation_objects=None):
        '''
        Return {object type: [ids]} for every requested object type that has
        at least one correlated object.

        :param correlation_objects: object types to look up; every type of
                                    get_all_correlation_objects() when empty
        '''
        if not correlation_objects:
            correlation_objects = get_all_correlation_objects()
        correlation_obj = {}
        for correlation_object in correlation_objects:
            if correlation_object == 'paste':
                res = self._get_items(correlation_value)
            elif correlation_object == 'domain':
                # was self.get_correlation_obj_domain(), a method that does not exist
                res = self._get_correlation_obj_domain(correlation_value)
            else:
                res = None
            if res:
                correlation_obj[correlation_object] = res
        return correlation_obj

    def update_correlation_daterange(self, obj_id, date):
        '''Widen the stored first_seen / last_seen range of obj_id so it includes date.'''
        date = int(date)
        metadata_key = 's_correl:{}:metadata:{}'.format(self.correlation_name, obj_id)
        # obj_id doesn't exist yet
        if not r_serv_metadata.exists(metadata_key):
            r_serv_metadata.hset(metadata_key, 'first_seen', date)
            r_serv_metadata.hset(metadata_key, 'last_seen', date)
        else:
            # the two getters were swapped, so the range never widened correctly
            first_seen = self.get_correlation_first_seen(obj_id, r_int=True)
            last_seen = self.get_correlation_last_seen(obj_id, r_int=True)
            if date < first_seen:
                r_serv_metadata.hset(metadata_key, 'first_seen', date)
            if date > last_seen:
                r_serv_metadata.hset(metadata_key, 'last_seen', date)

    def save_item_correlation(self, obj_id, item_id, item_date):
        '''Record that obj_id was seen in item_id on item_date (and in its domain if crawled).'''
        self.update_correlation_daterange(obj_id, item_date)
        # global set
        r_serv_metadata.sadd('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id), item_id)

        # daily
        r_serv_metadata.zincrby('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id, 1)

        # all correlation
        r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, 1)

        # item
        r_serv_metadata.sadd('item:s_correl:{}:{}'.format(self.correlation_name, item_id), obj_id)

        # domain
        if Item.is_crawled(item_id):
            domain = Item.get_item_domain(item_id)
            # previously passed an undefined 'subtype' as an extra argument
            self.save_domain_correlation(domain, obj_id)

    def delete_item_correlation(self, subtype, obj_id, item_id, item_date):
        '''
        Remove the link between obj_id and item_id and decrement the counters.

        :param subtype: unused, kept for signature compatibility
        '''
        #self.update_correlation_daterange ! # # TODO:
        r_serv_metadata.srem('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id), item_id)
        r_serv_metadata.srem('item:s_correl:{}:{}'.format(self.correlation_name, item_id), obj_id)

        res = r_serv_metadata.zincrby('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id, -1)
        if int(res) < 0: # remove last
            r_serv_metadata.zrem('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id)

        res = r_serv_metadata.zscore('s_correl:{}:all'.format(self.correlation_name), obj_id)
        # zscore returns None for an unknown member
        if res is not None and int(res) > 0:
            r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, -1)

    def save_domain_correlation(self, domain, obj_id):
        '''Link obj_id and domain in both directions.'''
        r_serv_metadata.sadd('domain:s_correl:{}:{}'.format(self.correlation_name, domain), obj_id)
        r_serv_metadata.sadd('s_correl:set_domain_{}:{}'.format(self.correlation_name, obj_id), domain)

    def delete_domain_correlation(self, domain, obj_id):
        '''Remove the two-way link between obj_id and domain.'''
        r_serv_metadata.srem('domain:s_correl:{}:{}'.format(self.correlation_name, domain), obj_id)
        r_serv_metadata.srem('s_correl:set_domain_{}:{}'.format(self.correlation_name, obj_id), domain)

    ######

    def save_correlation(self, obj_id, date_range):
        '''
        Register obj_id in the global sorted set (score unchanged) and update
        its date range from date_range['date_from'] / date_range['date_to'].
        '''
        r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, 0)
        self.update_correlation_daterange(obj_id, date_range['date_from'])
        if date_range['date_from'] != date_range['date_to']:
            self.update_correlation_daterange(obj_id, date_range['date_to'])
        return True

    def save_obj_relationship(self, obj_id, obj2_type, obj2_id):
        '''Save the relationship between obj_id and a 'domain' or 'item' object.'''
        if obj2_type == 'domain':
            self.save_domain_correlation(obj2_id, obj_id)
        elif obj2_type == 'item':
            self.save_item_correlation(obj_id, obj2_id, Item.get_item_date(obj2_id))

    def delete_obj_relationship(self, obj_id, obj2_type, obj2_id):
        '''Delete the relationship between obj_id and a 'domain' or 'item' object.'''
        if obj2_type == 'domain':
            self.delete_domain_correlation(obj2_id, obj_id)
        elif obj2_type == 'item':
            # delete_item_correlation() takes a leading (unused) subtype argument;
            # it was omitted here, which raised TypeError
            self.delete_item_correlation(self.correlation_name, obj_id, obj2_id, Item.get_item_date(obj2_id))
# def create_correlation(self, subtype, obj_id, obj_meta):
# res = self.sanythise_correlation_types([subtype], r_boolean=True)
# if not res:
# print('invalid subtype')
# return False
# first_seen = obj_meta.get('first_seen', None)
# last_seen = obj_meta.get('last_seen', None)
# date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')
# res = self.save_correlation(subtype, obj_id, date_range)
# if res and 'tags' in obj_meta:
# # # TODO: handle mixed tags: taxonomies and Galaxies
# pass
# #Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type=self.get_correlation_obj_type())
# return True
#
# # # TODO: handle tags
# def delete_correlation(self, obj_id):
# pass
######## API EXPOSED ########
######## ########

View file

@ -21,7 +21,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from packages import Tag
from lib import Tag
class Tags(AbstractModule):
@ -47,7 +47,7 @@ class Tags(AbstractModule):
item = Item(mess_split[1])
# Create a new tag
Tag.add_tag('item', tag, item.get_id())
Tag.add_object_tag(tag, 'item', item.get_id())
print(f'{item.get_id()}: Tagged {tag}')
# Forward message to channel

View file

@ -28,8 +28,8 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages import Tag
from lib import ConfigLoader
from lib import Tag
class SubmitPaste(AbstractModule):
@ -298,10 +298,10 @@ class SubmitPaste(AbstractModule):
# add tags
for tag in ltags:
Tag.add_tag('item', tag, rel_item_path)
Tag.add_object_tag(tag, 'item', rel_item_path)
for tag in ltagsgalaxies:
Tag.add_tag('item', tag, rel_item_path)
Tag.add_object_tag(tag, 'item', rel_item_path)
self.r_serv_log_submit.incr(f'{uuid}:nb_end')
self.r_serv_log_submit.incr(f'{uuid}:nb_sucess')

View file

@ -116,6 +116,8 @@ def get_nb_days_by_daterange(date_from, date_to):
delta = date_to - date_from # timedelta
return len(range(delta.days + 1))
def get_date_range_today(date_from):
    """Return substract_date() applied between date_from and today's date string."""
    today = get_today_date_str()
    return substract_date(date_from, today)
def substract_date(date_from, date_to):
date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))

View file

@ -1,785 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
import datetime
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Date
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import item_basic
from pytaxonomies import Taxonomies
from pymispgalaxies import Galaxies, Clusters
config_loader = ConfigLoader.ConfigLoader()
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
#r_serv_tags = config_loader.get_db_conn("Kvrocks_Tags")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
def build_unsafe_tags():
    '''
    Build the set of unsafe tags: the child-exploitation dark-web topic tag
    plus every machinetag of the 'copine-scale' taxonomy (when available).
    '''
    ## CE content
    tags = {'dark-web:topic="pornography-child-exploitation"'}
    # add copine-scale tags
    copine_scale = Taxonomies().get('copine-scale')
    if copine_scale:
        for machinetag in copine_scale.machinetags():
            tags.add(machinetag)
    return tags
# set of unsafe tags
unsafe_tags = build_unsafe_tags()
def is_tags_safe(ltags):
    '''
    Check that a list of tags contains no unsafe tag (CE, ...)

    :param ltags: list of tags
    :type ltags: list

    :return: True when none of the tags is in the unsafe set
    :rtype: boolean
    '''
    safe = unsafe_tags.isdisjoint(ltags)
    return safe
#### Taxonomies - Galaxies ####
def get_taxonomie_from_tag(tag):
    '''
    Return the namespace of a tag: everything before the first ':'.

    A tag without ':' is returned unchanged. str.split() always yields at
    least one element, so the former IndexError handler could never run and
    this function never returns None.
    '''
    return tag.split(':', 1)[0]
def get_galaxy_from_tag(tag):
    '''
    Return the galaxy name of a tag: the second ':'-separated field, cut at
    its first '='. Return None when the tag contains no ':'.
    '''
    fields = tag.split(':')
    if len(fields) < 2:
        return None
    return fields[1].split('=')[0]
def get_taxonomies():
    '''Return the keys of a freshly loaded pytaxonomies Taxonomies object.'''
    taxonomies = Taxonomies()
    return taxonomies.keys()
def is_taxonomie(taxonomie, taxonomies=[]):
    '''
    Return True if taxonomie is in taxonomies.

    When taxonomies is empty the list is loaded with get_taxonomies().
    '''
    known_taxonomies = taxonomies if taxonomies else get_taxonomies()
    return taxonomie in known_taxonomies
def get_active_taxonomies(r_set=False):
    '''Return the members of 'active_taxonomies' (copied into a new set if r_set).'''
    active = r_serv_tags.smembers('active_taxonomies')
    return set(active) if r_set else active
def get_active_galaxies(r_set=False):
    '''Return the members of 'active_galaxies' (copied into a new set if r_set).'''
    active = r_serv_tags.smembers('active_galaxies')
    return set(active) if r_set else active
def get_all_taxonomies_tags(): # # TODO: add + REMOVE + Update
    '''Return the members of the 'active_taxonomies_tags' set.'''
    taxonomies_tags = r_serv_tags.smembers('active_taxonomies_tags')
    return taxonomies_tags
def get_all_galaxies_tags(): # # TODO: add + REMOVE + Update
    '''Return the members of the 'active_galaxies_tags' set.'''
    galaxies_tags = r_serv_tags.smembers('active_galaxies_tags')
    return galaxies_tags
def get_all_custom_tags(): # # TODO: add + REMOVE + Update
    '''Return the members of the 'tags:custom' set.'''
    custom_tags = r_serv_tags.smembers('tags:custom')
    return custom_tags
def get_taxonomies_enabled_tags(r_list=False):
    '''
    Return the union of the enabled tags of every active taxonomy.

    :param r_list: return a list instead of a set
    :return: enabled tags; empty when no taxonomy is active (this case used
             to raise UnboundLocalError)
    '''
    l_tag_keys = [f'active_tag_{taxonomie}' for taxonomie in get_active_taxonomies()]
    if len(l_tag_keys) > 1:
        res = r_serv_tags.sunion(l_tag_keys[0], *l_tag_keys[1:])
    elif l_tag_keys:
        res = r_serv_tags.smembers(l_tag_keys[0])
    else:
        res = set()

    #### # WARNING: # TODO: DIRTY FIX, REPLACE WITH LOCAL TAGS ####

    if r_list:
        return list(res)
    return res
def get_galaxies_enabled_tags():
    '''
    Return the union of the enabled tags of every active galaxy
    (an empty list when no galaxy is active).
    '''
    galaxy_keys = [f'active_tag_galaxies_{galaxy}' for galaxy in get_active_galaxies()]
    if not galaxy_keys:
        return []
    if len(galaxy_keys) == 1:
        return r_serv_tags.smembers(galaxy_keys[0])
    return r_serv_tags.sunion(galaxy_keys[0], *galaxy_keys[1:])
def get_custom_enabled_tags(r_list=False):
    '''Return the members of 'tags:custom:enabled_tags' (as a list if r_list).'''
    enabled = r_serv_tags.smembers('tags:custom:enabled_tags')
    return list(enabled) if r_list else enabled
def get_taxonomies_customs_tags(r_list=False):
    '''Return enabled custom tags united with enabled taxonomy tags (as a list if r_list).'''
    custom_tags = get_custom_enabled_tags()
    merged = custom_tags.union(get_taxonomies_enabled_tags())
    return list(merged) if r_list else merged
def get_taxonomie_enabled_tags(taxonomie, r_list=False):
    '''Return the members of 'active_tag_<taxonomie>' (as a list if r_list).'''
    enabled = r_serv_tags.smembers(f'active_tag_{taxonomie}')
    return list(enabled) if r_list else enabled
def get_galaxy_enabled_tags(galaxy, r_list=False):
    '''Return the members of 'active_tag_galaxies_<galaxy>' (as a list if r_list).'''
    enabled = r_serv_tags.smembers(f'active_tag_galaxies_{galaxy}')
    return list(enabled) if r_list else enabled
def is_taxonomie_tag_enabled(taxonomie, tag):
    '''
    Return True if tag is a member of the 'active_tag_<taxonomie>' set.

    Uses SISMEMBER (like is_custom_tag_enabled) instead of downloading the
    whole set for a single membership test. A None taxonomie returns False
    rather than raising TypeError while the key is built.
    '''
    if taxonomie is None:
        return False
    return bool(r_serv_tags.sismember('active_tag_' + taxonomie, tag))
def is_galaxy_tag_enabled(galaxy, tag):
    '''
    Return True if tag is a member of the 'active_tag_galaxies_<galaxy>' set.

    Uses SISMEMBER (like is_custom_tag_enabled) instead of downloading the
    whole set for a single membership test. A None galaxy (what
    get_galaxy_from_tag() returns for a tag without ':') returns False
    rather than raising TypeError while the key is built.
    '''
    if galaxy is None:
        return False
    return bool(r_serv_tags.sismember('active_tag_galaxies_' + galaxy, tag))
def is_custom_tag_enabled(tag):
    '''Return whether tag is a member of 'tags:custom:enabled_tags'.'''
    enabled = r_serv_tags.sismember('tags:custom:enabled_tags', tag)
    return enabled
def enable_taxonomy(taxonomie, enable_tags=True):
    '''
    Enable a taxonomy. (UI)

    :param taxonomie: MISP taxonomy
    :type taxonomie: str
    :param enable_tags: when false nothing is written to the database
    :type enable_tags: boolean
    '''
    taxonomies = Taxonomies()
    if not enable_tags:
        return
    taxonomie_info = taxonomies.get(taxonomie)
    if not taxonomie_info:
        print(f'Error: {taxonomie}, please update pytaxonomies')
        return
    # activate taxonomie
    r_serv_tags.sadd('active_taxonomies', taxonomie)
    # activate taxonomie tags
    tags_key = f'active_tag_{taxonomie}'
    for machinetag in taxonomie_info.machinetags():
        r_serv_tags.sadd(tags_key, machinetag)
        #r_serv_tags.sadd('active_taxonomies_tags', tag)
# Check if tags are enabled in AIL
def is_valid_tags_taxonomies_galaxy(list_tags, list_tags_galaxy):
    '''
    Check if tags are enabled in AIL.

    Every taxonomy tag must belong to an active taxonomy and be enabled in
    it; the same holds for galaxy tags. Empty or None lists are accepted.
    '''
    if list_tags:
        active_taxonomies = get_active_taxonomies()
        for tag in list_tags:
            taxonomie = get_taxonomie_from_tag(tag)
            if taxonomie is None or taxonomie not in active_taxonomies:
                return False
            if not is_taxonomie_tag_enabled(taxonomie, tag):
                return False

    if list_tags_galaxy:
        active_galaxies = get_active_galaxies()
        for tag in list_tags_galaxy:
            galaxy = get_galaxy_from_tag(tag)
            if galaxy is None or galaxy not in active_galaxies:
                return False
            if not is_galaxy_tag_enabled(galaxy, tag):
                return False
    return True
def is_taxonomie_tag(tag, namespace=None):
    '''
    Return True if the tag namespace is a known taxonomy.

    The namespace defaults to the text before the first ':' of tag; the
    'misp-galaxy' namespace is never a taxonomy.
    '''
    ns = namespace if namespace else tag.split(':')[0]
    if ns == 'misp-galaxy':
        return False
    return is_taxonomie(ns)
def is_galaxy_tag(tag, namespace=None):
    '''
    Return True if the tag namespace is 'misp-galaxy'.

    The namespace defaults to the text before the first ':' of tag.
    '''
    ns = namespace if namespace else tag.split(':')[0]
    return ns == 'misp-galaxy'
def is_custom_tag(tag):
    '''Return whether tag is a member of the 'tags:custom' set.'''
    is_custom = r_serv_tags.sismember('tags:custom', tag)
    return is_custom
# # TODO:
# def is_valid_tag(tag):
# pass
def is_enabled_tag(tag, enabled_namespace=None):
    '''
    Return True if tag is enabled, dispatching on its kind: taxonomy tags
    are checked against taxonomies, every other tag against galaxies.
    '''
    if not is_taxonomie_tag(tag):
        return is_enabled_galaxy_tag(tag, enabled_galaxies=enabled_namespace)
    return is_enabled_taxonomie_tag(tag, enabled_taxonomies=enabled_namespace)
def are_enabled_tags(tags):
    '''Return True only if every tag is enabled (taxonomy or galaxy).'''
    active_taxonomies = get_active_taxonomies(r_set=True)
    active_galaxies = get_active_galaxies(r_set=True)
    for tag in tags:
        if is_taxonomie_tag(tag):
            enabled = is_enabled_taxonomie_tag(tag, enabled_taxonomies=active_taxonomies)
        else:
            enabled = is_enabled_galaxy_tag(tag, enabled_galaxies=active_galaxies)
        if not enabled:
            return False
    return True
def is_enabled_taxonomie_tag(tag, enabled_taxonomies=None):
    '''
    Return True if the tag's taxonomy is active and the tag is enabled in it.

    enabled_taxonomies defaults to get_active_taxonomies() when empty.
    '''
    active = enabled_taxonomies if enabled_taxonomies else get_active_taxonomies()
    taxonomie = get_taxonomie_from_tag(tag)
    if taxonomie is None or taxonomie not in active:
        return False
    return True if is_taxonomie_tag_enabled(taxonomie, tag) else False
def is_enabled_galaxy_tag(tag, enabled_galaxies=None):
    '''
    Return True if the tag's galaxy is active and the tag is enabled in it.

    enabled_galaxies defaults to get_active_galaxies() when empty.
    '''
    active = enabled_galaxies if enabled_galaxies else get_active_galaxies()
    galaxy = get_galaxy_from_tag(tag)
    if galaxy is None or galaxy not in active:
        return False
    return True if is_galaxy_tag_enabled(galaxy, tag) else False
def sort_tags_taxonomies_galaxies(tags):
    '''Split tags into (taxonomy tags, other tags), keeping the input order.'''
    taxonomies_tags, galaxies_tags = [], []
    for tag in tags:
        bucket = taxonomies_tags if is_taxonomie_tag(tag) else galaxies_tags
        bucket.append(tag)
    return taxonomies_tags, galaxies_tags
#### ####
def is_tag_in_all_tag(tag):
    '''Return True if tag is a member of the global 'list_tags' set.'''
    return True if r_serv_tags.sismember('list_tags', tag) else False
def get_tag_synonyms(tag):
    '''Return the members of the 'synonym_tag_<tag>' set.'''
    synonyms_key = f'synonym_tag_{tag}'
    return r_serv_tags.smembers(synonyms_key)
def get_tag_dislay_name(tag):
    '''
    Return the display name of a tag: the tag followed by its synonyms,
    all separated by ', '.

    The tag used to be glued to the first synonym without any separator
    ('tagsyn1, syn2'). Synonyms come from a set, so their order is not
    guaranteed.
    '''
    tag_synonyms = get_tag_synonyms(tag)
    if not tag_synonyms:
        return tag
    return ', '.join([tag, *tag_synonyms])
def get_tags_selector_dict(tags):
    '''Return the selector dict (see get_tag_selector_dict) of every tag.'''
    return [get_tag_selector_dict(tag) for tag in tags]
def get_tag_selector_dict(tag):
    '''Return {'name': <display name>, 'id': <tag>} for a tag.'''
    display_name = get_tag_dislay_name(tag)
    return {'name': display_name, 'id': tag}
def get_tags_selector_data():
    '''Return a dict holding the active taxonomies and the active galaxies.'''
    return {
        'active_taxonomies': get_active_taxonomies(),
        'active_galaxies': get_active_galaxies(),
    }
def get_min_tag(tag):
    '''
    Return the short form of a tag.

    - 'namespace:predicate="value"' -> value, with its first and last
      character (the quotes) removed
    - 'something=' (empty value)    -> the left part, with its first and
      last character removed
    - custom tag without '='        -> the tag unchanged

    Only the first '=' separates name and value, so a value containing '='
    is no longer truncated.
    '''
    name, separator, value = tag.partition('=')
    # custom tags
    if not separator:
        return name
    if value != '':
        return value[1:-1]
    # no value
    return name[1:-1]
def get_obj_tags_minimal(item_id):
    '''Return [{"tag": tag, "min_tag": short form}] for each tag of the object.'''
    minimal_tags = []
    for tag in get_obj_tag(item_id):
        minimal_tags.append({"tag": tag, "min_tag": get_min_tag(tag)})
    return minimal_tags
def unpack_str_tags_list(str_tags_list):
    '''
    Split a comma-separated string of tags into a list.

    :param str_tags_list: comma-separated tags, may be empty or None
    :return: list of tags; [] for an empty or None input

    The former replace('"', '\\"') call replaced a double quote by itself
    (a no-op) and crashed on None before the emptiness check ran.
    '''
    if not str_tags_list:
        return []
    return str_tags_list.split(',')
# used by modal
def get_modal_add_tags(item_id, object_type='item'):
    '''
    Modal: add tags to domain or Paste
    '''
    modal_data = {
        "active_taxonomies": get_active_taxonomies(),
        "active_galaxies": get_active_galaxies(),
        "object_id": item_id,
        "object_type": object_type,
    }
    return modal_data
######## NEW VERSION ########
def create_custom_tag(tag):
    '''Register tag as a custom tag and mark it as enabled.'''
    for set_key in ('tags:custom', 'tags:custom:enabled_tags'):
        r_serv_tags.sadd(set_key, tag)
def get_tag_first_seen(tag, r_int=False):
    '''
    Get tag first seen (current: item only)

    With r_int the value is cast to int, 99999999 when it is missing.
    '''
    first_seen = r_serv_tags.hget(f'tag_metadata:{tag}', 'first_seen')
    if not r_int:
        return first_seen
    return 99999999 if first_seen is None else int(first_seen)
def get_tag_last_seen(tag, r_int=False):
    '''
    Get tag last seen (current: item only)

    With r_int the value is cast to int, 0 when it is missing.
    '''
    last_seen = r_serv_tags.hget(f'tag_metadata:{tag}', 'last_seen')
    if not r_int:
        return last_seen
    return 0 if last_seen is None else int(last_seen)
# # TODO: ADD color
def get_tag_metadata(tag, r_int=False):
    '''
    Get tag metadata (current: item only)

    Return a dict with the 'tag', 'first_seen' and 'last_seen' keys.
    '''
    first_seen = get_tag_first_seen(tag, r_int=r_int)
    last_seen = get_tag_last_seen(tag, r_int=r_int)
    return {"tag": tag, 'first_seen': first_seen, 'last_seen': last_seen}
def get_tags_min_last_seen(l_tags, r_int=False):
    '''
    Get the smallest last seen from a list of tags (current: item only)

    The result is capped at 99999999, which is also the value returned for
    an empty list. Returned as int if r_int, else as str.
    '''
    last_seens = [get_tag_last_seen(tag, r_int=True) for tag in l_tags]
    oldest = min(last_seens + [99999999])
    return oldest if r_int else str(oldest)
def is_obj_tagged(object_id, tag):
    '''
    Check if a object is tagged

    :param object_id: object id
    :type object_id: str
    :param tag: tag
    :type tag: str

    :return: is object tagged
    :rtype: boolean
    '''
    return r_serv_metadata.sismember(f'tag:{object_id}', tag)
def get_all_tags():
    '''Return the members of the global 'list_tags' set as a list.'''
    all_tags = r_serv_tags.smembers('list_tags')
    return list(all_tags)
def get_all_obj_tags(object_type):
    '''Return the members of 'list_tags:<object_type>' as a list.'''
    obj_tags = r_serv_tags.smembers(f'list_tags:{object_type}')
    return list(obj_tags)
def get_obj_tag(object_id):
    '''
    Return all the tags of a given object.
    :param object_id: (item_id, domain, ...)
    '''
    tags = r_serv_metadata.smembers(f'tag:{object_id}')
    return list(tags) if tags else []
def update_tag_first_seen(tag, tag_first_seen, tag_last_seen):
    '''
    Recompute the 'first_seen' metadata of a tag after a removal.

    Walk forward one day at a time from tag_first_seen until a day that
    still has objects tagged ('<tag>:<date>' set not empty) and store it as
    the new first_seen. When no such day exists up to tag_last_seen, both
    first_seen and last_seen are deleted.

    Iterative so that a long date range cannot exhaust the recursion limit.
    Dates are compared as strings, which also ends the walk if the range is
    inverted.
    NOTE(review): assumes dates are YYYYMMDD and that Date.date_add_day()
    accepts and returns such a string - confirm against packages/Date.
    '''
    metadata_key = 'tag_metadata:{}'.format(tag)
    # update_tag_metadata() passes ints; normalise so dates compare consistently
    current_day = str(tag_first_seen)
    last_day = str(tag_last_seen)
    while True:
        if r_serv_tags.scard('{}:{}'.format(tag, current_day)) > 0:
            r_serv_tags.hset(metadata_key, 'first_seen', current_day)
            return
        if current_day >= last_day:
            # no tag in db
            r_serv_tags.hdel(metadata_key, 'first_seen')
            r_serv_tags.hdel(metadata_key, 'last_seen')
            return
        current_day = str(Date.date_add_day(current_day))
def update_tag_last_seen(tag, tag_first_seen, tag_last_seen):
    '''
    Refresh the 'last_seen' metadata of a tag after a removal.

    If objects are still tagged on tag_last_seen the value is rewritten.
    Otherwise, when first and last seen are the same day, both fields are
    deleted; walking back to an earlier day is not implemented yet.
    '''
    metadata_key = 'tag_metadata:{}'.format(tag)
    if r_serv_tags.scard('{}:{}'.format(tag, tag_last_seen)) > 0:
        r_serv_tags.hset(metadata_key, 'last_seen', tag_last_seen)
    elif tag_first_seen == tag_last_seen:
        # no tag in db
        r_serv_tags.hdel(metadata_key, 'first_seen')
        r_serv_tags.hdel(metadata_key, 'last_seen')
    # # TODO: # FIXME:
    #tag_last_seen = Date.date_substract_day(str(tag_last_seen))
    #update_tag_last_seen(tag, tag_first_seen, tag_last_seen)
## Objects tags ##
def update_tag_metadata(tag, tag_date, object_type=None, add_tag=True):
    '''
    Update tag metadata (current: item only)

    On add, first_seen / last_seen are widened to include tag_date.
    On removal, they are recomputed when tag_date was one of the bounds.
    '''
    # # TODO: use another getter (get all object with date)
    if object_type != "item":
        return
    # get object metadata
    tag_metadata = get_tag_metadata(tag, r_int=True)
    first_seen = tag_metadata['first_seen']
    last_seen = tag_metadata['last_seen']
    #############
    ## ADD tag ##
    if add_tag:
        # update first_seen
        if tag_date < first_seen:
            r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_date)
        # update last_seen
        if tag_date > last_seen:
            r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_date)
        return
    ################
    ## REMOVE tag ##
    if tag_date == first_seen:
        update_tag_first_seen(tag, first_seen, last_seen)
    if tag_date == last_seen:
        update_tag_last_seen(tag, first_seen, last_seen)
def _is_tag_used_by_obj_type(obj_type, tag):
    '''Return True if at least one object of obj_type still carries tag.'''
    if obj_type == 'item':
        # item tags are stored by date ('<tag>:<date>'); the metadata hash is
        # what this module checks to know whether any item still has the tag
        return bool(r_serv_tags.exists('tag_metadata:{}'.format(tag)))
    return bool(r_serv_tags.exists('{}:{}'.format(obj_type, tag)))


def update_tag_global_by_obj_type(object_type, tag):
    '''
    Clean the global tag sets after a tag was removed from an object.

    If no object of object_type carries tag any more, the tag is removed
    from 'list_tags:<object_type>'; if no object of any type carries it, it
    is also removed from the global 'list_tags' set.

    The global check used to look up the key 'item:<tag>', which add_obj_tag()
    never writes for items, so a tag still present on items could be dropped
    from 'list_tags'.
    '''
    if _is_tag_used_by_obj_type(object_type, tag):
        return
    # update object global tags
    r_serv_tags.srem('list_tags:{}'.format(object_type), tag)
    # update global tags
    for obj_type in get_all_objects():
        if _is_tag_used_by_obj_type(obj_type, tag):
            return
    r_serv_tags.srem('list_tags', tag)
def get_all_objects():
    '''Return the names of the object types that can carry tags.'''
    taggable_types = ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'image']
    return taggable_types
def add_global_tag(tag, object_type=None):
    '''
    Create a set of all tags used in AIL (all + by object)

    :param tag: tag
    :type tag: str
    :param object_type: object type
    :type object_type: str
    '''
    r_serv_tags.sadd('list_tags', tag)
    if not object_type:
        return
    r_serv_tags.sadd(f'list_tags:{object_type}', tag)
def add_obj_tags(object_id, object_type, tags=[], galaxy_tags=[]):
    '''
    Add taxonomy tags, then galaxy tags, to an object.

    Empty tags are skipped. Processing stops at the first tag that is not
    enabled and an (error dict, 400) tuple is returned; tags added before
    it stay in place. Return None on success.
    '''
    obj_date = get_obj_date(object_type, object_id)
    for tag in tags:
        if not tag:
            continue
        taxonomie = get_taxonomie_from_tag(tag)
        if not is_taxonomie_tag_enabled(taxonomie, tag):
            return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400)
        add_tag(object_type, tag, object_id, obj_date=obj_date)

    for tag in galaxy_tags:
        if not tag:
            continue
        galaxy = get_galaxy_from_tag(tag)
        if not is_galaxy_tag_enabled(galaxy, tag):
            return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400)
        add_tag(object_type, tag, object_id, obj_date=obj_date)
# TEMPLATE + API QUERY
def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"):
    '''
    API / template entry point: add tags to an object.

    :param tags: taxonomy tags
    :param galaxy_tags: galaxy tags
    :param object_id: id of the object to tag
    :param object_type: one of 'item', 'domain', 'image', 'decoded'
    :return: (response dict, HTTP status code)
    '''
    if object_id is None:
        return ({'status': 'error', 'reason': 'object_id id not found'}, 404)
    if not tags and not galaxy_tags:
        return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400)
    if object_type not in ('item', 'domain', 'image', 'decoded'): # # TODO: put me in another file
        return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400)

    # remove empty tags (new lists are built, the arguments are not modified)
    tags = [tag for tag in tags if tag]
    galaxy_tags = [tag for tag in galaxy_tags if tag]

    res = add_obj_tags(object_id, object_type, tags=tags, galaxy_tags=galaxy_tags)
    if res:
        return res

    res_dict = {
        'tags': tags + galaxy_tags,
        'id': object_id,
        'type': object_type,
    }
    return (res_dict, 200)
def add_obj_tag(object_type, object_id, tag, obj_date=None):
    '''
    Store a tag on an object.

    Items are indexed by date ('<tag>:<obj_date>'), which makes obj_date
    mandatory for them; tags of crawled items are also added to the item's
    domain, except the two 'infoleak:submission' tags. Other object types
    are indexed under '<object_type>:<tag>'.

    :raises ValueError: for an item when obj_date is None
    '''
    # # TODO: # FIXME: # REVIEW: rename me !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    if object_type != "item":
        r_serv_metadata.sadd('tag:{}'.format(object_id), tag)
        r_serv_tags.sadd('{}:{}'.format(object_type, tag), object_id)
        return

    if obj_date is None:
        raise ValueError("obj_date is None")

    # add tag
    r_serv_metadata.sadd('tag:{}'.format(object_id), tag)
    r_serv_tags.sadd('{}:{}'.format(tag, obj_date), object_id)

    # add domain tag
    if item_basic.is_crawled(object_id) and tag!='infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"':
        domain = item_basic.get_item_domain(object_id)
        add_tag("domain", tag, domain)
def add_tag(object_type, tag, object_id, obj_date=None):
    '''
    Tag an object and update the global tag sets, tag metadata and daily
    statistics.

    :param obj_date: object date; when missing or not convertible to int it
                     is looked up with get_obj_date()

    NOTE(review): the source indentation was lost; the daily counter is
    assumed to be incremented on every call, even when the object was
    already tagged - confirm.
    '''
    # new tag
    if not is_obj_tagged(object_id, tag):
        # # TODO: # FIXME: sanityze object_type
        if obj_date:
            try:
                obj_date = int(obj_date)
            except (TypeError, ValueError):
                # only conversion failures are expected here; a bare except
                # would also swallow KeyboardInterrupt / SystemExit
                obj_date = None
        if not obj_date:
            obj_date = get_obj_date(object_type, object_id)
        add_global_tag(tag, object_type=object_type)
        add_obj_tag(object_type, object_id, tag, obj_date=obj_date)
        update_tag_metadata(tag, obj_date, object_type=object_type)

    # create tags stats  # # TODO: put me in cache
    r_serv_tags.hincrby('daily_tags:{}'.format(datetime.date.today().strftime("%Y%m%d")), tag, 1)
def delete_obj_tag(object_type, object_id, tag, obj_date):
    '''
    Remove a tag from an object and from the matching tag index.

    For items the obj_date argument is ignored: the date is looked up again
    with get_obj_date().
    '''
    # # TODO: # FIXME: # REVIEW: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    if object_type == "item":
        obj_date = get_obj_date(object_type, object_id)
        index_key = '{}:{}'.format(tag, obj_date)
    else:
        index_key = '{}:{}'.format(object_type, tag)
    r_serv_metadata.srem('tag:{}'.format(object_id), tag)
    r_serv_tags.srem(index_key, object_id)
def delete_tag(object_type, tag, object_id, obj_date=None):
    '''
    Remove a tag from an object, then refresh tag metadata and global sets.

    Return an (error dict, 400) tuple when the object does not carry the
    tag, None otherwise.
    '''
    if not is_obj_tagged(object_id, tag):
        return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400)
    # tag exist
    if not obj_date:
        obj_date = get_obj_date(object_type, object_id)
    delete_obj_tag(object_type, object_id, tag, obj_date)
    update_tag_metadata(tag, obj_date, object_type=object_type, add_tag=False)
    update_tag_global_by_obj_type(object_type, tag)
# # TODO: move me
def get_obj_date(object_type, object_id):
    '''Return the item date as an int; None for every other object type.'''
    if object_type != "item":
        return None
    return int(item_basic.get_item_date(object_id))
# API QUERY
def api_delete_obj_tags(tags=[], object_id=None, object_type="item"):
    '''
    API entry point: remove tags from an object.

    :return: (response dict, HTTP status code)
    '''
    if not object_id:
        return ({'status': 'error', 'reason': 'object id not found'}, 404)
    if not tags:
        return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400)

    error = delete_obj_tags(object_id, object_type, tags)
    if error:
        return error

    return ({'tags': tags, 'id': object_id}, 200)
def delete_obj_tags(object_id, object_type, tags):
    """Remove every tag in *tags* from the object, stopping at the first error.

    :return: the first error returned by ``delete_tag()``, or ``None`` when
        every tag was removed
    """
    # Resolve the date once and reuse it for each tag.
    resolved_date = get_obj_date(object_type, object_id)
    for current_tag in tags:
        error = delete_tag(object_type, current_tag, object_id, obj_date=resolved_date)
        if error:
            return error
    return None
def delete_obj_all_tags(obj_id, obj_type):
    """Remove every tag currently attached to the object.

    Any error reported by ``delete_obj_tags()`` is discarded.
    """
    current_tags = get_obj_tag(obj_id)
    delete_obj_tags(obj_id, obj_type, current_tags)
def sanitise_tags_date_range(l_tags, date_from=None, date_to=None):
    """Return a sanitised date range for a tag search.

    When either bound is missing, both bounds are replaced by the value of
    ``get_tags_min_last_seen(l_tags, r_int=False)``. The result is whatever
    ``Date.sanitise_date_range()`` returns.
    """
    bound_missing = date_from is None or date_to is None
    if bound_missing:
        date_to = date_from = get_tags_min_last_seen(l_tags, r_int=False)
    return Date.sanitise_date_range(date_from, date_to)
# # TODO: verify tags + object_type
# get set_keys: intersection
def get_obj_keys_by_tags(object_type, l_tags, date_day=None):
    """Build the names of the sets holding the objects tagged with *l_tags*.

    Items are keyed by day (``<tag>:<date_day>``); every other object type
    is keyed by ``<object_type>:<tag>`` and *date_day* is unused.

    :param object_type: type of the tagged objects
    :param l_tags: tags to build a key for, one key per tag, order preserved
    :param date_day: day used in item keys; it is formatted as-is, so
        leaving it at ``None`` for items yields ``<tag>:None`` keys
    :return: list of key names
    """
    if object_type == 'item':
        return ['{}:{}'.format(tag, date_day) for tag in l_tags]
    return ['{}:{}'.format(object_type, tag) for tag in l_tags]
def get_obj_by_tag(key_tag):
    """Return the members of the tags-store set named *key_tag*."""
    members = r_serv_tags.smembers(key_tag)
    return members
def _intersect_tag_sets(l_set_keys):
    """Return the members common to every set named in *l_set_keys*."""
    # A lone key has nothing to intersect with: read the set directly.
    if len(l_set_keys) < 2:
        return get_obj_by_tag(l_set_keys[0])
    return r_serv_tags.sinter(l_set_keys[0], *l_set_keys[1:])


def _paginate_tagged_obj(l_obj, nb_obj, page):
    """Cut the list *l_obj* down to the requested page.

    :return: ``(page_items, page, nb_pages, nb_first_elem, nb_last_elem)``
        where the two positions are 1-based and inclusive; everything is
        ``0`` / empty when *l_obj* is empty
    """
    nb_all_elem = len(l_obj)
    nb_pages = -(-nb_all_elem // nb_obj)  # integer ceiling division
    if not nb_pages:
        return [], 0, 0, 0, 0
    # Clamp the page into [1, nb_pages] so a zero or negative page can never
    # produce a negative slice start.
    page = min(max(page, 1), nb_pages)
    start = nb_obj * (page - 1)
    stop = min(nb_obj * page, nb_all_elem)
    return l_obj[start:stop], page, nb_pages, start + 1, stop


def get_obj_by_tags(object_type, l_tags, date_from=None, date_to=None, nb_obj=50, page=1): # remove old object
    """Return one page of the objects carrying every tag in *l_tags*.

    Items are searched day by day over a sanitised date range; the other
    object types have no date dimension.

    :param object_type: type of the objects to search
    :param l_tags: tags that must all be present (expected to be non-empty)
    :param date_from: start of the range, items only
    :param date_to: end of the range, items only
    :param nb_obj: page size, expected to be a positive integer
    :param page: 1-based page number; out-of-range values are clamped
    :return: dict with ``tagged_obj``, ``page`` and ``nb_pages``; when
        objects were found (and always for items) also ``nb_first_elem``,
        ``nb_last_elem`` and ``nb_all_elem``; for items also ``date``
    """
    # with daterange
    if object_type == 'item':
        # sanitise date
        date_range = sanitise_tags_date_range(l_tags, date_from=date_from, date_to=date_to)
        l_dates = Date.substract_date(date_range['date_from'], date_range['date_to'])
        l_tagged_obj = []
        # TODO: stop fetching once enough objects for the requested page
        # have been collected instead of loading every day of the range.
        for date_day in l_dates:
            l_set_keys = get_obj_keys_by_tags(object_type, l_tags, date_day)
            l_tagged_obj.extend(_intersect_tag_sets(l_set_keys))

        # handle pagination
        nb_all_elem = len(l_tagged_obj)
        l_tagged_obj, page, nb_pages, nb_first_elem, nb_last_elem = _paginate_tagged_obj(l_tagged_obj, nb_obj, page)
        return {"tagged_obj": l_tagged_obj, "date": date_range,
                "page": page, "nb_pages": nb_pages, "nb_first_elem": nb_first_elem,
                "nb_last_elem": nb_last_elem, "nb_all_elem": nb_all_elem}

    # without daterange
    l_set_keys = get_obj_keys_by_tags(object_type, l_tags)
    l_tagged_obj = _intersect_tag_sets(l_set_keys)
    if not l_tagged_obj:
        return {"tagged_obj": l_tagged_obj, "page": 0, "nb_pages": 0}

    # handle pagination
    nb_all_elem = len(l_tagged_obj)
    l_tagged_obj, page, nb_pages, nb_first_elem, nb_last_elem = _paginate_tagged_obj(list(l_tagged_obj), nb_obj, page)
    return {"tagged_obj": l_tagged_obj, "page": page, "nb_pages": nb_pages,
            "nb_first_elem": nb_first_elem, "nb_last_elem": nb_last_elem, "nb_all_elem": nb_all_elem}
#### TAGS EXPORT ####
# # TODO:
def is_updated_tags_to_export(): # by type
    """Placeholder (still a TODO): always reports that nothing was updated."""
    updated = False
    return updated
def get_list_of_solo_tags_to_export_by_type(export_type): # by type
    """Return the members of the ``whitelist_<export_type>`` set.

    :param export_type: ``'misp'`` or ``'thehive'``
    :return: the set members, or ``None`` for any other export type
    """
    if export_type not in ('misp', 'thehive'):
        return None
    whitelist_key = 'whitelist_{}'.format(export_type)
    return r_serv_db.smembers(whitelist_key)
#r_serv_db.smembers('whitelist_hive')

View file

@ -72,6 +72,7 @@ maxclients 10000
# use a very strong password otherwise it will be very easy to break.
#
# requirepass foobared
requirepass ail
# If the master is password protected (using the "masterauth" configuration
# directive below) it is possible to tell the slave to authenticate before
@ -100,7 +101,7 @@ dir /home/aurelien/git/ail-framework/DATA_KVROCKS
# When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by
# default. You can specify a custom pid file location here.
# pidfile /var/run/kvrocks.pid
pidfile ""
pidfile /home/aurelien/git/ail-framework/DATA_KVROCKS/kvrocks.pid
# You can configure a slave instance to accept writes or not. Writing against
# a slave instance may be useful to store some ephemeral data (because data
@ -648,3 +649,30 @@ rocksdb.max_bytes_for_level_multiplier 10
################################ NAMESPACE #####################################
# namespace.test change.me
backup-dir /home/aurelien/git/ail-framework/DATA_KVROCKS/backup
fullsync-recv-file-delay 0
log-dir /home/aurelien/git/ail-framework/DATA_KVROCKS
unixsocketperm 26
namespace.cor ail_correls
#namespace.correl ail_correls
namespace.crawl ail_crawlers
namespace.db ail_datas
namespace.dup ail_dups
namespace.obj ail_objs
namespace.stat ail_stats
namespace.tag ail_tags
namespace.track ail_trackers
# investigation -> db ????
#

View file

@ -23,9 +23,6 @@ from os.path import join
# # TODO: put me in lib/Tag
from pytaxonomies import Taxonomies
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Tag
sys.path.append('./modules/')
sys.path.append(os.environ['AIL_BIN'])
@ -33,6 +30,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from lib.Users import User
from lib import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
@ -72,9 +70,7 @@ except Exception:
FLASK_PORT = 7000
# ========= REDIS =========#
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
# logs
log_dir = os.path.join(os.environ['AIL_HOME'], 'logs')

View file

@ -21,8 +21,8 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib import item_basic
from lib.objects.Items import Item
from lib import Tag
from export import Export
from packages import Tag
# ============ BLUEPRINT ============

View file

@ -19,16 +19,13 @@ import Flask_config
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Date
import Tag
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
from packages import Date
from lib import Tag
from lib.objects import ail_objects
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Correlate_object
r_cache = Flask_config.r_cache
r_serv_db = Flask_config.r_serv_db
r_serv_tags = Flask_config.r_serv_tags
bootstrap_label = Flask_config.bootstrap_label
# ============ BLUEPRINT ============
@ -51,6 +48,7 @@ def add_tags():
tagsgalaxies = request.args.get('tagsgalaxies')
object_id = request.args.get('object_id')
object_type = request.args.get('object_type')
subtype = '' # TODO: handle subtype object
list_tag = tags.split(',')
list_tag_galaxies = tagsgalaxies.split(',')
@ -60,7 +58,7 @@ def add_tags():
if res[1] != 200:
return str(res[0])
return redirect(Correlate_object.get_item_url(object_type, object_id))
return redirect(ail_objects.get_object_link(object_type, subtype, object_id, flask_context=True))
@tags_ui.route('/tag/delete_tag')
@login_required
@ -69,12 +67,13 @@ def delete_tag():
object_type = request.args.get('object_type')
object_id = request.args.get('object_id')
subtype = '' # TODO: handle subtype object
tag = request.args.get('tag')
res = Tag.api_delete_obj_tags(tags=[tag], object_id=object_id, object_type=object_type)
if res[1] != 200:
return str(res[0])
return redirect(Correlate_object.get_item_url(object_type, object_id))
return redirect(ail_objects.get_object_link(object_type, subtype, object_id, flask_context=True))
@tags_ui.route('/tag/get_all_tags')
@ -94,7 +93,7 @@ def get_all_taxonomies_customs_tags():
@login_read_only
def get_all_obj_tags():
object_type = request.args.get('object_type')
res = Correlate_object.sanitize_object_type(object_type)
res = ail_objects.api_sanitize_object_type(object_type)
if res:
return jsonify(res)
return jsonify(Tag.get_all_obj_tags(object_type))
@ -173,6 +172,7 @@ def get_obj_by_tags():
# # TODO: sanityze all
object_type = request.args.get('object_type')
subtype = '' # TODO: handle subtype
ltags = request.args.get('ltags')
page = request.args.get('page')
date_from = request.args.get('date_from')
@ -191,7 +191,7 @@ def get_obj_by_tags():
list_tag.append(tag.replace('"','\"'))
# object_type
res = Correlate_object.sanitize_object_type(object_type)
res = ail_objects.api_sanitize_object_type(object_type)
if res:
return jsonify(res)
@ -209,11 +209,12 @@ def get_obj_by_tags():
"nb_first_elem":dict_obj['nb_first_elem'], "nb_last_elem":dict_obj['nb_last_elem'], "nb_all_elem":dict_obj['nb_all_elem']}
for obj_id in dict_obj['tagged_obj']:
obj_metadata = Correlate_object.get_object_metadata(object_type, obj_id)
obj_metadata = ail_objects.get_object_meta(object_type, subtype, obj_id, flask_context=True)
#ail_objects.
obj_metadata['id'] = obj_id
dict_tagged["tagged_obj"].append(obj_metadata)
dict_tagged['tab_keys'] = Correlate_object.get_obj_tag_table_keys(object_type)
dict_tagged['tab_keys'] = ail_objects.get_ui_obj_tag_table_keys(object_type)
if len(list_tag) == 1:
dict_tagged['current_tags'] = [ltags.replace('"', '\"')]

View file

@ -33,12 +33,13 @@ r_serv_charts = config_loader.get_redis_conn("ARDB_Trending")
r_serv_sentiment = config_loader.get_redis_conn("ARDB_Sentiment")
r_serv_term = config_loader.get_redis_conn("ARDB_Tracker")
r_serv_cred = config_loader.get_redis_conn("ARDB_TermCred")
r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_statistics = config_loader.get_redis_conn("ARDB_Statistics")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
# # # # # # #
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
r_serv_tags = config_loader.get_db_conn("Kvrocks_Tags")
# Logger (Redis)
redis_logger = publisher

View file

@ -30,9 +30,10 @@ from flask_login import login_required
##################################
# Import Project packages
##################################
from lib import Tag
import Paste
import Import_helper
import Tag
from pytaxonomies import Taxonomies
from pymispgalaxies import Galaxies, Clusters

View file

@ -20,7 +20,7 @@ from pymispgalaxies import Galaxies, Clusters
# ============ VARIABLES ============
import Flask_config
import Tag
from lib import Tag
app = Flask_config.app
baseUrl = Flask_config.baseUrl
@ -31,7 +31,6 @@ max_preview_char = Flask_config.max_preview_char
max_preview_modal = Flask_config.max_preview_modal
bootstrap_label = Flask_config.bootstrap_label
max_tags_result = Flask_config.max_tags_result
PASTES_FOLDER = Flask_config.PASTES_FOLDER
Tags = Blueprint('Tags', __name__, template_folder='templates')

View file

@ -130,7 +130,7 @@ def get_domain_from_url(url):
pass
return domain
def get_last_domains_crawled(type):
def get_last_domains_crawled(type): # DONE
return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1)
def get_nb_domains_inqueue(type):