diff --git a/bin/DB_KVROCKS_MIGRATION.py b/bin/DB_KVROCKS_MIGRATION.py index b6220036..2b5d3c1b 100755 --- a/bin/DB_KVROCKS_MIGRATION.py +++ b/bin/DB_KVROCKS_MIGRATION.py @@ -43,34 +43,34 @@ config_loader = None # # - - CONFIGS - - # # from core import ail_2_ail -spec = importlib.util.find_spec('ail_2_ail') +spec = importlib.util.find_spec('core.ail_2_ail') old_ail_2_ail = importlib.util.module_from_spec(spec) spec.loader.exec_module(old_ail_2_ail) old_ail_2_ail.r_serv_sync = r_serv_db from lib import Tracker -spec = importlib.util.find_spec('Tracker') +spec = importlib.util.find_spec('lib.Tracker') old_Tracker = importlib.util.module_from_spec(spec) spec.loader.exec_module(old_Tracker) old_Tracker.r_serv_tracker = r_serv_tracker from lib import Investigations -spec = importlib.util.find_spec('Investigations') +spec = importlib.util.find_spec('lib.Investigations') old_Investigations = importlib.util.module_from_spec(spec) spec.loader.exec_module(old_Investigations) old_Investigations.r_tracking = r_serv_tracker from lib import crawlers -spec = importlib.util.find_spec('crawlers') +spec = importlib.util.find_spec('lib.crawlers') old_crawlers = importlib.util.module_from_spec(spec) spec.loader.exec_module(old_crawlers) old_crawlers.r_serv_onion = r_crawler -# # TODO: desable features - credentials - stats ? - sentiment analysis +# # TODO: disable features - credentials - stats ? - sentiment analysis # CREATE FUNCTION BY DB/FEATURES @@ -97,7 +97,7 @@ def core_migration(): for version in dict_update: r_kvrocks.hset('ail:update_date', version, dict_update[version]) - versions_to_update = r_serv_db.smembers('ail:to_update') + versions_to_update = r_serv_db.smembers('ail:to_update') for version in versions_to_update: r_kvrocks.sadd('ail:update:to_update', version) update_error = r_serv_db.get('ail:update_error') @@ -107,15 +107,15 @@ def core_migration(): # d4 passivedns d4_enabled = r_serv_db.hget('d4:passivedns', 'enabled') - d4_update_time = r_serv_db.hget('d4:passivedns', 'update_time') + d4_update_time = r_serv_db.hget('d4:passivedns', 'update_time') r_kvrocks.hset('d4:passivedns', 'enabled', bool(d4_enabled)) r_kvrocks.hset('d4:passivedns', 'update_time', d4_update_time) # Crawler Manager - manager_url = old_crawlers.get_splash_manager_url() - manager_api_key = old_crawlers.get_splash_api_key() - crawlers.save_splash_manager_url_api(manager_url, manager_api_key) - crawlers.reload_splash_and_proxies_list() + # manager_url = old_crawlers.get_splash_manager_url() + # manager_api_key = old_crawlers.get_splash_api_key() + # crawlers.save_splash_manager_url_api(manager_url, manager_api_key) + # crawlers.reload_splash_and_proxies_list() # Auto Export Migration ail_misp = r_serv_db.get('ail:misp') @@ -237,6 +237,7 @@ def trackers_migration(): # object migration # # TODO: in background for item_id in old_Tracker.get_tracker_items_by_daterange(tracker_uuid, meta['first_seen'], meta['last_seen']): + print(item_id) Tracker.add_tracked_item(tracker_uuid, item_id) print('RETRO HUNT MIGRATION...') @@ -269,7 +270,7 @@ def item_submit_migration(): ############################### # # -# ITEMS MIGRATION # +# TAGS MIGRATION # # # ############################### @@ -340,15 +341,53 @@ def tags_migration(): def get_item_father(item_id): return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'father') +def get_item_duplicate(item_id, r_list=True): + res = r_serv_metadata.smembers(f'dup:{item_id}') + if r_list: + if res: + return list(res) + else: + return [] + return res + +def get_item_duplicates_dict(item_id): + dict_duplicates = {} + for duplicate in get_item_duplicate(item_id): + duplicate = duplicate[1:-1].replace('\'', '').replace(' ', '').split(',') + duplicate_id = duplicate[1] + if duplicate_id not in dict_duplicates: + dict_duplicates[duplicate_id] = {} + algo = duplicate[0] + if algo == 'tlsh': + similarity = 100 - int(duplicate[2]) + else: + similarity = int(duplicate[2]) + dict_duplicates[duplicate_id][algo] = similarity + return dict_duplicates + + def items_migration(): print('ITEMS MIGRATION...') # MIGRATE IMPORTED URLEXTRACT Father - for item_id in Items.get_items_by_source('urlextract'): - father_id = get_item_father(item_id) - if father_id: - item = Items.Item(item_id) - item.set_father(father_id) + # for item_id in Items.get_items_by_source('urlextract'): + # father_id = get_item_father(item_id) + # if father_id: + # item = Items.Item(item_id) + # item.set_father(father_id) + for tag in ['infoleak:automatic-detection="credential"']: # Creditcards, Mail, Keys ??????????????????????????????? + print(f'Duplicate migration: {tag}') + tag_first = get_tag_first_seen(tag) + if tag_first: + for date in Date.get_date_range_today(tag_first): + print(date) + for item_id in get_all_items_tags_by_day(tag, date): + item = Items.Item(item_id) + duplicates_dict = get_item_duplicates_dict(item_id) + for id_2 in duplicates_dict: + for algo in duplicates_dict[id_2]: + print(algo, duplicates_dict[id_2][algo], id_2) + item.add_duplicate(algo, duplicates_dict[id_2][algo], id_2) # TODO: test cookies migration @@ -360,10 +399,10 @@ def items_migration(): # # ############################### -# Retun last crawled domains by type +# Return last crawled domains by type # domain;epoch def get_last_crawled_domains(domain_type): - return r_crawler.lrange(f'last_{domain_type}', 0 ,-1) + return r_crawler.lrange(f'last_{domain_type}', 0, -1) def get_domains_blacklist(domain_type): return r_crawler.smembers(f'blacklist_{domain_type}') @@ -414,9 +453,6 @@ def get_domain_down_by_date(domain_type, date): def get_item_link(item_id): return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link') -def get_item_father(item_id): - return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'father') - def get_item_children(item_id): return r_serv_metadata.smembers(f'paste_children:{item_id}') @@ -466,7 +502,7 @@ def get_domain_history_by_port(domain_type, domain, port): history = [] for root_id, epoch in history_tuple: dict_history = {} - epoch = int(epoch) # force int + epoch = int(epoch) # force int dict_history["epoch"] = epoch try: int(root_id) @@ -564,7 +600,7 @@ def domain_migration(): ############################### # # -# DECODEDS MIGRATION # +# DECODED MIGRATION # # # ############################### def get_estimated_type(decoded_id): @@ -803,16 +839,16 @@ if __name__ == '__main__': #core_migration() #user_migration() #tags_migration() - #items_migration() + # items_migration() #crawler_migration() # domain_migration() # TO TEST ########################### #decodeds_migration() - #screenshots_migration() + # screenshots_migration() #subtypes_obj_migration() - ail_2_ail_migration() + # ail_2_ail_migration() trackers_migration() - investigations_migration() - statistics_migration() + # investigations_migration() + # statistics_migration() diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 918a404b..476be2f6 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -233,6 +233,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Onion" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Onion.py; read x" sleep 0.1 + screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./PgpDump.py; read x" + sleep 0.1 screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x" sleep 0.1 @@ -281,8 +283,6 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Mixer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Mixer.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./PgpDump.py; read x" - sleep 0.1 screen -S "Script_AIL" -X screen -t "Tools" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tools.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "MISPtheHIVEfeeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./MISP_The_Hive_feeder.py; read x" diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py index 82ebc4de..875a2447 100755 --- a/bin/MISP_The_Hive_feeder.py +++ b/bin/MISP_The_Hive_feeder.py @@ -126,7 +126,7 @@ def feeder(message, count=0): ## FIXME: remove it if not item_basic.exist_item(item_id): if count < 10: - r_serv_db.zincrby('mess_not_saved_export', message, 1) + r_serv_db.zincrby('mess_not_saved_export', 1, message) return 0 else: r_serv_db.zrem('mess_not_saved_export', message) diff --git a/bin/export/Export.py b/bin/export/Export.py index c81c1b3a..faa79859 100755 --- a/bin/export/Export.py +++ b/bin/export/Export.py @@ -34,13 +34,6 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None ## -- ## -def get_ail_uuid(): - uuid_ail = r_serv_db.get('ail:uuid') - if uuid_ail is None: - uuid_ail = str(uuid4()) - r_serv_db.set('ail:uuid', uuid_ail) - return uuid_ail - def load_tags_to_export_in_cache(): all_exports = ['misp', 'thehive'] for export_target in all_exports: diff --git a/bin/lib/Duplicate.py b/bin/lib/Duplicate.py index ab457e85..2c597689 100755 --- a/bin/lib/Duplicate.py +++ b/bin/lib/Duplicate.py @@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader config_loader = ConfigLoader() -r_serv_db = config_loader.get_db_conn("Kvrocks_DB") +r_serv_db = config_loader.get_db_conn("Kvrocks_Duplicates") MIN_ITEM_SIZE = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: RENAME ME config_loader = None @@ -71,33 +71,40 @@ def save_object_hash(algo, date_ymonth, hash, obj_id): r_serv_db.hset(f'duplicates:hashs:{algo}:{date_ymonth}', hash, obj_id) -def get_duplicates(obj_type, subtype, id): +def get_obj_duplicates(obj_type, subtype, obj_id): dict_dup = {} - duplicates = r_serv_db.smembers(f'obj:duplicates:{obj_type}:{subtype}:{id}') + duplicates = r_serv_db.smembers(f'obj:duplicates:{obj_type}:{subtype}:{obj_id}') for str_dup in duplicates: - similarity, algo, id = str_dup.split(':', 2) - if not dict_dup.get(id): - dict_dup[id] = [] - dict_dup[id].append({'algo': algo, 'similarity': int(similarity)}) + similarity, algo, id_2 = str_dup.split(':', 2) + if not dict_dup.get(id_2): + dict_dup[id_2] = [] + dict_dup[id_2].append({'algo': algo, 'similarity': int(similarity)}) return dict_dup +def add_obj_duplicate(algo, similarity, obj_type, subtype, obj_id, id_2): + r_serv_db.sadd(f'obj:duplicates:{obj_type}:{subtype}:{obj_id}', f'{similarity}:{algo}:{id_2}') -def _add_obj_duplicate(algo, similarity, obj_type, subtype, id, id_2): - r_serv_db.sadd(f'obj:duplicates:{obj_type}:{subtype}:{id}', f'{similarity}:{algo}:{id_2}') -def add_obj_duplicate(algo, hash, similarity, obj_type, subtype, id, date_ymonth): - obj2_id = get_object_id_by_hash(algo, hash, date_ymonth) +def add_duplicate(algo, hash_, similarity, obj_type, subtype, id, date_ymonth): + obj2_id = get_object_id_by_hash(algo, hash_, date_ymonth) # same content if similarity == 100: - dups = get_duplicates(obj_type, subtype, id) + dups = get_obj_duplicates(obj_type, subtype, id) for dup_id in dups: for algo_dict in dups[dup_id]: if algo_dict['similarity'] == 100 and algo_dict['algo'] == algo: - _add_obj_duplicate(algo, similarity, obj_type, subtype, id, dups[dup_id]) - _add_obj_duplicate(algo, similarity, obj_type, subtype, dups[dup_id], id) - _add_obj_duplicate(algo, similarity, obj_type, subtype, id, obj2_id) - _add_obj_duplicate(algo, similarity, obj_type, subtype, obj2_id, id) + add_obj_duplicate(algo, similarity, obj_type, subtype, id, dups[dup_id]) + add_obj_duplicate(algo, similarity, obj_type, subtype, dups[dup_id], id) + add_obj_duplicate(algo, similarity, obj_type, subtype, id, obj2_id) + add_obj_duplicate(algo, similarity, obj_type, subtype, obj2_id, id) +# TODO +def delete_obj_duplicates(): + pass + +# TODO +def delete_obj_duplicate(): + pass def get_last_x_month_dates(nb_months): now = datetime.datetime.now() diff --git a/bin/lib/Investigations.py b/bin/lib/Investigations.py index fb5874f1..fe3d3d38 100755 --- a/bin/lib/Investigations.py +++ b/bin/lib/Investigations.py @@ -15,7 +15,6 @@ import datetime import time import uuid -from abc import ABC from enum import Enum from flask import escape @@ -279,12 +278,12 @@ def get_obj_investigations(obj_id, obj_type, subtype=''): return r_tracking.smembers(f'obj:investigations:{obj_type}:{subtype}:{obj_id}') def delete_obj_investigations(obj_id, obj_type, subtype=''): - unregistred = False + unregistered = False for investigation_uuid in get_obj_investigations(obj_id, obj_type, subtype=subtype): investigation = Investigation(investigation_uuid) investigation.unregister_object(obj_id, obj_type, subtype) - unregistred = True - return unregistred + unregistered = True + return unregistered def _set_timestamp(investigation_uuid, timestamp): @@ -304,8 +303,8 @@ def _re_create_investagation(investigation_uuid, user_id, date, name, threat_lev # # TODO: fix default threat_level analysis # # TODO: limit description + name -# # TODO: sanityze tags -# # TODO: sanityze date +# # TODO: sanitize tags +# # TODO: sanitize date def create_investigation(user_id, date, name, threat_level, analysis, info, tags=[], investigation_uuid=None): if investigation_uuid: if not is_valid_uuid_v4(investigation_uuid): @@ -472,18 +471,18 @@ def api_unregister_object(json_dict): ##-- API --## - -if __name__ == '__main__': - # res = create_star_list(user_id, name, description) - # print(res) - - # res = r_tracking.dbsize() - # print(res) - - investigation_uuid = 'a6545c38083444eeb9383d357f8fa747' - _set_timestamp(investigation_uuid, int(time.time())) - - # investigation = Investigation(investigation_uuid) - # investigation.delete() +# +# if __name__ == '__main__': +# # res = create_star_list(user_id, name, description) +# # print(res) +# +# # res = r_tracking.dbsize() +# # print(res) +# +# investigation_uuid = 'a6545c38083444eeb9383d357f8fa747' +# _set_timestamp(investigation_uuid, int(time.time())) +# +# # investigation = Investigation(investigation_uuid) +# # investigation.delete() # # TODO: PAGINATION diff --git a/bin/lib/Statistics.py b/bin/lib/Statistics.py index c5898760..9b570eaa 100755 --- a/bin/lib/Statistics.py +++ b/bin/lib/Statistics.py @@ -72,30 +72,31 @@ def update_item_stats_size_nb(item_id, source, size, date): # TOP Items Size if r_statistics.zcard(f'top_size_set_{date}') < PIE_CHART_MAX_CARDINALITY: - r_statistics.zadd(f'top_avg_size_set_{date}', new_avg, source) + r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg}) + else: member_set = r_statistics.zrangebyscore(f'top_avg_size_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1) # Member set is a list of (value, score) pairs if float(member_set[0][1]) < new_avg: # remove min from set and add the new one r_statistics.zrem(f'top_avg_size_set_{date}', member_set[0][0]) - r_statistics.zadd(f'top_avg_size_set_{date}', new_avg, source) + r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg}) # TOP Nb Items if r_statistics.zcard(f'providers_set_{date}') < PIE_CHART_MAX_CARDINALITY or r_statistics.zscore(f'providers_set_{date}', source) != None: - r_statistics.zadd(f'providers_set_{date}', float(nb_items), source) + r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)}) else: # zset at full capacity member_set = r_statistics.zrangebyscore(f'providers_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1) # Member set is a list of (value, score) pairs if int(member_set[0][1]) < nb_items: # remove min from set and add the new one r_statistics.zrem(member_set[0][0]) - r_statistics.zadd(f'providers_set_{date}', float(nb_items), source) + r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)}) # keyword num def _add_module_stats(module_name, total_sum, keyword, date): - r_statistics.zadd(f'top_{module_name}_set_{date}', float(total_sum), keyword) + r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(total_sum)}) # # TODO: ONE HSET BY MODULE / CUSTOM STATS def update_module_stats(module_name, num, keyword, date): @@ -111,14 +112,14 @@ def update_module_stats(module_name, num, keyword, date): keyword_total_sum += int(curr_value) if curr_value is not None else 0 if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY: - r_statistics.zadd(f'top_{module_name}_set_{date}', float(keyword_total_sum), keyword) + r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)}) else: # zset at full capacity member_set = r_statistics.zrangebyscore(f'top_{module_name}_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1) # Member set is a list of (value, score) pairs if int(member_set[0][1]) < keyword_total_sum: #remove min from set and add the new one r_statistics.zrem(f'top_{module_name}_set_{date}', member_set[0][0]) - r_statistics.zadd(f'top_{module_name}_set_{date}', float(keyword_total_sum), keyword) + r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)}) def get_module_tld_stats_by_tld_date(date, tld): nb_tld = r_statistics.hget(f'credential_by_tld:{date}', tld) @@ -132,5 +133,5 @@ def get_module_tld_stats_by_date(module, date): def add_module_tld_stats_by_date(module, date, tld, nb): r_statistics.hincrby(f'{module}_by_tld:{date}', tld, int(nb)) -# r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1) -# r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1) +# r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d')) +# r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d')) diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 79a695c3..99318e25 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -21,12 +21,12 @@ from packages import Date from lib import ConfigLoader from lib import item_basic from lib import Tag +from lib.Users import User config_loader = ConfigLoader.ConfigLoader() r_cache = config_loader.get_redis_conn("Redis_Cache") -r_serv_db = config_loader.get_db_conn("Kvrocks_DB") -r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB") +r_serv_tracker = config_loader.get_db_conn("Kvrocks_Trackers") items_dir = config_loader.get_config_str("Directories", "pastes") if items_dir[-1] == '/': @@ -250,7 +250,7 @@ def add_tracked_item(tracker_uuid, item_id): res = r_serv_tracker.sadd(f'tracker:item:{tracker_uuid}:{item_date}', item_id) # track nb item by date if res == 1: - nb_items = r_serv_tracker.zincrby('tracker:stat:{}'.format(tracker_uuid), int(item_date), 1) + nb_items = r_serv_tracker.zincrby(f'tracker:stat:{tracker_uuid}', 1, int(item_date)) if nb_items == 1: update_tracker_daterange(tracker_uuid, item_date) @@ -289,7 +289,7 @@ def remove_tracked_item(item_id): r_serv_tracker.srem(f'obj:trackers:item:{item_id}', tracker_uuid) res = r_serv_tracker.srem(f'tracker:item:{tracker_uuid}:{item_date}', item_id) if res: - r_serv_tracker.zincrby('tracker:stat:{}'.format(tracker_uuid), int(item_date), -1) + r_serv_tracker.zincrby(f'tracker:stat:{tracker_uuid}', -1, int(item_date)) def get_item_all_trackers_uuid(obj_id): #obj_type = 'item' @@ -326,13 +326,6 @@ def trigger_trackers_refresh(tracker_type): ###################### #### TRACKERS ACL #### -# # TODO: use new package => duplicate fct -def is_in_role(user_id, role): - if r_serv_db.sismember('user_role:{}'.format(role), user_id): - return True - else: - return False - def is_tracker_in_global_level(tracker, tracker_type): res = r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker)) if res: @@ -364,11 +357,11 @@ def api_is_allowed_to_edit_tracker(tracker_uuid, user_id): tracker_creator = r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id') if not tracker_creator: return {"status": "error", "reason": "Unknown uuid"}, 404 - if not is_in_role(user_id, 'admin') and user_id != tracker_creator: + user = User(user_id) + if not user.is_in_role('admin') and user_id != tracker_creator: return {"status": "error", "reason": "Access Denied"}, 403 return {"uuid": tracker_uuid}, 200 - ##-- ACL --## #### FIX DB #### @@ -385,7 +378,7 @@ def fix_tracker_stats_per_day(tracker_uuid): nb_items = r_serv_tracker.scard(f'tracker:item:{tracker_uuid}:{date_day}') if nb_items: - r_serv_tracker.zincrby('tracker:stat:{}'.format(tracker_uuid), int(date_day), nb_items) + r_serv_tracker.zincrby(f'tracker:stat:{tracker_uuid}', nb_items, int(date_day)) # update first_seen/last_seen update_tracker_daterange(tracker_uuid, date_day) @@ -470,12 +463,15 @@ def _re_create_tracker(tracker, tracker_type, user_id, level, tags, mails, descr def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, webhook, dashboard=0, tracker_uuid=None, sources=[]): # edit tracker if tracker_uuid: - edit_tracker = True # check if type changed old_type = get_tracker_type(tracker_uuid) - old_tracker = get_tracker_by_uuid(tracker_uuid) - old_level = get_tracker_level(tracker_uuid) - tracker_user_id = get_tracker_user_id(tracker_uuid) + if not old_type: + edit_tracker = False + else: + edit_tracker = True + old_tracker = get_tracker_by_uuid(tracker_uuid) + old_level = get_tracker_level(tracker_uuid) + tracker_user_id = get_tracker_user_id(tracker_uuid) # Create new tracker else: @@ -497,19 +493,19 @@ def create_tracker(tracker, tracker_type, user_id, level, tags, mails, descripti tracker_type = 'yara' # create metadata - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'tracked', tracker) - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'type', tracker_type) - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'date', datetime.date.today().strftime("%Y%m%d")) - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'level', level) - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'dashboard', dashboard) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'tracked', tracker) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'type', tracker_type) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'date', datetime.date.today().strftime("%Y%m%d")) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'level', level) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'dashboard', dashboard) if not edit_tracker: - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'user_id', user_id) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'user_id', user_id) if description: - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'description', description) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'description', description) if webhook: - r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'webhook', webhook) + r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'webhook', webhook) # type change if edit_tracker: @@ -1165,7 +1161,7 @@ def save_retro_hunt_match(task_uuid, id, object_type='item'): res = r_serv_tracker.sadd(f'tracker:retro_hunt:task:item:{task_uuid}:{item_date}', id) # track nb item by date if res == 1: - r_serv_tracker.zincrby(f'tracker:retro_hunt:task:stat:{task_uuid}', int(item_date), 1) + r_serv_tracker.zincrby(f'tracker:retro_hunt:task:stat:{task_uuid}', 1, int(item_date)) # Add map obj_id -> task_uuid r_serv_tracker.sadd(f'obj:retro_hunt:item:{id}', task_uuid) diff --git a/bin/lib/Users.py b/bin/lib/Users.py index 1e5afb68..52056483 100755 --- a/bin/lib/Users.py +++ b/bin/lib/Users.py @@ -168,11 +168,11 @@ def get_all_roles(): # create role_list def _create_roles_list(): if not r_serv_db.exists('ail:roles:all'): - r_serv_db.zadd('ail:roles:all', 1, 'admin') - r_serv_db.zadd('ail:roles:all', 2, 'analyst') - r_serv_db.zadd('ail:roles:all', 3, 'user') - r_serv_db.zadd('ail:roles:all', 4, 'user_no_api') - r_serv_db.zadd('ail:roles:all', 5, 'read_only') + r_serv_db.zadd('ail:roles:all', {'admin': 1}) + r_serv_db.zadd('ail:roles:all', {'analyst': 2}) + r_serv_db.zadd('ail:roles:all', {'user': 3}) + r_serv_db.zadd('ail:roles:all', {'user_no_api': 4}) + r_serv_db.zadd('ail:roles:all', {'read_only': 5}) def get_role_level(role): return int(r_serv_db.zscore('ail:roles:all', role)) @@ -236,6 +236,9 @@ class User(UserMixin): else: self.id = "__anonymous__" + def exists(self): + return self.id != "__anonymous__" + # return True or False # def is_authenticated(): diff --git a/bin/lib/domain_basic.py b/bin/lib/domain_basic.py deleted file mode 100755 index 54bf236d..00000000 --- a/bin/lib/domain_basic.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/python3 - -""" -``basic domain lib`` -=================== - - -""" - -import os -import sys - -sys.path.append(os.environ['AIL_BIN']) -################################## -# Import Project packages -################################## -from lib import ConfigLoader - -config_loader = ConfigLoader.ConfigLoader() -r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") -config_loader = None - -def get_domain_type(domain): - if str(domain).endswith('.onion'): - return 'onion' - else: - return 'regular' - -def delete_domain_item_core(item_id, domain, port): - domain_type = get_domain_type(domain) - r_serv_onion.zrem('crawler_history_{}:{}:{}'.format(domain_type, domain, port), item_id) diff --git a/bin/lib/objects/Decodeds.py b/bin/lib/objects/Decodeds.py index c1bc995b..9535ae86 100755 --- a/bin/lib/objects/Decodeds.py +++ b/bin/lib/objects/Decodeds.py @@ -220,17 +220,17 @@ class Decoded(AbstractObject): if not self.is_seen_this_day(date): # mimetype - r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1) + r_metadata.zincrby(f'decoded:mimetype:{date}', 1, mimetype) r_metadata.sadd(f'decoded:mimetypes', mimetype) # filter hash encoded in the same object if not self.is_correlated('item', None, obj_id): r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_name}_decoder', 1) - r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', date, 1) + r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', 1, date) r_metadata.incrby(f'{decoder_name}_decoded:{date}', 1) - r_metadata.zincrby(f'{decoder_name}_date:{date}', self.id, 1) + r_metadata.zincrby(f'{decoder_name}_date:{date}', 1, self.id) self.update_daterange(date) @@ -268,7 +268,7 @@ class Decoded(AbstractObject): # mimetype # # # # # # # # - r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1) + r_metadata.zincrby(f'decoded:mimetype:{date}', 1, mimetype) # create hash metadata r_metadata.sadd(f'decoded:mimetypes', mimetype) @@ -280,13 +280,13 @@ class Decoded(AbstractObject): self.update_daterange(date) r_metadata.incrby(f'{decoder_type}_decoded:{date}', 1) - r_metadata.zincrby(f'{decoder_type}_date:{date}', self.id, 1) + r_metadata.zincrby(f'{decoder_type}_date:{date}', 1, self.id) r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_type}_decoder', 1) - r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', date, 1) # # TODO: # DUP1 + r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', 1, date) # # TODO: # DUP1 ################################################################ # TODO: REMOVE ????????????????????????????????? - r_metadata.zincrby(f'{decoder_type}_hash:{self.id}', obj_id, 1) # number of b64 on this item + r_metadata.zincrby(f'{decoder_type}_hash:{self.id}', 1, obj_id) # number of b64 on this item # first time we see this hash encoding on this item @@ -297,7 +297,7 @@ class Decoded(AbstractObject): # first time we see this hash encoding today if not r_metadata.zscore(f'{decoder_type}_date:{date}', self.id): - r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', date, 1) # # TODO: # DUP1 + r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', 1, date) # # TODO: # DUP1 # Correlations diff --git a/bin/lib/objects/Screenshots.py b/bin/lib/objects/Screenshots.py index 9539587d..8cdcf95a 100755 --- a/bin/lib/objects/Screenshots.py +++ b/bin/lib/objects/Screenshots.py @@ -17,7 +17,7 @@ from lib.ConfigLoader import ConfigLoader from lib.objects.abstract_object import AbstractObject config_loader = ConfigLoader() -r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +r_serv_metadata = config_loader.get_db_conn("Kvrocks_Objects") SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') config_loader = None diff --git a/bin/lib/objects/abstract_daterange_object.py b/bin/lib/objects/abstract_daterange_object.py index bffa6d88..297c476a 100755 --- a/bin/lib/objects/abstract_daterange_object.py +++ b/bin/lib/objects/abstract_daterange_object.py @@ -10,7 +10,7 @@ import os import sys from abc import abstractmethod, ABC -#from flask import url_for +# from flask import url_for sys.path.append(os.environ['AIL_BIN']) ################################## @@ -24,7 +24,6 @@ from packages import Date # LOAD CONFIG config_loader = ConfigLoader() -# r_metadata = config_loader.get_redis_conn("ARDB_Metadata") r_object = config_loader.get_db_conn("Kvrocks_Objects") config_loader = None @@ -118,7 +117,7 @@ class AbstractDaterangeObject(AbstractObject, ABC): # NB Object seen by day r_object.hincrby(f'{self.type}:date:{date}', self.id, 1) - r_object.zincrby(f'{self.type}:date:{date}', self.id, 1) # # # # # # # # # # + r_object.zincrby(f'{self.type}:date:{date}', 1, self.id) # # # # # # # # # # # NB Object seen r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1) diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index 43d57d2f..76b3f04d 100755 --- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -74,9 +74,6 @@ class AbstractObject(ABC): tags = list(tags) return tags - def get_duplicates(self): - return Duplicate.get_duplicates(self.type, self.get_subtype(r_str=True), self.id) - ## ADD TAGS ???? def add_tag(self, tag): Tag.add_object_tag(tag, self.type, self.id, subtype=self.get_subtype(r_str=True)) @@ -88,6 +85,14 @@ class AbstractObject(ABC): #- Tags -# + ## Duplicates ## + def get_duplicates(self): + return Duplicate.get_obj_duplicates(self.type, self.get_subtype(r_str=True), self.id) + + def add_duplicate(self, algo, similarity, id_2): + return Duplicate.add_obj_duplicate(algo, similarity, self.type, self.get_subtype(r_str=True), self.id, id_2) + # -Duplicates -# + ## Investigations ## # # TODO: unregister ===== diff --git a/bin/modules/Duplicates.py b/bin/modules/Duplicates.py index b9b5a440..1d0d2dae 100755 --- a/bin/modules/Duplicates.py +++ b/bin/modules/Duplicates.py @@ -77,16 +77,16 @@ class Duplicates(AbstractModule): obj_hash = self.algos[algo]['hash'] for date_ymonth in last_month_dates: if Duplicate.exists_algo_hash_by_month(algo, obj_hash, date_ymonth): - Duplicate.add_obj_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth) - nb_duplicates +=1 + Duplicate.add_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth) + nb_duplicates += 1 else: for hash in Duplicate.get_algo_hashs_by_month(algo, date_ymonth): # # FIXME: try - catch 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash similarity = Duplicate.get_algo_similarity(algo, obj_hash, hash) - print(f'[{algo}] comparing: {obj_hash} and {hash} similarity: {similarity}') # DEBUG: + print(f'[{algo}] comparing: {obj_hash} and {hash} similarity: {similarity}') # DEBUG: if similarity >= self.algos[algo]['threshold']: - Duplicate.add_obj_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth) - nb_duplicates +=1 + Duplicate.add_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth) + nb_duplicates += 1 # Save Hashs Duplicate.save_object_hash(algo, curr_date_ymonth, self.algos[algo]['hash'], item.get_id()) diff --git a/bin/modules/Global.py b/bin/modules/Global.py index a93712fe..fc43f7d1 100755 --- a/bin/modules/Global.py +++ b/bin/modules/Global.py @@ -194,12 +194,12 @@ class Global(AbstractModule): self.redis_logger.warning(f'Global; Incomplete file: {filename}') print(f'Global; Incomplete file: {filename}') # save daily stats - self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1) + self.r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d')) except OSError: self.redis_logger.warning(f'Global; Not a gzipped file: {filename}') print(f'Global; Not a gzipped file: {filename}') # save daily stats - self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1) + self.r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d')) return curr_file_content diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 73d188d8..bc4fced0 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -24,7 +24,7 @@ from packages import Date from lib.objects import Items config_loader = ConfigLoader.ConfigLoader() -r_serv_term = config_loader.get_db_conn("Kvrocks_DB") +r_serv_term = config_loader.get_db_conn("Kvrocks_Trackers") config_loader = None email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' @@ -387,11 +387,11 @@ def add_tracked_item(term_uuid, item_id, item_date): # track item r_serv_term.sadd('tracker:item:{}:{}'.format(term_uuid, item_date), item_id) # track nb item by date - r_serv_term.zadd('tracker:stat:{}'.format(term_uuid), item_date, int(item_date)) + r_serv_term.zadd('tracker:stat:{}'.format(term_uuid), {item_date: item_date}) def create_token_statistics(item_date, word, nb): - r_serv_term.zincrby('stat_token_per_item_by_day:{}'.format(item_date), word, 1) - r_serv_term.zincrby('stat_token_total_by_day:{}'.format(item_date), word, nb) + r_serv_term.zincrby('stat_token_per_item_by_day:{}'.format(item_date), 1, word) + r_serv_term.zincrby('stat_token_total_by_day:{}'.format(item_date), nb, word) r_serv_term.sadd('stat_token_history', item_date) def delete_token_statistics_by_date(item_date): diff --git a/bin/packages/User.py b/bin/packages/User.py deleted file mode 100755 index fe5197c7..00000000 --- a/bin/packages/User.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import redis -import bcrypt - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader - -from flask_login import UserMixin - -def get_all_users(): - return r_serv_db.hkeys('user:all') - -class User(UserMixin): - - def __init__(self, id): - - config_loader = ConfigLoader.ConfigLoader() - - self.r_serv_db = config_loader.get_redis_conn("ARDB_DB") - config_loader = None - - if self.r_serv_db.hexists('user:all', id): - self.id = id - else: - self.id = "__anonymous__" - - # return True or False - #def is_authenticated(): - - # return True or False - #def is_anonymous(): - - @classmethod - def get(self_class, id): - return self_class(id) - - def user_is_anonymous(self): - if self.id == "__anonymous__": - return True - else: - return False - - def check_password(self, password): - if self.user_is_anonymous(): - return False - - password = password.encode() - hashed_password = self.r_serv_db.hget('user:all', self.id).encode() - if bcrypt.checkpw(password, hashed_password): - return True - else: - return False - - def request_password_change(self): - if self.r_serv_db.hget('user_metadata:{}'.format(self.id), 'change_passwd') == 'True': - return True - else: - return False - - def is_in_role(self, role): - if self.r_serv_db.sismember('user_role:{}'.format(role), self.id): - return True - else: - return False diff --git a/update/v1.5/Update-ARDB_Onions.py b/update/v1.5/Update-ARDB_Onions.py index 7f36b99a..199ee194 100755 --- a/update/v1.5/Update-ARDB_Onions.py +++ b/update/v1.5/Update-ARDB_Onions.py @@ -102,7 +102,7 @@ if __name__ == '__main__': # create new history root_key = get_domain_root_from_paste_childrens(item_father, onion_domain) if root_key: - r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key) + r_serv_onion.zadd(f'crawler_history_onion:{onion_domain}:80', {root_key: get_date_epoch(date_history)}) print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key)) #update service metadata: paste_parent r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key) diff --git a/update/v2.5/Update.py b/update/v2.5/Update.py index 18966d47..6264c7f4 100755 --- a/update/v2.5/Update.py +++ b/update/v2.5/Update.py @@ -19,9 +19,9 @@ if __name__ == '__main__': r_serv = config_loader.get_redis_conn("ARDB_DB") config_loader = None - r_serv.zadd('ail:all_role', 3, 'user') - r_serv.zadd('ail:all_role', 4, 'user_no_api') - r_serv.zadd('ail:all_role', 5, 'read_only') + r_serv.zadd('ail:all_role', {'user': 3}) + r_serv.zadd('ail:all_role', {'user_no_api': 4}) + r_serv.zadd('ail:all_role', {'read_only': 5}) for user in r_serv.hkeys('user:all'): r_serv.sadd('user_role:user', user) diff --git a/var/www/create_default_user.py b/var/www/create_default_user.py index deadd829..9a1af059 100755 --- a/var/www/create_default_user.py +++ b/var/www/create_default_user.py @@ -3,7 +3,6 @@ import os import sys -import redis sys.path.append(os.environ['AIL_BIN']) ################################## diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 5dd538f9..9bf057d5 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -191,7 +191,7 @@
{% for decoded in dict_domain['decoded']%}