From 73dbef2700af512bdf54e931974aeb59459b83b0 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 28 Nov 2022 15:01:40 +0100 Subject: [PATCH] chg: [all] remove old objects + migrate cryptocurrencies module + cleanup code --- bin/Cryptocurrencies.py | 204 ---- bin/LAUNCH.sh | 4 +- bin/MISP_The_Hive_feeder.py | 1 - bin/ModulesInformationV2.py | 4 +- bin/NotificationHelper.py | 4 +- bin/PreProcessFeed.py.sample | 9 +- bin/Update-conf.py | 6 +- bin/Update.py | 79 +- bin/ailleakObject.py | 44 +- bin/core/DbCleaner.py | 1 - bin/core/Sync_importer.py | 2 - bin/core/Sync_manager.py | 9 +- bin/core/Sync_module.py | 19 +- bin/core/ail_2_ail.py | 21 +- bin/export/AILObjects.py | 31 +- bin/export/Export.py | 5 +- bin/export/MispExport.py | 213 +--- bin/export/MispImport.py | 92 +- bin/launch_queues.py | 1 + bin/lib/Config_DB.py | 2 +- bin/lib/Correlate_object.py | 481 --------- bin/lib/Decoded.py | 407 -------- bin/lib/Domain.py | 915 ------------------ bin/lib/Investigations.py | 12 +- bin/lib/Language.py | 1 - bin/lib/MispModules.py | 8 +- bin/lib/Screenshot.py | 241 ----- bin/lib/Statistics.py | 10 +- bin/lib/Tracker.py | 16 +- bin/lib/Username.py | 21 - bin/lib/Users.py | 25 +- bin/lib/ail_core.py | 38 + bin/lib/ail_users.py | 6 +- bin/lib/btc_ail.py | 13 +- bin/lib/correlations_engine.py | 582 +---------- bin/lib/crawlers.py | 2 - bin/lib/d4.py | 8 +- bin/lib/data_retention_engine.py | 7 +- bin/lib/domain_basic.py | 8 +- bin/lib/index_whoosh.py | 8 +- bin/lib/objects/CryptoCurrencies.py | 100 +- bin/lib/objects/Domains.py | 144 ++- bin/lib/objects/abstract_object.py | 47 +- bin/lib/objects/ail_objects.py | 79 +- bin/lib/queues_modules.py | 8 +- bin/lib/regex_helper.py | 9 +- bin/modules/Cryptocurrencies.py | 163 ++++ bin/packages/Correlation.py | 451 --------- bin/packages/Cryptocurrency.py | 91 -- bin/packages/HiddenServices.py | 342 ------- bin/packages/Import_helper.py | 5 +- bin/packages/Item.py | 695 ------------- bin/packages/Paste.py | 430 -------- bin/packages/Pgp.py | 28 - bin/packages/Term.py | 82 +- bin/packages/lib_refine.py | 8 +- bin/packages/modules.cfg | 2 +- bin/update-background.py | 9 +- tests/testApi.py | 17 +- tests/test_modules.py | 18 +- update/v3.4/Update_domain.py | 2 - var/www/blueprints/correlation.py | 1 - var/www/blueprints/crawler_splash.py | 59 +- var/www/blueprints/hunters.py | 1 - var/www/blueprints/import_export.py | 1 - var/www/blueprints/objects_item.py | 9 +- var/www/blueprints/old_endpoints.py | 1 - var/www/blueprints/settings_b.py | 5 +- var/www/blueprints/tags_ui.py | 2 +- var/www/modules/Flask_config.py | 4 +- .../modules/PasteSubmit/Flask_PasteSubmit.py | 3 +- .../templates/edit_tag_export.html | 2 +- var/www/modules/Tags/Flask_Tags.py | 1 - var/www/modules/dashboard/Flask_dashboard.py | 13 +- .../modules/hashDecoded/Flask_hashDecoded.py | 15 +- .../hiddenServices/Flask_hiddenServices.py | 10 +- var/www/modules/hunter/Flask_hunter.py | 19 +- var/www/modules/restApi/Flask_restApi.py | 57 +- var/www/modules/search/Flask_search.py | 6 +- var/www/modules/sentiment/Flask_sentiment.py | 10 +- var/www/modules/settings/Flask_settings.py | 14 +- var/www/modules/terms/Flask_terms.py | 13 +- .../trendingcharts/Flask_trendingcharts.py | 10 +- .../trendingmodules/Flask_trendingmodules.py | 10 +- .../crawler_splash/domain_explorer.html | 99 +- .../templates/domains/card_img_domain.html | 1 - .../domains/domains_result_list.html | 18 +- ...block_add_user_object_to_export_small.html | 2 +- var/www/templates/modals/add_tags.html | 8 +- 89 files changed, 1037 insertions(+), 
5647 deletions(-) delete mode 100755 bin/Cryptocurrencies.py delete mode 100755 bin/lib/Correlate_object.py delete mode 100755 bin/lib/Decoded.py delete mode 100755 bin/lib/Domain.py delete mode 100755 bin/lib/Screenshot.py delete mode 100755 bin/lib/Username.py create mode 100755 bin/modules/Cryptocurrencies.py delete mode 100755 bin/packages/Correlation.py delete mode 100755 bin/packages/Cryptocurrency.py delete mode 100755 bin/packages/HiddenServices.py delete mode 100755 bin/packages/Item.py delete mode 100755 bin/packages/Paste.py delete mode 100755 bin/packages/Pgp.py diff --git a/bin/Cryptocurrencies.py b/bin/Cryptocurrencies.py deleted file mode 100755 index ded939e7..00000000 --- a/bin/Cryptocurrencies.py +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -The Cryptocurrency Module -============================ - -It trying to extract Bitcoin address and secret key from paste - - ..seealso:: Paste method (get_regex) - -Requirements ------------- - -*Need running Redis instances. (Redis). - -""" - -from Helper import Process -from pubsublogger import publisher - -import os -import re -import sys -import time -import redis -import signal - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Item -import Cryptocurrency - - -class TimeoutException(Exception): - pass - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - - -def search_crytocurrency(item_id, item_content): - - - is_cryptocurrency_found = False - - for dict_field in cryptocurrency_dict: - crypto_dict = cryptocurrency_dict[dict_field] - crypto_name = crypto_dict['name'] - - signal.alarm(crypto_dict['max_execution_time']) - try: - crypto_addr = re.findall(crypto_dict['regex'], item_content) - except TimeoutException: - crypto_addr = [] - p.incr_module_timeout_statistic() # add encoder type - print ("{0} processing timeout".format(item_id)) - continue - else: - signal.alarm(0) - - if crypto_addr: - is_valid_crypto_addr = False - # validate cryptocurrency address - for address in crypto_addr: - if(Cryptocurrency.verify_cryptocurrency_address(dict_field, address)): - is_valid_crypto_addr = True - print('{} address found : {}'.format(crypto_name, address)) - # build bitcoin correlation - Cryptocurrency.cryptocurrency.save_item_correlation(crypto_name, address, item_id, Item.get_item_date(item_id)) - - # At least one valid cryptocurrency address was found - if(is_valid_crypto_addr): - # valid cryptocurrency found in this item - is_cryptocurrency_found = True - - # Tag Item - msg = '{};{}'.format(crypto_dict['tag'], item_id) - p.populate_set_out(msg, 'Tags') - - # search cryptocurrency private key - if crypto_dict.get('private_key'): - signal.alarm(crypto_dict['private_key']['max_execution_time']) - try: - addr_private_key = re.findall(crypto_dict['private_key']['regex'], item_content) - except TimeoutException: - addr_private_key = [] - p.incr_module_timeout_statistic() # add encoder type - print ("{0} processing timeout".format(item_id)) - continue - else: - signal.alarm(0) - - if addr_private_key: - # Tag Item - msg = '{};{}'.format(crypto_dict['private_key']['tag'], item_id) - p.populate_set_out(msg, 'Tags') - - # debug - print(addr_private_key) - to_print = '{} found: {} address and {} private Keys'.format(crypto_name, len(crypto_addr), len(addr_private_key)) - print(to_print) - publisher.warning(to_print) - - to_print = 'Cryptocurrency;{};{};{};'.format(Item.get_source(item_id), Item.get_item_date(item_id), 
Item.get_item_basename(item_id)) - publisher.warning('{}Detected {} {} private key;{}'.format( - to_print, len(addr_private_key), crypto_name, item_id)) - - - if is_cryptocurrency_found: - # send to duplicate module - p.populate_set_out(item_id, 'Duplicate') - - - - -default_max_execution_time = 30 - -cryptocurrency_dict = { - 'bitcoin': { - 'name': 'bitcoin', # e.g. 1NbEPRwbBZrFDsx1QW19iDs8jQLevzzcms - 'regex': r'\b(? 0: - nb_dot = nb_dot -1 + nb_dot = nb_dot - 1 current_tag_val = current_tag.rsplit('.', nb_dot) current_tag_val = ''.join(current_tag_val) - if is_fork: process = subprocess.run(['git', 'tag'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: @@ -180,13 +181,13 @@ def get_git_upper_tags_remote(current_tag, is_fork): list_upper_tags = [] if list_all_tags[-1][1:] == current_tag: - list_upper_tags.append( (list_all_tags[-1], None) ) + list_upper_tags.append((list_all_tags[-1], None)) # force update order list_upper_tags.sort() return list_upper_tags for tag in list_all_tags: if float(tag[1:]) >= float(current_tag_val): - list_upper_tags.append( (tag, None) ) + list_upper_tags.append((tag, None)) # force update order list_upper_tags.sort() return list_upper_tags @@ -195,7 +196,7 @@ def get_git_upper_tags_remote(current_tag, is_fork): aborting_update() sys.exit(0) else: - process = subprocess.run(['git', 'ls-remote' ,'--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + process = subprocess.run(['git', 'ls-remote', '--tags'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: list_all_tags = process.stdout.decode().splitlines() @@ -204,7 +205,7 @@ def get_git_upper_tags_remote(current_tag, is_fork): last_tag = last_tag[1].split('^{}')[0] list_upper_tags = [] if last_tag[1:] == current_tag: - list_upper_tags.append( (last_tag, last_commit) ) + list_upper_tags.append((last_tag, last_commit)) # force update order list_upper_tags.sort() return list_upper_tags @@ -219,7 +220,7 @@ def get_git_upper_tags_remote(current_tag, is_fork): # keep only first dot nb_dot = tag.count('.') if nb_dot > 0: - nb_dot = nb_dot -1 + nb_dot = nb_dot - 1 tag_val = tag.rsplit('.', nb_dot) tag_val = ''.join(tag_val) @@ -253,7 +254,7 @@ def update_submodules(): def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_fork): print('{}git checkout master:{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'checkout', 'master'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - #process = subprocess.run(['ls'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # process = subprocess.run(['ls'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: print(process.stdout.decode()) print() @@ -269,8 +270,8 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_for # CHECK IF UPDATER Update if float(os.stat(UPDATER_FILENAME).st_mtime) > UPDATER_LAST_MODIFICATION: - # request updater relauch - print('{}{}{}'.format(TERMINAL_RED, ' Relaunch Launcher ', TERMINAL_DEFAULT)) + # request updater relaunch + print(f'{TERMINAL_RED} Relaunch Launcher {TERMINAL_DEFAULT}') sys.exit(3) if len(list_upper_tags_remote) == 1: @@ -278,10 +279,11 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_for additional_update_path = os.path.join(os.environ['AIL_HOME'], 'update', current_tag, 'additional_update.sh') if os.path.isfile(additional_update_path): print() - 
print('{}------------------------------------------------------------------'.format(TERMINAL_YELLOW)) + print(f'{TERMINAL_YELLOW}------------------------------------------------------------------') print('- Launching Additional Update: -') - print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --{}'.format(TERMINAL_DEFAULT)) - process = subprocess.run(['bash', additional_update_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(f'-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --{TERMINAL_DEFAULT}') + process = subprocess.run(['bash', additional_update_path], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: output = process.stdout.decode() print(output) @@ -291,7 +293,7 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_for sys.exit(1) print() - print('{}**************** AIL Sucessfully Updated *****************{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) + print(f'{TERMINAL_YELLOW}**************** AIL Successfully Updated *****************{TERMINAL_DEFAULT}') print() exit(0) @@ -299,15 +301,15 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_for # map version with roll back commit list_update = [] previous_commit = list_upper_tags_remote[0][1] - for tuple in list_upper_tags_remote[1:]: - tag = tuple[0] - list_update.append( (tag, previous_commit) ) - previous_commit = tuple[1] + for row_tuple in list_upper_tags_remote[1:]: + tag = row_tuple[0] + list_update.append((tag, previous_commit)) + previous_commit = row_tuple[1] for update in list_update: launch_update_version(update[0], update[1], current_version_path, is_fork) - # Sucess - print('{}**************** AIL Sucessfully Updated *****************{}'.format(TERMINAL_YELLOW, TERMINAL_DEFAULT)) + # Success + print(f'{TERMINAL_YELLOW}**************** AIL Successfully Updated *****************{TERMINAL_DEFAULT}') print() sys.exit(0) else: @@ -322,9 +324,9 @@ def update_ail(current_tag, list_upper_tags_remote, current_version_path, is_for def launch_update_version(version, roll_back_commit, current_version_path, is_fork): update_path = os.path.join(os.environ['AIL_HOME'], 'update', str(version), 'Update.sh') print() - print('{}------------------------------------------------------------------'.format(TERMINAL_YELLOW)) - print('- Launching Update: {}{}{} -'.format(TERMINAL_BLUE, version, TERMINAL_YELLOW)) - print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --{}'.format(TERMINAL_DEFAULT)) + print(f'{TERMINAL_YELLOW}------------------------------------------------------------------') + print(f'- Launching Update: {TERMINAL_BLUE}{version}{TERMINAL_YELLOW} -') + print(f'-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --{TERMINAL_DEFAULT}') if not os.path.isfile(update_path): update_path = os.path.join(os.environ['AIL_HOME'], 'update', 'default_update', 'Update.sh') process = subprocess.Popen(['bash', update_path, version], stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -337,18 +339,18 @@ def launch_update_version(version, roll_back_commit, current_version_path, is_fo if output: print(output.strip()) if process.returncode == 0: - #output = process.stdout.decode() - #print(output) + # output = process.stdout.decode() + # print(output) with open(current_version_path, 'w') as version_content: version_content.write(version) print('{}-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --'.format(TERMINAL_YELLOW)) - print('- Sucessfully Updated: {}{}{} -'.format(TERMINAL_BLUE, version, TERMINAL_YELLOW)) + print('- Successfully Updated: {}{}{} 
-'.format(TERMINAL_BLUE, version, TERMINAL_YELLOW)) print('------------------------------------------------------------------{}'.format(TERMINAL_DEFAULT)) print() else: - #print(process.stdout.read().decode()) + # print(process.stdout.read().decode()) print('{}{}{}'.format(TERMINAL_RED, process.stderr.read().decode(), TERMINAL_DEFAULT)) print('------------------------------------------------------------------') print(' {}Update Error: {}{}{}'.format(TERMINAL_RED, TERMINAL_BLUE, version, TERMINAL_DEFAULT)) @@ -360,7 +362,7 @@ def launch_update_version(version, roll_back_commit, current_version_path, is_fo sys.exit(1) def roll_back_update(roll_back_commit): - print('Rolling back to safe commit: {}{}{}'.format(TERMINAL_BLUE ,roll_back_commit, TERMINAL_DEFAULT)) + print('Rolling back to safe commit: {}{}{}'.format(TERMINAL_BLUE, roll_back_commit, TERMINAL_DEFAULT)) process = subprocess.run(['git', 'checkout', roll_back_commit], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if process.returncode == 0: output = process.stdout @@ -379,6 +381,7 @@ def aborting_update(): print('******************************************************************{}'.format(TERMINAL_DEFAULT)) print() + if __name__ == "__main__": TERMINAL_RED = '\033[91m' @@ -418,10 +421,10 @@ if __name__ == "__main__": current_tag = get_git_current_tag(current_version_path) print() - print('Current Version: {}{}{}'.format( TERMINAL_YELLOW, current_tag, TERMINAL_DEFAULT)) + print('Current Version: {}{}{}'.format(TERMINAL_YELLOW, current_tag, TERMINAL_DEFAULT)) print() list_upper_tags_remote = get_git_upper_tags_remote(current_tag.replace('v', ''), is_fork) - # new realease + # new release if len(list_upper_tags_remote) > 1: print('New Releases:') if is_fork: diff --git a/bin/ailleakObject.py b/bin/ailleakObject.py index fd07e6cc..ca4efb43 100755 --- a/bin/ailleakObject.py +++ b/bin/ailleakObject.py @@ -6,19 +6,11 @@ import sys from pymisp import MISPEvent, MISPObject from pymisp.tools.abstractgenerator import AbstractMISPObjectGenerator -MISPEvent -from packages import Paste import datetime -import json -from io import BytesIO -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader -import item_basic - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'export')) -import MispExport +from lib.objects.Items import Item +from lib import ConfigLoader class ObjectWrapper: def __init__(self, pymisp): @@ -30,33 +22,11 @@ class ObjectWrapper: config_loader = None self.attribute_to_tag = None - def add_new_object(self, uuid_ail, item_id, tag): + def add_new_object(self, uuid_ail, item_id): self.uuid_ail = uuid_ail - # self.paste = Paste.Paste(path) - # temp = self.paste._get_p_duplicate() - # - # #beautifier - # if not temp: - # temp = '' - # - # p_duplicate_number = len(temp) if len(temp) >= 0 else 0 - # - # to_ret = "" - # for dup in temp[:10]: - # dup = dup.replace('\'','\"').replace('(','[').replace(')',']') - # dup = json.loads(dup) - # algo = dup[0] - # path = dup[1].split('/')[-6:] - # path = '/'.join(path)[:-3] # -3 removes .gz - # if algo == 'tlsh': - # perc = 100 - int(dup[2]) - # else: - # perc = dup[2] - # to_ret += "{}: {} [{}%]\n".format(path, algo, perc) - # p_duplicate = to_ret - - return MispExport.export_ail_item(item_id, [tag]) + item = Item(item_id) + return item.get_misp_object() def date_to_str(self, date): return "{0}-{1}-{2}".format(date.year, date.month, date.day) @@ -125,9 +95,9 @@ class ObjectWrapper: # add new tag self.tag(self.attribute_to_tag, tag) print(item_id + ' tagged: ' + tag) - 
#create object + # create object else: - misp_obj = self.add_new_object(uuid_ail, item_id, tag) + misp_obj = self.add_new_object(uuid_ail, item_id) # deprecated # try: diff --git a/bin/core/DbCleaner.py b/bin/core/DbCleaner.py index 0fdb4228..4be8579f 100755 --- a/bin/core/DbCleaner.py +++ b/bin/core/DbCleaner.py @@ -15,7 +15,6 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from packages import Date -from packages import Item from packages import Term from pubsublogger import publisher diff --git a/bin/core/Sync_importer.py b/bin/core/Sync_importer.py index 0176565d..2999f5e8 100755 --- a/bin/core/Sync_importer.py +++ b/bin/core/Sync_importer.py @@ -24,8 +24,6 @@ sys.path.append(os.environ['AIL_BIN']) from core import ail_2_ail from lib.ConfigLoader import ConfigLoader from modules.abstract_module import AbstractModule -from packages.Item import Item -from packages import Tag #### CONFIG #### config_loader = ConfigLoader() diff --git a/bin/core/Sync_manager.py b/bin/core/Sync_manager.py index 1ee97bcb..87ea4f5e 100755 --- a/bin/core/Sync_manager.py +++ b/bin/core/Sync_manager.py @@ -5,10 +5,13 @@ import os import sys import time -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import ail_2_ail +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from core import ail_2_ail -# # TODO: lauch me in core screen +# # TODO: launch me in core screen if __name__ == '__main__': diff --git a/bin/core/Sync_module.py b/bin/core/Sync_module.py index 7c1330e0..7212c727 100755 --- a/bin/core/Sync_module.py +++ b/bin/core/Sync_module.py @@ -21,9 +21,8 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from core import ail_2_ail +from lib.objects.Items import Item from modules.abstract_module import AbstractModule -from packages.Item import Item -from packages import Tag class Sync_module(AbstractModule): @@ -34,7 +33,7 @@ class Sync_module(AbstractModule): def __init__(self): super(Sync_module, self).__init__() - # Waiting time in secondes between to message proccessed + # Waiting time in seconds between two processed messages self.pending_seconds = 10 self.dict_sync_queues = ail_2_ail.get_all_sync_queue_dict() @@ -45,7 +44,6 @@ class Sync_module(AbstractModule): # Send module state to logs self.redis_logger.info(f'Module {self.module_name} Launched') - def compute(self, message): ### REFRESH DICT @@ -64,23 +62,24 @@ class Sync_module(AbstractModule): obj_id = mess_split[2] # OBJECT => Item - if obj_type == 'item': - obj = Item(obj_id) - tags = obj.get_tags(r_set=True) + # if obj_type == 'item': + obj = Item(obj_id) + + tags = obj.get_tags() # check filter + tags - #print(message) + # print(message) for queue_uuid in self.dict_sync_queues: filter_tags = self.dict_sync_queues[queue_uuid]['filter'] if filter_tags and tags: - #print(f'tags: {tags} filter: {filter_tags}') + # print(f'tags: {tags} filter: {filter_tags}') if filter_tags.issubset(tags): obj_dict = obj.get_default_meta() # send to queue push and/or pull for dict_ail in self.dict_sync_queues[queue_uuid]['ail_instances']: print(f'ail_uuid: {dict_ail["ail_uuid"]} obj: {message}') ail_2_ail.add_object_to_sync_queue(queue_uuid, dict_ail['ail_uuid'], obj_dict, - push=dict_ail['push'], pull=dict_ail['pull']) + push=dict_ail['push'], pull=dict_ail['pull']) else: # Malformed message diff --git a/bin/core/ail_2_ail.py b/bin/core/ail_2_ail.py index
b0d86a2b..da388611 100755 --- a/bin/core/ail_2_ail.py +++ b/bin/core/ail_2_ail.py @@ -14,15 +14,14 @@ import subprocess from flask import escape from pubsublogger import publisher -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader -import Tag - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'core/')) -import screen - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -from Item import Item +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ConfigLoader +from lib.objects.Items import Item +# from lib import Tag +from core import screen config_loader = ConfigLoader.ConfigLoader() r_cache = config_loader.get_redis_conn("Redis_Cache") @@ -1204,13 +1203,15 @@ def add_ail_stream_to_sync_importer(ail_stream): # # #### AIL EXCHANGE FORMAT #### +# TODO def is_valid_ail_exchange_format_json(json_obj): try: - ail_stream = json.dumps(ail_stream) + ail_stream = json.dumps(json_obj) except ValueError: return False return is_valid_ail_exchange_format(ail_stream) +# TODO def is_valid_ail_exchange_format(ail_stream): pass diff --git a/bin/export/AILObjects.py b/bin/export/AILObjects.py index f3c5b21b..f2af699b 100755 --- a/bin/export/AILObjects.py +++ b/bin/export/AILObjects.py @@ -5,10 +5,9 @@ import os import sys import uuid -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import ConfigLoader -import Correlate_object +sys.path.append(os.environ['AIL_BIN']) +from lib.objects import ail_objects +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_objects = config_loader.get_redis_conn("ARDB_Objects") @@ -26,27 +25,27 @@ def create_map_obj_event_uuid(event_uuid, global_id): r_serv_objects.sadd('object:map:event_id:{}'.format(event_uuid), global_id) r_serv_objects.sadd('object:map:id_event:{}'.format(global_id), event_uuid) -def get_user_list_of_obj_to_export(user_id, add_uuid=False): - objs_to_export = [] - res = r_serv_objects.hgetall('user:all_objs_to_export:{}'.format(user_id)) - for global_id in res: - dict_obj = Correlate_object.get_global_id_from_id(global_id) - dict_obj['lvl'] = int(res[global_id]) - if add_uuid: - obj_dict['uuid'] = str(uuid.uuid4()) - objs_to_export.append(dict_obj) - return objs_to_export +# def get_user_list_of_obj_to_export(user_id, add_uuid=False): +# objs_to_export = [] +# res = r_serv_objects.hgetall('user:all_objs_to_export:{}'.format(user_id)) +# for global_id in res: +# dict_obj = Correlate_object.get_global_id_from_id(global_id) +# dict_obj['lvl'] = int(res[global_id]) +# if add_uuid: +# obj_dict['uuid'] = str(uuid.uuid4()) +# objs_to_export.append(dict_obj) +# return objs_to_export def add_user_object_to_export(user_id, obj_type, obj_id, lvl, obj_subtype=None): ## TODO: check if user exist # # TODO: check if valid object # # TODO: check lvl - global_id = Correlate_object.get_obj_global_id(obj_type, obj_id, obj_sub_type=obj_subtype) + global_id = ail_objects.get_obj_global_id(obj_type, obj_subtype, obj_id) return r_serv_objects.hset('user:all_objs_to_export:{}'.format(user_id), global_id, lvl) def delete_user_object_to_export(user_id, obj_type, obj_id, obj_subtype=None): ## TODO: check if user exist - global_id = Correlate_object.get_obj_global_id(obj_type, obj_id, obj_sub_type=obj_subtype) + global_id = ail_objects.get_obj_global_id(obj_type, obj_subtype, obj_id) 
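
The hunks above change the helper's argument order: Correlate_object.get_obj_global_id(obj_type, obj_id, obj_sub_type=obj_subtype) becomes ail_objects.get_obj_global_id(obj_type, obj_subtype, obj_id), while the stored value keeps its shape. A minimal sketch of the global-id format these Redis keys rely on, reconstructed from the removed Correlate_object implementation further down in this patch (simplified to omit the legacy paste->item and screenshot->image renaming; the example values are illustrative):

def get_obj_global_id(obj_type, obj_subtype, obj_id):
    # 'type:subtype:id' when a subtype exists, 'type:id' otherwise;
    # the new code passes '' for no subtype, which falls through here
    if obj_subtype:
        return f'{obj_type}:{obj_subtype}:{obj_id}'
    return f'{obj_type}:{obj_id}'

assert get_obj_global_id('pgp', 'mail', 'john@example.com') == 'pgp:mail:john@example.com'
assert get_obj_global_id('domain', '', 'example.onion') == 'domain:example.onion'
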
r_serv_objects.hdel('user:all_objs_to_export:{}'.format(user_id), global_id) def delete_all_user_object_to_export(user_id): diff --git a/bin/export/Export.py b/bin/export/Export.py index 90de4570..c81c1b3a 100755 --- a/bin/export/Export.py +++ b/bin/export/Export.py @@ -3,11 +3,10 @@ import os import sys -import redis from uuid import uuid4 -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import ConfigLoader +sys.path.append(os.environ['AIL_BIN']) +from lib import ConfigLoader sys.path.append('../../configs/keys') try: diff --git a/bin/export/MispExport.py b/bin/export/MispExport.py index d1d09b70..56f0501c 100755 --- a/bin/export/MispExport.py +++ b/bin/export/MispExport.py @@ -5,30 +5,16 @@ import os import io import sys import uuid -import redis -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'export')) -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Item -import Cryptocurrency -import Pgp -import Decoded -import Domain -import Screenshot +sys.path.append(os.environ['AIL_BIN']) +from lib.objects import ail_objects -import Username - -import Correlate_object - -import AILObjects -import Export +from export import AILObjects -from Investigations import Investigation -import Tag +from lib.Investigations import Investigation -# # TODO: # FIXME: REFRACTOR ME => use UI/Global config +# # TODO: # FIXME: REFACTOR ME => use UI/Global config sys.path.append('../../configs/keys') try: from mispKEYS import misp_url, misp_key, misp_verifycert @@ -41,11 +27,11 @@ except: from pymisp import MISPEvent, MISPObject, PyMISP def is_valid_obj_to_export(obj_type, obj_subtype, obj_id): - if not Correlate_object.is_valid_object_type(obj_type): + if not ail_objects.is_valid_object_type(obj_type): return False - if not Correlate_object.is_valid_object_subtype(obj_type, obj_subtype): + if not ail_objects.is_valid_object_subtype(obj_type, obj_subtype): return False - if not Correlate_object.exist_object(obj_type, obj_id, type_id=obj_subtype): + if not ail_objects.exists_obj(obj_type, obj_subtype, obj_id): return False return True @@ -62,159 +48,9 @@ def get_export_filename(json_content): def create_in_memory_file(json_content): return io.BytesIO(json_content.encode()) -def tag_misp_object_attributes(l_ref_obj_attr, tags): - for obj_attr in l_ref_obj_attr: - for tag in tags: - obj_attr.add_tag(tag) - -def export_ail_item(item_id, tags=[]): - dict_metadata = Item.get_item({'id': item_id, 'date':True, 'tags':True, 'raw_content':True})[0] - # force tags - for tag in tags: - if tag not in dict_metadata['tags']: - dict_metadata['tags'].append(tag) - - #obj = MISPObject('ail-item', standalone=True) - obj = MISPObject('ail-leak', standalone=True) - obj.first_seen = dict_metadata['date'] - - l_obj_attr = [] - l_obj_attr.append( obj.add_attribute('first-seen', value=dict_metadata['date']) ) - l_obj_attr.append( obj.add_attribute('raw-data', value=item_id, data=dict_metadata['raw_content']) ) - l_obj_attr.append( obj.add_attribute('sensor', value=Export.get_ail_uuid()) ) - - # add tags - if dict_metadata['tags']: - tag_misp_object_attributes(l_obj_attr, dict_metadata['tags']) - return obj - -def export_domain(domain): - domain_obj = Domain.Domain(domain) - dict_metadata = domain_obj.get_domain_metadata(tags=True) - - # create domain-ip obj - obj = MISPObject('domain-crawled', standalone=True) - obj.first_seen = dict_metadata['first_seen'] - obj.last_seen = dict_metadata['last_check'] - - l_obj_attr = [] - 
l_obj_attr.append( obj.add_attribute('domain', value=domain) ) - dict_all_url = Domain.get_domain_all_url(domain, domain_obj.get_domain_type()) - for crawled_url in dict_all_url: - attribute = obj.add_attribute('url', value=crawled_url) - attribute.first_seen = str(dict_all_url[crawled_url]['first_seen']) - attribute.last_seen = str(dict_all_url[crawled_url]['last_seen']) - l_obj_attr.append( attribute ) - - # add tags - if dict_metadata['tags']: - tag_misp_object_attributes(l_obj_attr, dict_metadata['tags']) - - #print(obj.to_json()) - return obj - -# TODO: add tags -def export_decoded(sha1_string): - - decoded_metadata = Decoded.get_decoded_metadata(sha1_string, tag=True) - - obj = MISPObject('file') - obj.first_seen = decoded_metadata['first_seen'] - obj.last_seen = decoded_metadata['last_seen'] - - l_obj_attr = [] - l_obj_attr.append( obj.add_attribute('sha1', value=sha1_string) ) - l_obj_attr.append( obj.add_attribute('mimetype', value=Decoded.get_decoded_item_type(sha1_string)) ) - l_obj_attr.append( obj.add_attribute('malware-sample', value=sha1_string, data=Decoded.get_decoded_file_content(sha1_string)) ) - - # add tags - if decoded_metadata['tags']: - tag_misp_object_attributes(l_obj_attr, decoded_metadata['tags']) - - return obj - -# TODO: add tags -def export_screenshot(sha256_string): - obj = MISPObject('file') - - l_obj_attr = [] - l_obj_attr.append( obj.add_attribute('sha256', value=sha256_string) ) - l_obj_attr.append( obj.add_attribute('attachment', value=sha256_string, data=Screenshot.get_screenshot_file_content(sha256_string)) ) - - # add tags - tags = Screenshot.get_screenshot_tags(sha256_string) - if tags: - tag_misp_object_attributes(l_obj_attr, tags) - - return obj - -# TODO: add tags -def export_cryptocurrency(crypto_type, crypto_address): - dict_metadata = Cryptocurrency.cryptocurrency.get_metadata(crypto_type, crypto_address) - - obj = MISPObject('coin-address') - obj.first_seen = dict_metadata['first_seen'] - obj.last_seen = dict_metadata['last_seen'] - - l_obj_attr = [] - l_obj_attr.append( obj.add_attribute('address', value=crypto_address) ) - crypto_symbol = Cryptocurrency.get_cryptocurrency_symbol(crypto_type) - if crypto_symbol: - l_obj_attr.append( obj.add_attribute('symbol', value=crypto_symbol) ) - - return obj - -# TODO: add tags -def export_pgp(pgp_type, pgp_value): - dict_metadata = Pgp.pgp.get_metadata(pgp_type, pgp_value) - - obj = MISPObject('pgp-meta') - obj.first_seen = dict_metadata['first_seen'] - obj.last_seen = dict_metadata['last_seen'] - - l_obj_attr = [] - if pgp_type=='key': - l_obj_attr.append( obj.add_attribute('key-id', value=pgp_value) ) - elif pgp_type=='name': - #l_obj_attr.append( obj.add_attribute('key-id', value='debug') ) - l_obj_attr.append( obj.add_attribute('user-id-name', value=pgp_value) ) - else: # mail - #l_obj_attr.append( obj.add_attribute('key-id', value='debug') ) - l_obj_attr.append( obj.add_attribute('user-id-email', value=pgp_value) ) - return obj - -def export_username(username_type, username): - dict_metadata = Username.correlation.get_metadata(username_type, username) - - obj_attrs = [] - if username_type == 'telegram': - obj = MISPObject('telegram-account', standalone=True) - obj_attrs.append( obj.add_attribute('username', value=username) ) - - elif username_type == 'twitter': - obj = MISPObject('twitter-account', standalone=True) - obj_attrs.append( obj.add_attribute('name', value=username) ) - - else: - obj = MISPObject('user-account', standalone=True) - obj_attrs.append( obj.add_attribute('username', 
value=username) ) - - obj.first_seen = dict_metadata['first_seen'] - obj.last_seen = dict_metadata['last_seen'] - # for obj_attr in obj_attrs: - # for tag in self.get_tags(): - # obj_attr.add_tag(tag) - return obj - -# filter objects to export, export only object who correlect which each other -def filter_obj_linked(l_obj): - for obj in l_obj: - res = Correlate_object.get_object_correlation(obj['type'], obj['id'], obj.get('subtype', None)) - print(res) - def add_relation_ship_to_create(set_relationship, dict_obj, dict_new_obj): - global_id = Correlate_object.get_obj_global_id(dict_obj['type'], dict_obj['id'], dict_obj.get('subtype', None)) - global_id_new = Correlate_object.get_obj_global_id(dict_new_obj['type'], dict_new_obj['id'], dict_new_obj.get('subtype', None)) + global_id = ail_objects.get_obj_global_id(dict_obj['type'], dict_obj.get('subtype', ''), dict_obj['id']) + global_id_new = ail_objects.get_obj_global_id(dict_new_obj['type'], dict_new_obj.get('subtype', ''), dict_new_obj['id']) if global_id > global_id_new: res = (global_id, global_id_new) else: @@ -224,7 +60,7 @@ def add_relation_ship_to_create(set_relationship, dict_obj, dict_new_obj): # # TODO: add action by obj type # ex => Domain def add_obj_to_create(all_obj_to_export, set_relationship, dict_obj): - all_obj_to_export.add(Correlate_object.get_obj_global_id(dict_obj['type'], dict_obj['id'], dict_obj.get('subtype', None))) + all_obj_to_export.add(ail_objects.get_obj_global_id(dict_obj['type'], dict_obj.get('subtype', ''), dict_obj['id'])) def add_obj_to_create_by_lvl(all_obj_to_export, set_relationship, dict_obj, lvl): # # TODO: filter by export mode or filter on all global ? @@ -235,7 +71,7 @@ def add_obj_to_create_by_lvl(all_obj_to_export, set_relationship, dict_obj, lvl) lvl = lvl - 1 # # TODO: filter by correlation types - obj_correlations = Correlate_object.get_object_correlation(dict_obj['type'], dict_obj['id'], requested_correl_type=dict_obj.get('subtype', None)) + obj_correlations = ail_objects.get_obj_correlations(dict_obj['type'], dict_obj.get('subtype', ''), dict_obj['id']) for obj_type in obj_correlations: dict_new_obj = {'type': obj_type} if obj_type=='pgp' or obj_type=='cryptocurrency' or obj_type=='username': @@ -286,32 +122,23 @@ def create_list_of_objs_to_export(l_obj, r_type='json'): return event +# TODO REFACTOR ME def create_all_misp_obj(all_obj_to_export, set_relationship): dict_misp_obj = {} for obj_global_id in all_obj_to_export: obj_type, obj_id = obj_global_id.split(':', 1) dict_misp_obj[obj_global_id] = create_misp_obj(obj_type, obj_id) - return dict_misp_obj +# TODO REFACTOR ME def create_misp_obj(obj_type, obj_id): - if obj_type == 'item': - return export_ail_item(obj_id) - elif obj_type == 'decoded': - return export_decoded(obj_id) - elif obj_type == 'image': - return export_screenshot(obj_id) - elif obj_type == 'cryptocurrency': + if obj_type in ['cryptocurrency', 'pgp', 'username']: obj_subtype, obj_id = obj_id.split(':', 1) - return export_cryptocurrency(obj_subtype, obj_id) - elif obj_type == 'pgp': - obj_subtype, obj_id = obj_id.split(':', 1) - return export_pgp(obj_subtype, obj_id) - elif obj_type == 'username': - obj_subtype, obj_id = obj_id.split(':', 1) - return export_username(obj_subtype, obj_id) - elif obj_type == 'domain': - return export_domain(obj_id) + else: + obj_subtype = '' + misp_obj = ail_objects.get_misp_object(obj_type, obj_subtype, obj_id) + return misp_obj + def get_relationship_between_global_obj(obj_global_id_1, obj_global_id_2): obj_type_1 = 
obj_global_id_1.split(':', 1)[0] diff --git a/bin/export/MispImport.py b/bin/export/MispImport.py index b67bc886..a2043164 100755 --- a/bin/export/MispImport.py +++ b/bin/export/MispImport.py @@ -4,21 +4,16 @@ import os import sys import uuid -import redis from hashlib import sha1, sha256 -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Item -import Cryptocurrency -import Pgp -import Decoded -import Domain -import Screenshot -import Correlate_object +sys.path.append(os.environ['AIL_BIN']) +from lib.objects import ail_objects -import AILObjects + +from lib.objects import Items + +from export import AILObjects # MISP from pymisp import MISPEvent, MISPObject, PyMISP @@ -91,7 +86,7 @@ def unpack_item_obj(map_uuid_global_id, misp_obj): io_content = attribute.data # # TODO: check if type == io if obj_id and io_content: - res = Item.create_item(obj_id, obj_meta, io_content) + res = Items.create_item(obj_id, obj_meta, io_content) map_uuid_global_id[misp_obj.uuid] = get_global_id('item', obj_id) @@ -99,38 +94,44 @@ def unpack_item_obj(map_uuid_global_id, misp_obj): ## TODO: handle multiple pgp in the same object def unpack_obj_pgp(map_uuid_global_id, misp_obj): - # get obj sub type - obj_attr = misp_obj.attributes[0] - obj_id = obj_attr.value - if obj_attr.object_relation == 'key-id': - obj_subtype = 'key' - elif obj_attr.object_relation == 'user-id-name': - obj_subtype = 'name' - elif obj_attr.object_relation == 'user-id-email': - obj_subtype = 'mail' - - if obj_id and obj_subtype: - obj_meta = get_object_metadata(misp_obj) - res = Pgp.pgp.create_correlation(obj_subtype, obj_id, obj_meta) - - map_uuid_global_id[misp_obj.uuid] = get_global_id('pgp', obj_id, obj_subtype=obj_subtype) + # TODO ail_objects import_misp_object(misp_obj) + pass + # # get obj sub type + # obj_attr = misp_obj.attributes[0] + # obj_id = obj_attr.value + # if obj_attr.object_relation == 'key-id': + # obj_subtype = 'key' + # elif obj_attr.object_relation == 'user-id-name': + # obj_subtype = 'name' + # elif obj_attr.object_relation == 'user-id-email': + # obj_subtype = 'mail' + # + # if obj_id and obj_subtype: + # obj_meta = get_object_metadata(misp_obj) + # # res = Pgp.pgp.create_correlation(obj_subtype, obj_id, obj_meta) + # # TODO ail_objects import_misp_object(misp_obj) + # + # map_uuid_global_id[misp_obj.uuid] = get_global_id('pgp', obj_id, obj_subtype=obj_subtype) def unpack_obj_cryptocurrency(map_uuid_global_id, misp_obj): - obj_id = None - obj_subtype = None - for attribute in misp_obj.attributes: - if attribute.object_relation == 'address': # # TODO: handle xmr address field - obj_id = attribute.value - elif attribute.object_relation == 'symbol': - obj_subtype = Cryptocurrency.get_cryptocurrency_type(attribute.value) - - # valid cryptocurrency type - if obj_subtype and obj_id: - obj_meta = get_object_metadata(misp_obj) - res = Cryptocurrency.cryptocurrency.create_correlation(obj_subtype, obj_id, obj_meta) - - map_uuid_global_id[misp_obj.uuid] = get_global_id('cryptocurrency', obj_id, obj_subtype=obj_subtype) + # TODO ail_objects import_misp_object(misp_obj) + pass + # + # obj_id = None + # obj_subtype = None + # for attribute in misp_obj.attributes: + # if attribute.object_relation == 'address': # # TODO: handle xmr address field + # obj_id = attribute.value + # elif attribute.object_relation == 'symbol': + # obj_subtype = Cryptocurrency.get_cryptocurrency_type(attribute.value) + # + # # valid cryptocurrency type + # if obj_subtype 
and obj_id: + # obj_meta = get_object_metadata(misp_obj) + # # res = Cryptocurrency.cryptocurrency.create_correlation(obj_subtype, obj_id, obj_meta) + # + # map_uuid_global_id[misp_obj.uuid] = get_global_id('cryptocurrency', obj_id, obj_subtype=obj_subtype) def get_obj_type_from_relationship(misp_obj): obj_uuid = misp_obj.uuid @@ -180,10 +181,12 @@ def unpack_file(map_uuid_global_id, misp_obj): if obj_id and io_content: obj_meta = get_object_metadata(misp_obj) if obj_type == 'screenshot': - Screenshot.create_screenshot(obj_id, obj_meta, io_content) + # TODO MIGRATE + REFACTOR ME + # Screenshot.create_screenshot(obj_id, obj_meta, io_content) map_uuid_global_id[misp_obj.uuid] = get_global_id('image', obj_id) else: #decoded - Decoded.create_decoded(obj_id, obj_meta, io_content) + # TODO MIGRATE + REFACTOR ME + # Decoded.create_decoded(obj_id, obj_meta, io_content) map_uuid_global_id[misp_obj.uuid] = get_global_id('decoded', obj_id) @@ -213,8 +216,7 @@ def create_obj_relationships(map_uuid_global_id, misp_obj): print(obj_meta_target) print('111111') - Correlate_object.create_obj_relationship(obj_meta_src['type'], obj_meta_src['id'], obj_meta_target['type'], obj_meta_target['id'], - obj1_subtype=obj_meta_src['subtype'], obj2_subtype=obj_meta_target['subtype']) + # TODO CREATE OBJ RELATIONSHIP def create_map_all_obj_uuid_golbal_id(map_uuid_global_id): for obj_uuid in map_uuid_global_id: diff --git a/bin/launch_queues.py b/bin/launch_queues.py index 9eac1a98..44e4e249 100755 --- a/bin/launch_queues.py +++ b/bin/launch_queues.py @@ -17,6 +17,7 @@ def check_pid(pid): return False return True + if __name__ == '__main__': configfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg') if not os.path.exists(configfile): diff --git a/bin/lib/Config_DB.py b/bin/lib/Config_DB.py index 40998dd6..05be205e 100755 --- a/bin/lib/Config_DB.py +++ b/bin/lib/Config_DB.py @@ -12,7 +12,7 @@ import sys import redis sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import ConfigLoader +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_db = config_loader.get_redis_conn("ARDB_DB") diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py deleted file mode 100755 index a17e2bd3..00000000 --- a/bin/lib/Correlate_object.py +++ /dev/null @@ -1,481 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import uuid -import redis - -from flask import url_for - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader -import Decoded -import Domain -import Screenshot -import Username - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Pgp -import Cryptocurrency -import Item - -config_loader = ConfigLoader.ConfigLoader() -r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -baseurl = config_loader.get_config_str("Notifications", "ail_domain") -config_loader = None - -def is_valid_object_type(object_type): - if object_type in ['domain', 'item', 'image', 'decoded', 'pgp', 'cryptocurrency', 'username']: - return True - else: - return False - -def check_correlation_object(object): - if object in get_all_correlation_objects(): - return True - else: - return False - -def is_valid_object_subtype(object_type, object_subtype): - if object_type == 'pgp': - return Pgp.pgp.is_valid_obj_subtype(object_subtype) - elif object_type == 'cryptocurrency': - return Cryptocurrency.cryptocurrency.is_valid_obj_subtype(object_subtype) - elif object_type == 'username': - return 
Username.correlation.is_valid_obj_subtype(object_subtype) - elif object_subtype == None: - return True - else: - return False - -def get_all_objects(): - return ['domain', 'paste', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username'] - -def get_all_correlation_names(): - ''' - Return a list of all available correlations - ''' - return ['pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username'] - -def get_all_correlation_objects(): - ''' - Return a list of all correllated objects - ''' - return ['domain', 'paste'] - -def exist_object(object_type, correlation_id, type_id=None): # => work on object level - if object_type == 'domain': - return Domain.verify_if_domain_exist(correlation_id) - elif object_type == 'paste' or object_type == 'item': - return Item.exist_item(correlation_id) - elif object_type == 'decoded': - return Decoded.exist_decoded(correlation_id) - elif object_type == 'pgp': - return Pgp.pgp.exist_correlation(type_id, correlation_id) - elif object_type == 'cryptocurrency': - return Cryptocurrency.cryptocurrency.exist_correlation(type_id, correlation_id) - elif object_type == 'username': - return Username.correlation.exist_correlation(type_id, correlation_id) - elif object_type == 'screenshot' or object_type == 'image': - return Screenshot.exist_screenshot(correlation_id) - else: - return False - -# request_type => api or ui -def get_object_metadata(object_type, correlation_id, type_id=None): - if object_type == 'domain': - return Domain.Domain(correlation_id).get_domain_metadata(tags=True) - elif object_type == 'paste' or object_type == 'item': - return Item.get_item({"id": correlation_id, "date": True, "date_separator": True, "tags": True})[0] - elif object_type == 'decoded': - return Decoded.get_decoded_metadata(correlation_id, nb_seen=True, size=True, file_type=True, tag=True) - elif object_type == 'pgp': - return Pgp.pgp.get_metadata(type_id, correlation_id) - elif object_type == 'cryptocurrency': - return Cryptocurrency.cryptocurrency.get_metadata(type_id, correlation_id) - elif object_type == 'username': - return Username.correlation.get_metadata(type_id, correlation_id) - elif object_type == 'screenshot' or object_type == 'image': - return Screenshot.get_metadata(correlation_id) - -def get_object_correlation(object_type, value, correlation_names=None, correlation_objects=None, requested_correl_type=None): - if object_type == 'domain': - return Domain.get_domain_all_correlation(value, correlation_names=correlation_names) - elif object_type == 'paste' or object_type == 'item': - return Item.get_item_all_correlation(value, correlation_names=correlation_names) - elif object_type == 'decoded': - return Decoded.get_decoded_correlated_object(value, correlation_objects=correlation_objects) - elif object_type == 'pgp': - return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) - elif object_type == 'cryptocurrency': - return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) - elif object_type == 'username': - return Username.correlation.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) - elif object_type == 'screenshot' or object_type == 'image': - return Screenshot.get_screenshot_correlated_object(value, correlation_objects=correlation_objects) - return {} - -def get_correlation_node_icon(correlation_name, correlation_type=None, value=None): - ''' - Used in UI Graph. 
- Return a font awesome icon for a given correlation_name. - - :param correlation_name: correlation name - :param correlation_name: str - :param correlation_type: correlation type - :type correlation_type: str, optional - - :return: a dictionnary {font awesome class, icon_code} - :rtype: dict - ''' - - icon_class = 'fas' - icon_text = '' - node_color = "#332288" - node_radius = 6 - if correlation_name == "pgp": - node_color = '#44AA99' - if correlation_type == 'key': - icon_text = '\uf084' - elif correlation_type == 'name': - icon_text = '\uf507' - elif correlation_type == 'mail': - icon_text = '\uf1fa' - else: - icon_text = 'times' - - elif correlation_name == 'cryptocurrency': - node_color = '#DDCC77' - if correlation_type == 'bitcoin': - icon_class = 'fab' - icon_text = '\uf15a' - elif correlation_type == 'monero': - icon_class = 'fab' - icon_text = '\uf3d0' - elif correlation_type == 'ethereum': - icon_class = 'fab' - icon_text = '\uf42e' - else: - icon_text = '\uf51e' - - elif correlation_name == 'username': - node_color = '#4dffff' - if correlation_type == 'telegram': - icon_class = 'fab' - icon_text = '\uf2c6' - elif correlation_type == 'twitter': - icon_class = 'fab' - icon_text = '\uf099' - elif correlation_type == 'jabber': - icon_class = 'fa' - icon_text = '\uf007' - else: - icon_class = 'fa' - icon_text = '\uf007' - - elif correlation_name == 'decoded': - node_color = '#88CCEE' - correlation_type = Decoded.get_decoded_item_type(value).split('/')[0] - if correlation_type == 'application': - icon_text = '\uf15b' - elif correlation_type == 'audio': - icon_text = '\uf1c7' - elif correlation_type == 'image': - icon_text = '\uf1c5' - elif correlation_type == 'text': - icon_text = '\uf15c' - else: - icon_text = '\uf249' - - elif correlation_name == 'screenshot' or correlation_name == 'image': - node_color = '#E1F5DF' - icon_text = '\uf03e' - - elif correlation_name == 'domain': - node_radius = 5 - node_color = '#3DA760' - if Domain.get_domain_type(value) == 'onion': - icon_text = '\uf06e' - else: - icon_class = 'fab' - icon_text = '\uf13b' - - elif correlation_name == 'paste': - node_radius = 5 - if Item.is_crawled(value): - node_color = 'red' - else: - node_color = '#332288' - - return {"icon_class": icon_class, "icon_text": icon_text, "node_color": node_color, "node_radius": node_radius} - -# flask_context: if this function is used with a Flask app context -def get_item_url(correlation_name, value, correlation_type=None, flask_context=True): - ''' - Warning: use only in flask - ''' - url = '#' - if correlation_name == "pgp": - if flask_context: - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="pgp", type_id=correlation_type, correlation_id=value) - else: - url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&type_id={correlation_type}&correlation_id={value}' - elif correlation_name == 'cryptocurrency': - if flask_context: - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="cryptocurrency", type_id=correlation_type, correlation_id=value) - else: - url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&type_id={correlation_type}&correlation_id={value}' - elif correlation_name == 'username': - if flask_context: - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="username", type_id=correlation_type, correlation_id=value) - else: - url = 
f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&type_id={correlation_type}&correlation_id={value}' - elif correlation_name == 'decoded': - if flask_context: - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="decoded", correlation_id=value) - else: - url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&correlation_id={value}' - elif correlation_name == 'screenshot' or correlation_name == 'image': ### # TODO: rename me - if flask_context: - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="screenshot", correlation_id=value) - else: - url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&correlation_id={value}' - elif correlation_name == 'domain': - if flask_context: - endpoint = 'crawler_splash.showDomain' - url = url_for(endpoint, domain=value) - else: - url = f'{baseurl}/crawlers/showDomain?domain={value}' - elif correlation_name == 'item' or correlation_name == 'paste': ### # TODO: remove paste - if flask_context: - endpoint = 'objects_item.showItem' - url = url_for(endpoint, id=value) - else: - url = f'{baseurl}/object/item?id={value}' - #print(url) - return url - -def get_obj_tag_table_keys(object_type): - ''' - Warning: use only in flask (dynamic templates) - ''' - if object_type=="domain": - return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot - -def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None): - if obj1_type == 'domain': - pass - elif obj1_type == 'item': - pass # son/father + duplicate + domain - elif obj1_type == 'pgp': - Pgp.pgp.save_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id) - elif obj1_type == 'cryptocurrency': - Cryptocurrency.cryptocurrency.save_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id) - elif obj1_type == 'decoded': - Decoded.save_obj_relationship(obj1_id, obj2_type, obj2_id) - elif obj1_type == 'image': - Screenshot.save_obj_relationship(obj1_id, obj2_type, obj2_id) - -def delete_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None): - if obj1_type == 'domain': - pass - elif obj1_type == 'item': - pass # son/father + duplicate + domain - elif obj1_type == 'pgp': - Pgp.pgp.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id) - elif obj1_type == 'cryptocurrency': - Cryptocurrency.cryptocurrency.delete_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id) - elif obj1_type == 'decoded': - Decoded.delete_obj_relationship(obj1_id, obj2_type, obj2_id) - elif obj1_type == 'image': - Screenshot.delete_obj_relationship(obj1_id, obj2_type, obj2_id) - -def create_graph_links(links_set): - graph_links_list = [] - for link in links_set: - graph_links_list.append({"source": link[0], "target": link[1]}) - return graph_links_list - -def create_graph_nodes(nodes_set, root_node_id, flask_context=True): - graph_nodes_list = [] - for node_id in nodes_set: - correlation_name, correlation_type, value = node_id.split(';', 3) - dict_node = {"id": node_id} - dict_node['style'] = get_correlation_node_icon(correlation_name, correlation_type, value) - dict_node['text'] = value - if node_id == root_node_id: - dict_node["style"]["node_color"] = 'orange' - dict_node["style"]["node_radius"] = 7 - dict_node['url'] = get_item_url(correlation_name, value, correlation_type, flask_context=flask_context) - graph_nodes_list.append(dict_node) - return graph_nodes_list - -def 
create_node_id(correlation_name, value, correlation_type=''): - if correlation_type is None: - correlation_type = '' - return '{};{};{}'.format(correlation_name, correlation_type, value) - - - -# # TODO: filter by correlation type => bitcoin, mail, ... -def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None, flask_context=True): - links = set() - nodes = set() - - root_node_id = create_node_id(object_type, root_value, requested_correl_type) - nodes.add(root_node_id) - - root_correlation = get_object_correlation(object_type, root_value, correlation_names, correlation_objects, requested_correl_type=requested_correl_type) - for correl in root_correlation: - if correl in ('pgp', 'cryptocurrency', 'username'): - for correl_type in root_correlation[correl]: - for correl_val in root_correlation[correl][correl_type]: - - # add correlation - correl_node_id = create_node_id(correl, correl_val, correl_type) - - if mode=="union": - if len(nodes) > max_nodes: - break - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - # get second correlation - res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects, requested_correl_type=correl_type) - if res: - for corr_obj in res: - for correl_key_val in res[corr_obj]: - #filter root value - if correl_key_val == root_value: - continue - - if len(nodes) > max_nodes: - break - new_corel_1 = create_node_id(corr_obj, correl_key_val) - new_corel_2 = create_node_id(correl, correl_val, correl_type) - nodes.add(new_corel_1) - nodes.add(new_corel_2) - links.add((new_corel_1, new_corel_2)) - - if mode=="inter": - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - if correl in ('decoded', 'screenshot', 'domain', 'paste'): - for correl_val in root_correlation[correl]: - - correl_node_id = create_node_id(correl, correl_val) - if mode=="union": - if len(nodes) > max_nodes: - break - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects) - if res: - for corr_obj in res: - if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'): - for correl_key_val in res[corr_obj]: - #filter root value - if correl_key_val == root_value: - continue - - if len(nodes) > max_nodes: - break - new_corel_1 = create_node_id(corr_obj, correl_key_val) - new_corel_2 = create_node_id(correl, correl_val) - nodes.add(new_corel_1) - nodes.add(new_corel_2) - links.add((new_corel_1, new_corel_2)) - - if mode=="inter": - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - if corr_obj in ('pgp', 'cryptocurrency', 'username'): - for correl_key_type in res[corr_obj]: - for correl_key_val in res[corr_obj][correl_key_type]: - #filter root value - if correl_key_val == root_value: - continue - - if len(nodes) > max_nodes: - break - new_corel_1 = create_node_id(corr_obj, correl_key_val, correl_key_type) - new_corel_2 = create_node_id(correl, correl_val) - nodes.add(new_corel_1) - nodes.add(new_corel_2) - links.add((new_corel_1, new_corel_2)) - - if mode=="inter": - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - - return {"nodes": create_graph_nodes(nodes, root_node_id, flask_context=flask_context), "links": create_graph_links(links)} - - -def get_obj_global_id(obj_type, obj_id, obj_sub_type=None): - if obj_sub_type: - return '{}:{}:{}'.format(obj_type, obj_sub_type, obj_id) - else: - # # TODO: 
remove me - if obj_type=='paste': - obj_type='item' - # # TODO: remove me - if obj_type=='screenshot': - obj_type='image' - - return '{}:{}'.format(obj_type, obj_id) - - - -def sanitise_correlation_names(correlation_names): - ''' - correlation_names ex = 'pgp,crypto' - ''' - all_correlation_names = get_all_correlation_names() - if correlation_names is None: - return all_correlation_names - else: - l_correlation_names = [] - for correl in correlation_names.split(','): - if correl in all_correlation_names: - l_correlation_names.append(correl) - if l_correlation_names: - return l_correlation_names - else: - return all_correlation_names - -def sanitise_correlation_objects(correlation_objects): - ''' - correlation_objects ex = 'domain,paste' - ''' - all_correlation_objects = get_all_correlation_objects() - if correlation_objects is None: - return all_correlation_objects - else: - l_correlation_objects = [] - for correl in correlation_objects.split(','): - if correl in all_correlation_objects: - l_correlation_objects.append(correl) - if l_correlation_objects: - return l_correlation_objects - else: - return all_correlation_objects - -######## API EXPOSED ######## -def api_check_correlation_objects(l_object): - for object in l_object: - if not check_correlation_object(object): - return ({"error": f"Invalid Object: {object}"}, 400) - -def sanitize_object_type(object_type): - if not is_valid_object_type(object_type): - return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400) -######## ######## diff --git a/bin/lib/Decoded.py b/bin/lib/Decoded.py deleted file mode 100755 index 57d59256..00000000 --- a/bin/lib/Decoded.py +++ /dev/null @@ -1,407 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import magic -import sys -import redis - -from io import BytesIO - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Item -import Date - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import Tag - -import ConfigLoader - -config_loader = ConfigLoader.ConfigLoader() -r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -HASH_DIR = config_loader.get_config_str('Directories', 'hash') -config_loader = None - -# # TODO: move me in another file -def get_all_correlation_objects(): - ''' - Return a list of all correllated objects - ''' - return ['domain', 'paste'] - -def get_all_decoder(): - return ['base64', 'binary', 'hexadecimal'] - -# TODO: # REVIEW: default => base64 -def sanitize_decoder_name(decoder_name): - if decoder_name in get_all_decoder(): - return decoder_name - else: - return 'base64' - -def get_decoded_item_type(sha1_string): - ''' - Retun the estimed type of a given decoded item. 
-def get_decoded_item_type(sha1_string):
-    '''
-    Return the estimated type of a given decoded item.
-
-    :param sha1_string: sha1_string
-    '''
-    return r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'estimated_type')
-
-def get_file_mimetype(bytes_content):
-    return magic.from_buffer(bytes_content, mime=True)
-
-def nb_decoded_seen_in_item(sha1_string):
-    nb = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes')
-    if nb is None:
-        return 0
-    else:
-        return int(nb)
-
-def nb_decoded_item_size(sha1_string):
-    nb = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'size')
-    if nb is None:
-        return 0
-    else:
-        return int(nb)
-
-def get_decoded_relative_path(sha1_string, mimetype=None):
-    if not mimetype:
-        mimetype = get_decoded_item_type(sha1_string)
-    return os.path.join(HASH_DIR, mimetype, sha1_string[0:2], sha1_string)
-
-def get_decoded_filepath(sha1_string, mimetype=None):
-    return os.path.join(os.environ['AIL_HOME'], get_decoded_relative_path(sha1_string, mimetype=mimetype))
-
-def exist_decoded(sha1_string):
-    return r_serv_metadata.exists('metadata_hash:{}'.format(sha1_string))
-
-def get_decoded_first_seen(sha1_string, r_int=False):
-    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'first_seen')
-    if res:
-        res = res.replace('/', '')
-    if r_int:
-        if res:
-            return int(res)
-        else:
-            return 99999999
-    return res
-
-def get_decoded_last_seen(sha1_string, r_int=False):
-    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'last_seen')
-    if res:
-        res = res.replace('/', '')
-    if r_int:
-        if res:
-            return int(res)
-        else:
-            return 0
-    return res
-
-def get_decoded_metadata(sha1_string, nb_seen=False, size=False, file_type=False, tag=False):
-    metadata_dict = {}
-    metadata_dict['first_seen'] = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'first_seen')
-    metadata_dict['last_seen'] = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'last_seen')
-    if nb_seen:
-        metadata_dict['nb_seen'] = nb_decoded_seen_in_item(sha1_string)
-    if size:
-        metadata_dict['size'] = nb_decoded_item_size(sha1_string)
-    if file_type:
-        metadata_dict['file_type'] = get_decoded_item_type(sha1_string)
-    if tag:
-        metadata_dict['tags'] = get_decoded_tag(sha1_string)
-    return metadata_dict
-
-def get_decoded_tag(sha1_string):
-    return Tag.get_object_tags('decoded', sha1_string)
-
-def get_list_nb_previous_hash(sha1_string, num_day):
-    nb_previous_hash = []
-    for date_day in Date.get_previous_date_list(num_day):
-        nb_previous_hash.append(get_nb_hash_seen_by_date(sha1_string, date_day))
-    return nb_previous_hash
-
-def get_nb_hash_seen_by_date(sha1_string, date_day):
-    nb = r_serv_metadata.zscore('hash_date:{}'.format(date_day), sha1_string)
-    if nb is None:
-        return 0
-    else:
-        return int(nb)
-
-def get_decoded_vt_report(sha1_string):
-    vt_dict = {}
-    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'vt_link')
-    if res:
-        vt_dict["link"] = res
-    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'vt_report')
-    if res:
-        vt_dict["report"] = res
-    return vt_dict
-
-
-def get_decoded_items_list(sha1_string):
-    return r_serv_metadata.zrange('nb_seen_hash:{}'.format(sha1_string), 0, -1)
-
-def get_item_decoded(item_id):
-    '''
-    Return all decoded items of a given item id.
-
-    :param item_id: item id
-    '''
-    res = r_serv_metadata.smembers('hash_paste:{}'.format(item_id))
-    if res:
-        return list(res)
-    else:
-        return []
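The item and domain lookups around this point rely on Redis sets maintained in both directions, plus a per-hash zset counting sightings per item. A small sketch of the mapping with made-up identifiers (local Redis assumed; note the redis-py 3.x argument order for zincrby):

    import redis

    r = redis.Redis(decode_responses=True)

    sha1 = '356a192b7913b04c54574d18c28d46e6395428ab'        # hypothetical decoded file
    item_id = 'archive/pastebin.com_pro/2022/11/28/demo.gz'  # hypothetical item
    domain = 'example.onion'                                 # hypothetical crawled domain

    # item <-> hash: set on the item side, sighting-count zset on the hash side
    r.sadd(f'hash_paste:{item_id}', sha1)
    r.zincrby(f'nb_seen_hash:{sha1}', 1, item_id)

    # domain <-> hash, kept in both directions for cheap reverse lookups
    r.sadd(f'hash_domain:{domain}', sha1)
    r.sadd(f'domain_hash:{sha1}', domain)

    print(r.smembers(f'domain_hash:{sha1}'))  # {'example.onion'}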
-def get_domain_decoded_item(domain):
-    '''
-    Return all decoded items of a given domain.
-
-    :param domain: crawled domain
-    '''
-    res = r_serv_metadata.smembers('hash_domain:{}'.format(domain))
-    if res:
-        return list(res)
-    else:
-        return []
-
-def get_decoded_domain_item(sha1_string):
-    '''
-    Return all domains of a given decoded item.
-
-    :param sha1_string: sha1_string
-    '''
-    res = r_serv_metadata.smembers('domain_hash:{}'.format(sha1_string))
-    if res:
-        return list(res)
-    else:
-        return []
-
-def get_decoded_correlated_object(sha1_string, correlation_objects=[]):
-    '''
-    Return all correlations of a given sha1.
-
-    :param sha1_string: sha1
-    :type sha1_string: str
-
-    :return: a dict of all correlations for a given sha1
-    :rtype: dict
-    '''
-    if not correlation_objects:
-        correlation_objects = get_all_correlation_objects()
-    decoded_correlation = {}
-    for correlation_object in correlation_objects:
-        if correlation_object == 'paste':
-            res = get_decoded_items_list(sha1_string)
-        elif correlation_object == 'domain':
-            res = get_decoded_domain_item(sha1_string)
-        else:
-            res = None
-        if res:
-            decoded_correlation[correlation_object] = res
-    return decoded_correlation
-
-# # TODO: add delete
-# delete stats
-def create_decoder_matadata(sha1_string, item_id, decoder_type):
-    estimated_type = get_decoded_item_type(sha1_string)
-    if not estimated_type:
-        print('error, unknown sha1_string')
-    decoder_type = sanitize_decoder_name(decoder_type)
-    item_date = Item.get_item_date(item_id)
-
-    r_serv_metadata.incrby('{}_decoded:{}'.format(decoder_type, item_date), 1)
-    r_serv_metadata.zincrby('{}_date:{}'.format(decoder_type, item_date), sha1_string, 1)
-
-    # first time we see this hash encoding on this item
-    if r_serv_metadata.zscore('{}_hash:{}'.format(decoder_type, sha1_string), item_id) is None:
-
-        # create hash metadata
-        r_serv_metadata.sadd('hash_{}_all_type'.format(decoder_type), estimated_type)
-
-        # first time we see this hash encoding today
-        if r_serv_metadata.zscore('{}_date:{}'.format(decoder_type, item_date), sha1_string) is None:
-            r_serv_metadata.zincrby('{}_type:{}'.format(decoder_type, estimated_type), item_date, 1) # # TODO: # DUP1
-
-        r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), '{}_decoder'.format(decoder_type), 1)
-        r_serv_metadata.zincrby('{}_type:{}'.format(decoder_type, estimated_type), item_date, 1) # # TODO: # DUP1
-
-    r_serv_metadata.zincrby('{}_hash:{}'.format(decoder_type, sha1_string), item_id, 1) # number of b64 on this paste
-
-# # # TODO: check if item and decoded exist
-def save_item_relationship(sha1_string, item_id):
-    estimated_type = get_decoded_item_type(sha1_string)
-    if not estimated_type:
-        print('error, unknown sha1_string')
-
-    item_date = Item.get_item_date(item_id)
-
-    r_serv_metadata.zincrby('hash_date:{}'.format(item_date), sha1_string, 1)
-
-    update_decoded_daterange(sha1_string, item_date)
-
-    # first time we see this hash (all encoding) on this item
-    if r_serv_metadata.zscore('nb_seen_hash:{}'.format(sha1_string), item_id) is None:
-        r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes', 1) #### MOVE IT ????
-
-    # # FIXME:
-    r_serv_metadata.zincrby('nb_seen_hash:{}'.format(sha1_string), item_id, 1) # hash - paste map
-    r_serv_metadata.sadd('hash_paste:{}'.format(item_id), sha1_string) # item - hash map
-
-    # domain
-    if Item.is_crawled(item_id):
-        domain = Item.get_item_domain(item_id)
-        save_domain_relationship(domain, sha1_string)
-
-def delete_item_relationship(sha1_string, item_id):
-    item_date = Item.get_item_date(item_id)
-
-    #update_decoded_daterange(sha1_string, item_date) 3 # TODO:
-    r_serv_metadata.srem('hash_paste:{}'.format(item_id), sha1_string) # item - hash map
-
-    res = r_serv_metadata.zincrby('hash_date:{}'.format(item_date), sha1_string, -1)
-    if int(res) < 1:
-        r_serv_metadata.zrem('hash_date:{}'.format(item_date), sha1_string)
-
-    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes')
-    if int(res) > 0:
-        r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes', -1)
-
-    res = r_serv_metadata.zincrby('nb_seen_hash:{}'.format(sha1_string), item_id, 1) # hash - paste map
-    if int(res) < 1:
-        r_serv_metadata.zrem('nb_seen_hash:{}'.format(sha1_string), item_id)
-
-def save_domain_relationship(domain, sha1_string):
-    r_serv_metadata.sadd('hash_domain:{}'.format(domain), sha1_string) # domain - hash map
-    r_serv_metadata.sadd('domain_hash:{}'.format(sha1_string), domain) # hash - domain map
-
-def delete_domain_relationship(domain, sha1_string):
-    r_serv_metadata.srem('hash_domain:{}'.format(domain), sha1_string) # domain - hash map
-    r_serv_metadata.srem('domain_hash:{}'.format(sha1_string), domain) # hash - domain map
-
-def update_decoded_daterange(obj_id, new_date):
-    new_date = int(new_date)
-    new_date_str = str(new_date)
-    new_date_str = '{}/{}/{}'.format(new_date_str[0:4], new_date_str[4:6], new_date_str[6:8])
-    # obj_id does not exist
-    if not r_serv_metadata.hexists('metadata_hash:{}'.format(obj_id), 'first_seen'):
-        r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'first_seen', new_date_str)
-        r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'last_seen', new_date_str)
-    else:
-        first_seen = get_decoded_first_seen(obj_id, r_int=True)
-        last_seen = get_decoded_last_seen(obj_id, r_int=True)
-        if new_date < first_seen:
-            r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'first_seen', new_date_str)
-        if new_date > last_seen:
-            r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'last_seen', new_date_str)
-
-def save_obj_relationship(obj_id, referenced_obj_type, referenced_obj_id):
-    if referenced_obj_type == 'domain':
-        save_domain_relationship(referenced_obj_id, obj_id)
-    elif referenced_obj_type == 'item':
-        save_item_relationship(obj_id, referenced_obj_id)
-
-def delete_obj_relationship(obj_id, referenced_obj_type, referenced_obj_id):
-    if referenced_obj_type == 'domain':
-        delete_domain_relationship(referenced_obj_id, obj_id)
-    elif referenced_obj_type == 'item':
-        delete_item_relationship(obj_id, referenced_obj_id)
-
-def get_decoded_file_content(sha1_string, mimetype=None):
-    filepath = get_decoded_filepath(sha1_string, mimetype=mimetype)
-    with open(filepath, 'rb') as f:
-        file_content = BytesIO(f.read())
-    return file_content
-
-# # TODO: check file format
-def save_decoded_file_content(sha1_string, file_content, date_from, date_to=None, mimetype=None):
-    if not mimetype:
-        if exist_decoded(sha1_string):
-            mimetype = get_decoded_item_type(sha1_string)
-        else:
-            mimetype = get_file_mimetype(file_content)
-
-    filepath = get_decoded_filepath(sha1_string, mimetype=mimetype)
-    if os.path.isfile(filepath):
-        # print('File already exists')
-        return False
-
-    # create dir
-    dirname = os.path.dirname(filepath)
-    if not os.path.exists(dirname):
-        os.makedirs(dirname)
-
-    with open(filepath, 'wb') as f:
-        f.write(file_content)
-
-    # create hash metadata
-    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'size', os.path.getsize(filepath))
-    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'estimated_type', mimetype)
-    r_serv_metadata.sadd('hash_all_type', mimetype)
-
-    update_decoded_daterange(sha1_string, date_from)
-    if date_from != date_to and date_to:
-        update_decoded_daterange(sha1_string, date_to)
-
-    return True
-
-def delete_decoded_file(obj_id):
-    filepath = get_decoded_filepath(obj_id)
-    if not os.path.isfile(filepath):
-        return False
-
-    Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id)) #############
-    os.remove(filepath)
-    return True
-
-def create_decoded(obj_id, obj_meta, io_content):
-    first_seen = obj_meta.get('first_seen', None)
-    last_seen = obj_meta.get('last_seen', None)
-    date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')
-    decoded_file_content = io_content.getvalue()
-
-    res = save_decoded_file_content(obj_id, decoded_file_content, date_range['date_from'], date_to=date_range['date_to'], mimetype=None)
-    if res and 'tags' in obj_meta:
-        Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type="decoded")
-
-def delete_decoded(obj_id):
-    if not exist_decoded(obj_id):
-        return False
-
-    res = delete_decoded_file(obj_id)
-    if not res:
-        return False
-
-    obj_correlations = get_decoded_correlated_object(obj_id)
-    if 'domain' in obj_correlations:
-        for domain in obj_correlations['domain']:
-            r_serv_metadata.srem('hash_domain:{}'.format(domain), obj_id)
-        r_serv_metadata.delete('domain_hash:{}'.format(obj_id), domain)
-
-    if 'paste' in obj_correlations: # TODO: handle item
-        for item_id in obj_correlations['paste']:
-            item_date = Item.get_item_date(item_id)
-
-            r_serv_metadata.zrem('hash_date:{}'.format(item_date), obj_id)
-            r_serv_metadata.srem('hash_paste:{}'.format(item_id), obj_id)
-            for decoder_name in get_all_decoder():
-
-                r_serv_metadata.incrby('{}_decoded:{}'.format(decoder_name, item_date), -1)
-                r_serv_metadata.zrem('{}_date:{}'.format(decoder_name, item_date), obj_id)
-
-        for decoder_name in get_all_decoder():
-            r_serv_metadata.delete('{}_hash:{}'.format(decoder_name, obj_id))
-
-        r_serv_metadata.delete('nb_seen_hash:{}'.format(obj_id))
-
-
-    ####### # TODO: DUP1
-    # r_serv_metadata.zincrby('{}_type:{}'.format(decoder_type, estimated_type), item_date, 1)
-    #######
-
-    ###
-    # r_serv_metadata.sadd('hash_{}_all_type'.format(decoder_type), estimated_type)
-    # r_serv_metadata.sadd('hash_all_type', estimated_type)
-    ###
-
-    r_serv_metadata.delete('metadata_hash:{}'.format(obj_id))
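Before moving on to Domain.py: the first/last-seen bookkeeping in the deleted update_decoded_daterange above reduces to widening a stored date window. A condensed sketch of that logic only, not the removed implementation verbatim (the key name and passed-in redis-py client are hypothetical):

    def update_daterange(r, key, new_date):
        """Widen the [first_seen, last_seen] window stored on a Redis hash.

        new_date is an int in YYYYMMDD form; values are stored as YYYY/MM/DD.
        """
        date_str = f'{str(new_date)[0:4]}/{str(new_date)[4:6]}/{str(new_date)[6:8]}'
        first_seen = r.hget(key, 'first_seen')
        if first_seen is None:
            # first sighting: both bounds start at new_date
            r.hset(key, mapping={'first_seen': date_str, 'last_seen': date_str})
            return
        last_seen = r.hget(key, 'last_seen')
        if new_date < int(first_seen.replace('/', '')):
            r.hset(key, 'first_seen', date_str)
        if new_date > int(last_seen.replace('/', '')):
            r.hset(key, 'last_seen', date_str)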
diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py
deleted file mode 100755
index 9e1f191c..00000000
--- a/bin/lib/Domain.py
+++ /dev/null
@@ -1,915 +0,0 @@
-#!/usr/bin/python3
-
-"""
-The ``Domain``
-===================
-
-
-"""
-
-import os
-import sys
-import itertools
-import re
-import redis
-import random
-import time
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
-import Date
-import Item
-import Tag
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
-import Tag
-
-import Language
-import Screenshot
-import Username
-
-config_loader = ConfigLoader.ConfigLoader()
-r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
-config_loader = None
-
-
-######## DB KEYS ########
-def get_db_keys_domain_up(domain_type, date_type): # sanitise domain_type
-    # get key name
-    if date_type=='day':
-        key_value = "{}_up:".format(domain_type)
-        key_value += "{}"
-    elif date_type=='month':
-        key_value = "month_{}_up:".format(domain_type)
-        key_value += "{}"
-    else:
-        key_value = None
-    return key_value
-
-def get_list_db_keys_domain_up(domain_type, l_dates, date_type):
-    l_keys_name = []
-    if domain_type=='all':
-        domains_types = get_all_domains_type()
-    else:
-        domains_types = [domain_type]
-
-    for dom_type in domains_types:
-        key_name = get_db_keys_domain_up(dom_type, date_type)
-        if key_name:
-            for str_date in l_dates:
-                l_keys_name.append(key_name.format(str_date))
-    return l_keys_name
-
-######## UTIL ########
-def sanitize_domain_type(domain_type):
-    if domain_type in get_all_domains_type():
-        return domain_type
-    else:
-        return 'regular'
-
-def sanitize_domain_types(l_domain_type):
-    all_domain_types = get_all_domains_type()
-    if not l_domain_type:
-        return all_domain_types
-    for domain_type in l_domain_type:
-        if domain_type not in all_domain_types:
-            return all_domain_types
-    return l_domain_type
-
-######## DOMAINS ########
-def get_all_domains_type():
-    return ['onion', 'regular']
-
-def get_all_domains_up(domain_type, r_list=True):
-    '''
-    Get all domains up (at least one time)
-
-    :param domain_type: domain type
-    :type domain_type: str
-
-    :return: list of domain
-    :rtype: list
-    '''
-    domains = r_serv_onion.smembers("full_{}_up".format(domain_type))
-    if r_list:
-        if domains:
-            domains = list(domains)
-        else:
-            domains = []
-    return domains
-
-def get_domains_up_by_month(date_year_month, domain_type, rlist=False):
-    '''
-    Get all domains up (at least one time) for a given month
-
-    :param date_year_month: date YYYYMM
-    :type date_year_month: str
-
-    :return: list of domain
-    :rtype: list
-    '''
-    res = r_serv_onion.smembers( get_db_keys_domain_up(domain_type, "month").format(date_year_month) )
-    if rlist:
-        return list(res)
-    else:
-        return res
-
-def get_domain_up_by_day(date_year_month, domain_type, rlist=False):
-    '''
-    Get all domains up (at least one time) for a given day
-
-    :param date_year_month: date YYYYMMDD
-    :type date_year_month: str
-
-    :return: list of domain
-    :rtype: list
-    '''
-    res = r_serv_onion.smembers(get_db_keys_domain_up(domain_type, "day").format(date_year_month))
-    if rlist:
-        return list(res)
-    else:
-        return res
-
-def get_domains_up_by_daterange(date_from, date_to, domain_type):
-    '''
-    Get all domains up (at least one time) by daterange
-
-    :param domain_type: domain_type
-    :type domain_type: str
-
-    :return: list of domain
-    :rtype: list
-    '''
-    days_list, month_list = Date.get_date_range_full_month_and_days(date_from, date_to)
-    l_keys_name = get_list_db_keys_domain_up(domain_type, days_list, 'day')
-    l_keys_name.extend(get_list_db_keys_domain_up(domain_type, month_list, 'month'))
-
-    if len(l_keys_name) > 1:
-        domains_up = list(r_serv_onion.sunion(l_keys_name[0], *l_keys_name[1:]))
-    elif l_keys_name:
-        domains_up = list(r_serv_onion.smembers(l_keys_name[0]))
-    else:
-        domains_up = []
-    return domains_up
-
-# Return last crawled domains by type
-# domain;epoch
-def get_last_crawled_domains(domain_type):
-    return r_serv_onion.lrange('last_{}'.format(domain_type), 0, -1)
-
-def paginate_iterator(iter_elems, nb_obj=50, page=1):
-    dict_page = {}
-    dict_page['nb_all_elem'] = len(iter_elems)
-    nb_pages = dict_page['nb_all_elem'] / nb_obj
-    if not nb_pages.is_integer():
-        nb_pages = int(nb_pages)+1
-    else:
-        nb_pages = int(nb_pages)
-
if page > nb_pages: - page = nb_pages - - # multiple pages - if nb_pages > 1: - dict_page['list_elem'] = [] - start = nb_obj*(page -1) - stop = (nb_obj*page) -1 - current_index = 0 - for elem in iter_elems: - if current_index > stop: - break - if start <= current_index and stop >= current_index: - dict_page['list_elem'].append(elem) - current_index += 1 - stop += 1 - if stop > dict_page['nb_all_elem']: - stop = dict_page['nb_all_elem'] - - else: - start = 0 - stop = dict_page['nb_all_elem'] - dict_page['list_elem'] = list(iter_elems) - dict_page['page'] = page - dict_page['nb_pages'] = nb_pages - # UI - dict_page['nb_first_elem'] = start+1 - dict_page['nb_last_elem'] = stop - return dict_page - -def domains_up_by_page(domain_type, nb_obj=28, page=1): - ''' - Get a list of domains up (alpha sorted) - - :param domain_type: domain type - :type domain_type: str - - :return: list of domain - :rtype: list - ''' - domains = sorted(get_all_domains_up(domain_type, r_list=False)) - domains = paginate_iterator(domains, nb_obj=nb_obj, page=page) - domains['list_elem'] = create_domains_metadata_list(domains['list_elem'], domain_type) - return domains - -def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], nb_obj=28, page=1): - if not tags: - if not date_from and not date_to: - return domains_up_by_page(domain_type, nb_obj=nb_obj, page=page) - else: - domains = sorted(get_domains_up_by_daterange(date_from, date_to, domain_type)) - domains = paginate_iterator(domains, nb_obj=nb_obj, page=page) - domains['list_elem'] = create_domains_metadata_list(domains['list_elem'], domain_type) - domains['domain_type'] = domain_type - domains['date_from'] = date_from - domains['date_to'] = date_to - return domains - else: - return None - - - -## TODO: filters: -# - tags -# - languages -# - daterange UP -def get_domains_by_filters(): - pass - -def create_domains_metadata_list(list_domains, domain_type, tags=True): - - # # TODO: - # tags => optional - # last check timestamp - - l_domains = [] - for domain in list_domains: - if domain_type=='all': - dom_type = get_domain_type(domain) - else: - dom_type = domain_type - - l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True, - ports=True, tags=tags, languages=True, screenshot=True, tags_safe=True)) - return l_domains - -def sanithyse_domain_name_to_search(name_to_search, domain_type): - if domain_type == 'onion': - r_name = r'[a-z0-9\.]+' - else: - r_name = r'[a-zA-Z0-9-_\.]+' - # invalid domain name - if not re.fullmatch(r_name, name_to_search): - res = re.match(r_name, name_to_search) - return {'search': name_to_search, 'error': res.string.replace( res[0], '')} - return name_to_search.replace('.', '\.') - - -def search_domains_by_name(name_to_search, domain_types, r_pos=False): - domains_dict = {} - for domain_type in domain_types: - r_name = sanithyse_domain_name_to_search(name_to_search, domain_type) - if not name_to_search or isinstance(r_name, dict): - break - r_name = re.compile(r_name) - for domain in get_all_domains_up(domain_type): - res = re.search(r_name, domain) - if res: - domains_dict[domain] = {} - if r_pos: - domains_dict[domain]['hl-start'] = res.start() - domains_dict[domain]['hl-end'] = res.end() - return domains_dict - -def api_sanithyse_domain_name_to_search(name_to_search, domains_types): - domains_types = sanitize_domain_types(domains_types) - for domain_type in domains_types: - r_name = sanithyse_domain_name_to_search(name_to_search, domain_type) - if isinstance(r_name, 
dict): - return ({'error': 'Invalid'}, 400) - - -def api_search_domains_by_name(name_to_search, domains_types, domains_metadata=False, page=1): - domains_types = sanitize_domain_types(domains_types) - domains_dict = search_domains_by_name(name_to_search, domains_types, r_pos=True) - l_domains = sorted(domains_dict.keys()) - l_domains = paginate_iterator(l_domains, nb_obj=28, page=page) - if not domains_metadata: - return l_domains - else: - l_dict_domains = [] - for domain in l_domains['list_elem']: - dict_domain = get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True, - status=True, ports=True, tags=True, tags_safe=True, - languages=True, screenshot=True) - dict_domain = {**domains_dict[domain], **dict_domain} - l_dict_domains.append(dict_domain) - l_domains['list_elem'] = l_dict_domains - l_domains['search'] = name_to_search - return l_domains - - -######## LANGUAGES ######## -def get_all_domains_languages(): - return r_serv_onion.smembers('all_domains_languages') - -def get_domains_by_languages(languages, l_domain_type=[]): - l_domain_type = sanitize_domain_types(l_domain_type) - if not languages: - return [] - elif len(languages) == 1: - return get_all_domains_by_language(languages[0], l_domain_type=l_domain_type) - else: - all_domains_t = [] - for domain_type in l_domain_type: - l_keys_name = [] - for language in languages: - l_keys_name.append('language:domains:{}:{}'.format(domain_type, language)) - res = r_serv_onion.sinter(l_keys_name[0], *l_keys_name[1:]) - if res: - all_domains_t.append(res) - return list(itertools.chain.from_iterable(all_domains_t)) - -def get_all_domains_by_language(language, l_domain_type=[]): - l_domain_type = sanitize_domain_types(l_domain_type) - if len(l_domain_type) == 1: - return r_serv_onion.smembers('language:domains:{}:{}'.format(l_domain_type[0], language)) - else: - l_keys_name = [] - for domain_type in l_domain_type: - l_keys_name.append('language:domains:{}:{}'.format(domain_type, language)) - return r_serv_onion.sunion(l_keys_name[0], *l_keys_name[1:]) - -def get_domain_languages(domain, r_list=False): - res = r_serv_onion.smembers('domain:language:{}'.format(domain)) - if r_list: - return list(res) - else: - return res - -def add_domain_language(domain, language): - language = language.split('-')[0] - domain_type = get_domain_type(domain) - r_serv_onion.sadd('all_domains_languages', language) - r_serv_onion.sadd('all_domains_languages:{}'.format(domain_type), language) - r_serv_onion.sadd('language:domains:{}:{}'.format(domain_type, language), domain) - r_serv_onion.sadd('domain:language:{}'.format(domain), language) - -def add_domain_languages_by_item_id(domain, item_id): - for lang in Item.get_item_languages(item_id, min_proportion=0.2, min_probability=0.8): - add_domain_language(domain, lang.language) - -def delete_domain_languages(domain): - domain_type = get_domain_type(domain) - for language in get_domain_languages(domain): - r_serv_onion.srem('language:domains:{}:{}'.format(domain_type, language), domain) - if not r_serv_onion.exists('language:domains:{}:{}'.format(domain_type, language)): - r_serv_onion.srem('all_domains_languages:{}'.format(domain_type), language) - exist_domain_type_lang = False - for domain_type in get_all_domains_type(): - if r_serv_onion.sismembers('all_domains_languages:{}'.format(domain_type), language): - exist_domain_type_lang = True - continue - if not exist_domain_type_lang: - r_serv_onion.srem('all_domains_languages', language) - 
r_serv_onion.delete('domain:language:{}'.format(domain)) - -def _delete_all_domains_languages(): - for language in get_all_domains_languages(): - for domain in get_all_domains_by_language(language): - delete_domain_languages(domain) - -## API ## -## TODO: verify domains type + languages list -## TODO: add pagination -def api_get_domains_by_languages(domains_types, languages, domains_metadata=False, page=1): - l_domains = sorted(get_domains_by_languages(languages, l_domain_type=domains_types)) - l_domains = paginate_iterator(l_domains, nb_obj=28, page=page) - if not domains_metadata: - return l_domains - else: - l_dict_domains = [] - for domain in l_domains['list_elem']: - l_dict_domains.append(get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True, - status=True, ports=True, tags=True, tags_safe=True, - languages=True, screenshot=True)) - l_domains['list_elem'] = l_dict_domains - return l_domains -####---- ----#### - -######## DOMAIN ######## - -def get_domain_type(domain): - if str(domain).endswith('.onion'): - return 'onion' - else: - return 'regular' - -def sanathyse_port(port, domain, domain_type, strict=False, current_port=None): - ''' - Retun a port number, If the port number is invalid, a port of the provided domain is randomly selected - ''' - try: - port = int(port) - except (TypeError, ValueError): - if strict: - port = current_port - else: - port = get_random_domain_port(domain, domain_type) - return port - -def domain_was_up(domain, domain_type): - return r_serv_onion.hexists('{}_metadata:{}'.format(domain_type, domain), 'ports') - -def is_domain_up(domain, domain_type, ports=[]): - if not ports: - ports = get_domain_all_ports(domain, domain_type) - for port in ports: - res = r_serv_onion.zrevrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), 0, 0, withscores=True) - if res: - item_core, epoch = res[0] - epoch = int(epoch) - if item_core != str(epoch): - return True - return False - -def get_domain_first_up(domain, domain_type, ports=None): - ''' - Get all domain up (at least one time) - - :param ports: list of ports, optional - :type ports: list - - :return: domain last up epoch - :rtype: int - ''' - if ports is None: - ports = get_domain_all_ports(domain, domain_type) - epoch_min = None - for port in ports: - res = r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), 0, 0, withscores=True)[0] - if not epoch_min: - epoch_min = int(res[1]) - elif res[1] < epoch_min: - epoch_min = int(res[1]) - return epoch_min - -def get_last_domain_up_by_port(domain, domain_type, port): - current_index = 0 - while True: - res = r_serv_onion.zrevrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), current_index, current_index, withscores=True) - # history found - if res: - item_core, epoch = res[0] - epoch = int(epoch) - if item_core == str(epoch): - current_index +=1 - else: - return epoch - else: - return None - -def get_domain_last_up(domain, domain_type, ports=None): - if ports is None: - ports = get_domain_all_ports(domain, domain_type) - epoch_max = 0 - for port in ports: - last_epoch_up = get_last_domain_up_by_port(domain, domain_type, port) - if last_epoch_up > epoch_max: - epoch_max = last_epoch_up - return epoch_max - -def get_domain_up_range(domain, domain_type): - domain_metadata = {} - domain_metadata['first_seen'] = get_domain_first_up(domain, domain_type) - domain_metadata['last_seen'] = get_domain_last_up(domain, domain_type) - return domain_metadata - -def get_domain_all_ports(domain, 
domain_type):
-    '''
-    Return a list of all crawled ports
-    '''
-    l_ports = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'ports')
-    if l_ports:
-        return l_ports.split(";")
-    return []
-
-def get_random_domain_port(domain, domain_type):
-    return random.choice(get_domain_all_ports(domain, domain_type))
-
-def get_all_domain_up_by_type(domain_type):
-    if domain_type in get_all_domains_type():
-        list_domain = list(r_serv_onion.smembers('full_{}_up'.format(domain_type)))
-        return ({'type': domain_type, 'domains': list_domain}, 200)
-    else:
-        return ({"status": "error", "reason": "Invalid domain type"}, 400)
-
-def get_domain_all_url(domain, domain_type, domain_ports=None):
-    if not domain_ports:
-        domain_ports = get_domain_all_ports(domain, domain_type)
-    all_url = {}
-    for port in domain_ports:
-        for dict_history in get_domain_history_with_status(domain, domain_type, port, add_root_item=True):
-            if dict_history['status']: # domain UP
-                crawled_items = get_domain_items(domain, dict_history['root_item'])
-                for item_id in crawled_items:
-                    item_url = Item.get_item_link(item_id)
-                    item_date = int(Item.get_item_date(item_id))
-                    if item_url:
-                        if item_url not in all_url:
-                            all_url[item_url] = {'first_seen': item_date, 'last_seen': item_date}
-                        else: # update first_seen / last_seen
-                            if item_date < all_url[item_url]['first_seen']:
-                                all_url[item_url]['first_seen'] = item_date
-                            if item_date > all_url[item_url]['last_seen']:
-                                all_url[item_url]['last_seen'] = item_date
-    return all_url
-
-
-def get_domain_items(domain, root_item_id):
-    dom_item = get_domain_item_children(domain, root_item_id)
-    dom_item.append(root_item_id)
-    return dom_item
-
-def get_domain_item_children(domain, root_item_id):
-    all_items = []
-    for item_id in Item.get_item_children(root_item_id):
-        if Item.is_item_in_domain(domain, item_id):
-            all_items.append(item_id)
-            all_items.extend(get_domain_item_children(domain, item_id))
-    return all_items
-
-def get_domain_last_crawled_item_root(domain, domain_type, port):
-    '''
-    Return the last crawled item root dict
-    '''
-    res = r_serv_onion.zrevrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), 0, 0, withscores=True)
-    if res:
-        return {"root_item": res[0][0], "epoch": int(res[0][1])}
-    else:
-        return {}
-
-def get_domain_crawled_item_root(domain, domain_type, port, epoch=None):
-    '''
-    Return the first item crawled for a given domain:port (and epoch)
-    '''
-    if epoch:
-        res = r_serv_onion.zrevrangebyscore('crawler_history_{}:{}:{}'.format(domain_type, domain, port), int(epoch), int(epoch))
-        if res:
-            return {"root_item": res[0], "epoch": int(epoch)}
-        # invalid epoch
-        epoch = None
-
-    if not epoch:
-        return get_domain_last_crawled_item_root(domain, domain_type, port)
-
-
-def get_domain_items_crawled(domain, domain_type, port, epoch=None, items_link=False, item_screenshot=False, item_tag=False):
-    '''
-
-    '''
-    item_crawled = {}
-    item_root = get_domain_crawled_item_root(domain, domain_type, port, epoch=epoch)
-    if item_root:
-        item_crawled['port'] = port
-        item_crawled['epoch'] = item_root['epoch']
-        item_crawled['date'] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(item_root['epoch']))
-        item_crawled['items'] = []
-        if item_root['root_item'] != str(item_root['epoch']):
-            for item in get_domain_items(domain, item_root['root_item']):
-                dict_item = {"id": item}
-                if items_link:
-                    dict_item['link'] = Item.get_item_link(item)
-                if item_screenshot:
-                    dict_item['screenshot'] = Item.get_item_screenshot(item)
-                if item_tag:
-                    dict_item['tags'] = Tag.get_obj_tags_minimal(item)
-                item_crawled['items'].append(dict_item)
-    return item_crawled
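The history zsets read above encode up/down status in the member itself: when a crawl found the domain down, the stored member equals its epoch score rendered as a string; otherwise the member is the root item id. A minimal decoding sketch with made-up data (assuming a local Redis; the key and members are hypothetical):

    import time
    import redis

    r = redis.Redis(decode_responses=True)
    key = 'crawler_history_onion:example.onion:80'  # hypothetical domain/port

    # one successful crawl (member = root item id) and one failed one
    r.zadd(key, {'crawled/2022/11/28/example.onion_root': 1669626000})
    r.zadd(key, {'1669712400': 1669712400})         # down: member == str(epoch)

    for member, epoch in r.zrange(key, 0, -1, withscores=True):
        epoch = int(epoch)
        status = 'DOWN' if member == str(epoch) else 'UP'
        print(time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch)), status)

This is also why get_domain_history_with_status above only has to try int(root_item) to classify an entry.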
-def get_link_tree():
-    pass
-
-def get_domain_first_seen(domain, domain_type=None, r_format="str"):
-    '''
-    Get domain first seen date
-
-    :param domain: crawled domain
-    :type domain: str
-    :param domain_type: domain type
-    :type domain_type: str
-
-    :return: domain first seen date
-    :rtype: str
-    '''
-    if not domain_type:
-        domain_type = get_domain_type(domain)
-    first_seen = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'first_seen')
-    if first_seen is not None:
-        if r_format=="int":
-            first_seen = int(first_seen)
-        else:
-            first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
-    return first_seen
-
-def get_domain_last_check(domain, domain_type=None, r_format="str"):
-    '''
-    Get domain last check date
-
-    :param domain: crawled domain
-    :type domain: str
-    :param domain_type: domain type
-    :type domain_type: str
-
-    :return: domain last check date
-    :rtype: str
-    '''
-    if not domain_type:
-        domain_type = get_domain_type(domain)
-    last_check = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'last_check')
-    if last_check is not None:
-        if r_format=="int":
-            last_check = int(last_check)
-        # str
-        else:
-            last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
-    return last_check
-
-def get_domain_last_origin(domain, domain_type):
-    '''
-    Get domain last origin
-
-    :param domain: crawled domain
-    :type domain: str
-    :param domain_type: domain type
-    :type domain_type: str
-
-    :return: last origin item_id
-    :rtype: str
-    '''
-    origin_item = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'paste_parent')
-    return origin_item
-
-def get_domain_father(domain, domain_type):
-    dict_father = {}
-    dict_father['item_father'] = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'paste_parent')
-    if dict_father['item_father'] != 'auto' and dict_father['item_father'] != 'manual':
-        if Item.is_crawled(dict_father['item_father']):
-            dict_father['domain_father'] = Item.get_domain(dict_father['item_father'])
-    return dict_father
-
-def get_domain_tags(domain):
-    '''
-    Return all tags of a given domain.
-
-    :param domain: crawled domain
-    '''
-    return Tag.get_object_tags('domain', domain)
-
-def get_domain_random_screenshot(domain):
-    '''
-    Return a random screenshot (core item).
- - :param domain: crawled domain - ''' - return Screenshot.get_randon_domain_screenshot(domain) - -def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, languages=False, screenshot=False): - ''' - Get Domain basic metadata - - :param first_seen: get domain first_seen - :type first_seen: boolean - :param last_ckeck: get domain last_check - :type last_ckeck: boolean - :param ports: get all domain ports - :type ports: boolean - :param tags: get all domain tags - :type tags: boolean - - :return: a dict of all metadata for a given domain - :rtype: dict - ''' - dict_metadata = {} - dict_metadata['id'] = domain - dict_metadata['type'] = domain_type - if first_seen: - res = get_domain_first_seen(domain, domain_type=domain_type) - if res is not None: - dict_metadata['first_seen'] = res - if last_ckeck: - res = get_domain_last_check(domain, domain_type=domain_type) - if res is not None: - dict_metadata['last_check'] = res - if status: - dict_metadata['status'] = is_domain_up(domain, domain_type) - if ports: - dict_metadata['ports'] = get_domain_all_ports(domain, domain_type) - if tags: - dict_metadata['tags'] = get_domain_tags(domain) - if tags_safe: - if tags: - dict_metadata['is_tags_safe'] = Tag.is_tags_safe(dict_metadata['tags']) - else: - dict_metadata['is_tags_safe'] = Tag.is_tags_safe(get_domain_tags(domain)) - if languages: - dict_metadata['languages'] = Language.get_languages_from_iso(get_domain_languages(domain, r_list=True), sort=True) - if screenshot: - dict_metadata['screenshot'] = get_domain_random_screenshot(domain) - return dict_metadata - -def get_domain_metadata_basic(domain, domain_type=None): - if not domain_type: - domain_type = get_domain_type(domain) - return get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=False) - - # TODO: handle port -def get_domain_history(domain, domain_type, port): # TODO: add date_range: from to + nb_elem - ''' - Retun . - - :param domain: crawled domain - :type domain: str - - :return: - :rtype: list of tuple (item_core, epoch) - ''' - return r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), 0, -1, withscores=True) - -def get_domain_history_with_status(domain, domain_type, port, add_root_item=False): # TODO: add date_range: from to + nb_elem - ''' - Retun . 
- - :param domain: crawled domain - :type domain: str - - :return: - :rtype: list of dict (epoch, date: %Y/%m/%d - %H:%M.%S, boolean status) - ''' - l_history = [] - history = get_domain_history(domain, domain_type, port) - for root_item, epoch_val in history: - epoch_val = int(epoch_val) # force int - dict_history = {"epoch": epoch_val, "date": time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val))} - # domain down, root_item==epoch_val - try: - int(root_item) - dict_history['status'] = False - # domain up, root_item=str - except ValueError: - dict_history['status'] = True - if add_root_item: - dict_history['root_item'] = root_item - l_history.append(dict_history) - return l_history - -def verify_if_domain_exist(domain): - return r_serv_onion.exists('{}_metadata:{}'.format(get_domain_type(domain), domain)) - -## API ## - -def api_verify_if_domain_exist(domain): - if not verify_if_domain_exist(domain): - return {'status': 'error', 'reason': 'Domain not found'}, 404 - else: - return None - -def api_get_domain_up_range(domain, domain_type=None): - res = api_verify_if_domain_exist(domain) - if res: - return res - if not domain_type: - domain_type = get_domain_type(domain) - res = get_domain_up_range(domain, domain_type) - res['domain'] = domain - return res, 200 - -def api_get_domains_by_status_daterange(date_from, date_to, domain_type): - sanitize_domain_type(domain_type) - res = {'domains': get_domains_up_by_daterange(date_from, date_to, domain_type)} - return res, 200 - -## CLASS ## -class Domain(object): - """docstring for Domain.""" - - def __init__(self, domain, port=None): - self.domain = str(domain) - self.type = get_domain_type(domain) - if self.domain_was_up(): - self.current_port = sanathyse_port(port, self.domain, self.type) - - def get_domain_name(self): - return self.domain - - def get_domain_type(self): - return self.type - - def get_current_port(self): - return self.current_port - - def get_domain_first_seen(self): - ''' - Get domain first seen date - - :return: domain first seen date - :rtype: str - ''' - return get_domain_first_seen(self.domain, domain_type=self.type) - - def get_domain_last_check(self): - ''' - Get domain last check date - - :return: domain last check date - :rtype: str - ''' - return get_domain_last_check(self.domain, domain_type=self.type) - - def get_domain_last_origin(self): - ''' - Get domain last origin - - :param domain: crawled domain - :type domain: str - :param domain_type: domain type - :type domain_type: str - - :return: last orgin item_id - :rtype: str - ''' - return get_domain_last_origin(self.domain, self.type) - - def get_domain_father(self): - return get_domain_father(self.domain, self.type) - - def domain_was_up(self): - ''' - Return True if this domain was UP at least one time - ''' - return domain_was_up(self.domain, self.type) - - def is_domain_up(self): # # TODO: handle multiple ports - ''' - Return True if this domain is UP - ''' - return is_domain_up(self.domain, self.type) - - def get_domain_all_ports(self): - return get_domain_all_ports(self.domain, self.type) - - def get_domain_metadata(self, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False): - ''' - Get Domain basic metadata - - :param first_seen: get domain first_seen - :type first_seen: boolean - :param last_ckeck: get domain last_check - :type last_ckeck: boolean - :param ports: get all domain ports - :type ports: boolean - :param tags: get all domain tags - :type tags: boolean - - :return: a dict of all metadata for a given domain - :rtype: dict - 
'''
-        return get_domain_metadata(self.domain, self.type, first_seen=first_seen, last_ckeck=last_ckeck, status=status, ports=ports, tags=tags)
-
-    def get_domain_tags(self):
-        '''
-        Return all tags of a given domain.
-
-        :param domain: crawled domain
-        '''
-        return get_domain_tags(self.domain)
-
-    def get_domain_languages(self):
-        '''
-        Return all languages of a given domain.
-
-        :param domain: domain name
-        '''
-        return get_domain_languages(self.domain)
-
-    def get_domain_history(self):
-        '''
-        Return the full history of a given domain and port.
-        '''
-        return get_domain_history(self.domain, self.type, 80)
-
-    def get_domain_history_with_status(self):
-        '''
-        Return the full history (with status) of a given domain and port.
-        '''
-        return get_domain_history_with_status(self.domain, self.type, 80)
-
-    def get_domain_items_crawled(self, port=None, epoch=None, items_link=False, item_screenshot=False, item_tag=False):
-        '''
-        Return ........................
-        '''
-        port = sanathyse_port(port, self.domain, self.type, strict=True, current_port=self.current_port)
-        return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag)
-
-if __name__ == '__main__':
-    #search_domains_by_name('c', 'onion')
-    res = get_last_crawled_domains('onion')
-    print(res)
diff --git a/bin/lib/Investigations.py b/bin/lib/Investigations.py
index b2943f0b..fb5874f1 100755
--- a/bin/lib/Investigations.py
+++ b/bin/lib/Investigations.py
@@ -12,7 +12,6 @@
 import os
 import sys
 import datetime
-import redis
 import time
 import uuid
 
@@ -20,10 +19,13 @@
 from abc import ABC
 from enum import Enum
 from flask import escape
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
-import Tag
-from exceptions import UpdateInvestigationError
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader
+from lib import Tag
+from lib.exceptions import UpdateInvestigationError
 
 config_loader = ConfigLoader.ConfigLoader()
 r_tracking = config_loader.get_db_conn("Kvrocks_DB")
diff --git a/bin/lib/Language.py b/bin/lib/Language.py
index 6b5bd6a0..e413c434 100755
--- a/bin/lib/Language.py
+++ b/bin/lib/Language.py
@@ -3,7 +3,6 @@
 
 import os
 import sys
-import redis
 
 dict_iso_languages = {
     'af': 'Afrikaans',
diff --git a/bin/lib/MispModules.py b/bin/lib/MispModules.py
index 2193863f..f3361a59 100755
--- a/bin/lib/MispModules.py
+++ b/bin/lib/MispModules.py
@@ -2,7 +2,6 @@
 
 import os
 import json
-import redis
 import requests
 import configparser
 
@@ -10,8 +9,11 @@
 misp_module_url = 'http://localhost:6666'
 
 default_config_path = os.path.join(os.environ['AIL_HOME'], 'configs', 'misp_modules.cfg')
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader
 
 config_loader = ConfigLoader.ConfigLoader()
 r_serv = config_loader.get_redis_conn("ARDB_DB")
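The Screenshot.py module removed next stores PNGs on disk under a tree sharded by the image's sha256, two hex characters per directory level for the first six levels, which keeps any single directory small. A sketch of the scheme, mirroring the deleted get_screenshot_rel_path (the digest and content are hypothetical):

    import os
    from hashlib import sha256

    def screenshot_rel_path(digest, add_extension=False):
        # shard the first 12 hex chars into six directory levels
        path = os.path.join(digest[0:2], digest[2:4], digest[4:6],
                            digest[6:8], digest[8:10], digest[10:12], digest[12:])
        return (path + '.png') if add_extension else path

    digest = sha256(b'example screenshot bytes').hexdigest()  # hypothetical content
    print(screenshot_rel_path(digest, add_extension=True))
    # e.g. ab/cd/ef/01/23/45/<remaining 52 hex chars>.png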
diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py
deleted file mode 100755
index 4addb30e..00000000
--- a/bin/lib/Screenshot.py
+++ /dev/null
@@ -1,241 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import base64
-import os
-import sys
-import redis
-
-from hashlib import sha256
-from io import BytesIO
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-import Item
-import Date
-import Tag
-
-import Correlate_object
-import ConfigLoader
-
-config_loader = ConfigLoader.ConfigLoader()
-r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
-config_loader = None
-
-# get screenshot relative path
-def get_screenshot_rel_path(sha256_string, add_extension=False):
-    screenshot_path = os.path.join(sha256_string[0:2], sha256_string[2:4], sha256_string[4:6], sha256_string[6:8], sha256_string[8:10], sha256_string[10:12], sha256_string[12:])
-    if add_extension:
-        screenshot_path = screenshot_path + '.png'
-    return screenshot_path
-
-def get_screenshot_filepath(sha256_string):
-    filename = os.path.join(SCREENSHOT_FOLDER, get_screenshot_rel_path(sha256_string, add_extension=True))
-    return os.path.realpath(filename)
-
-def exist_screenshot(sha256_string):
-    screenshot_path = get_screenshot_filepath(sha256_string)
-    return os.path.isfile(screenshot_path)
-
-def get_metadata(sha256_string):
-    metadata_dict = {}
-    metadata_dict['img'] = get_screenshot_rel_path(sha256_string)
-    metadata_dict['tags'] = get_screenshot_tags(sha256_string)
-    metadata_dict['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags'])
-    return metadata_dict
-
-def get_screenshot_tags(sha256_string):
-    return Tag.get_obj_tag(sha256_string)
-
-def get_screenshot_items_list(sha256_string):
-    res = r_serv_onion.smembers('screenshot:{}'.format(sha256_string))
-    if res:
-        return list(res)
-    else:
-        return []
-
-def get_item_screenshot(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
-
-def get_item_screenshot_list(item_id):
-    '''
-    Return the screenshot of a given item id, as a list.
-
-    :param item_id: item id
-    '''
-    screenshot = get_item_screenshot(item_id)
-    if screenshot:
-        return [screenshot]
-    else:
-        return []
-
-def get_domain_screenshot(domain):
-    '''
-    Return all screenshots of a given domain.
-
-    :param domain: crawled domain
-    '''
-    res = r_serv_onion.smembers('domain_screenshot:{}'.format(domain))
-    if res:
-        return list(res)
-    else:
-        return []
-
-def get_randon_domain_screenshot(domain, r_path=True):
-    '''
-    Return a random screenshot of a given domain.
-
-    :param domain: crawled domain
-    '''
-    res = r_serv_onion.srandmember('domain_screenshot:{}'.format(domain))
-    if res and r_path:
-        return get_screenshot_rel_path(res)
-    return res
-
-def get_screenshot_domain(sha256_string):
-    '''
-    Return all domains of a given screenshot.
-
-    :param sha256_string: sha256_string
-    '''
-    res = r_serv_onion.smembers('screenshot_domain:{}'.format(sha256_string))
-    if res:
-        return list(res)
-    else:
-        return []
-
-def get_screenshot_correlated_object(sha256_string, correlation_objects=[]):
-    '''
-    Return all correlations of a given sha256.
- - :param sha1_string: sha256 - :type sha1_string: str - - :return: a dict of all correlation for a given sha256 - :rtype: dict - ''' - if not correlation_objects: - correlation_objects = Correlate_object.get_all_correlation_objects() - decoded_correlation = {} - for correlation_object in correlation_objects: - if correlation_object == 'paste': - res = get_screenshot_items_list(sha256_string) - elif correlation_object == 'domain': - res = get_screenshot_domain(sha256_string) - else: - res = None - if res: - decoded_correlation[correlation_object] = res - return decoded_correlation - -def save_item_relationship(obj_id, item_id): - r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'screenshot', obj_id) - r_serv_onion.sadd('screenshot:{}'.format(obj_id), item_id) - if Item.is_crawled(item_id): - domain = Item.get_item_domain(item_id) - save_domain_relationship(obj_id, domain) - -def delete_item_relationship(obj_id, item_id): - r_serv_metadata.hdel('paste_metadata:{}'.format(item_id), 'screenshot', obj_id) - r_serv_onion.srem('screenshot:{}'.format(obj_id), item_id) - -def save_domain_relationship(obj_id, domain): - r_serv_onion.sadd('domain_screenshot:{}'.format(domain), obj_id) - r_serv_onion.sadd('screenshot_domain:{}'.format(obj_id), domain) - -def delete_domain_relationship(obj_id, domain): - r_serv_onion.srem('domain_screenshot:{}'.format(domain), obj_id) - r_serv_onion.srem('screenshot_domain:{}'.format(obj_id), domain) - -def save_obj_relationship(obj_id, obj2_type, obj2_id): - if obj2_type == 'domain': - save_domain_relationship(obj_id, obj2_id) - elif obj2_type == 'item': - save_item_relationship(obj_id, obj2_id) - -def delete_obj_relationship(obj_id, obj2_type, obj2_id): - if obj2_type == 'domain': - delete_domain_relationship(obj_id, obj2_id) - elif obj2_type == 'item': - delete_item_relationship(obj_id, obj2_id) - -def get_screenshot_file_content(sha256_string): - filepath = get_screenshot_filepath(sha256_string) - with open(filepath, 'rb') as f: - file_content = BytesIO(f.read()) - return file_content - -# if force save, ignore max_size -def save_crawled_screeshot(b64_screenshot, max_size, f_save=False): - screenshot_size = (len(b64_screenshot)*3) /4 - if screenshot_size < max_size or f_save: - image_content = base64.standard_b64decode(b64_screenshot.encode()) - sha256_string = sha256(image_content).hexdigest() - filepath = get_screenshot_filepath(sha256_string) - if os.path.isfile(filepath): - #print('File already exist') - return sha256_string - # create dir - dirname = os.path.dirname(filepath) - if not os.path.exists(dirname): - os.makedirs(dirname) - with open(filepath, 'wb') as f: - f.write(image_content) - return sha256_string - return False - -def save_screenshot_file(sha256_string, io_content): - filepath = get_screenshot_filepath(sha256_string) - if os.path.isfile(filepath): - #print('File already exist') - return False - # create dir - dirname = os.path.dirname(filepath) - if not os.path.exists(dirname): - os.makedirs(dirname) - # # TODO: check if is IO file - with open(filepath, 'wb') as f: - f.write(io_content.getvalue()) - return True - -def delete_screenshot_file(obj_id): - filepath = get_screenshot_filepath(obj_id) - if not os.path.isfile(filepath): - return False - Tag.delete_obj_tags(obj_id, 'image', Tag.get_obj_tag(obj_id)) - os.remove(filepath) - return True - -def create_screenshot(obj_id, obj_meta, io_content): - # # TODO: check if sha256 - res = save_screenshot_file(obj_id, io_content) - if res: - # creata tags - if 'tags' in obj_meta: - # # TODO: 
handle mixed tags: taxonomies and Galaxies - Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type="image") - return True - - return False - -def delete_screenshot(obj_id): - if not exist_screenshot(obj_id): - return False - - res = delete_screenshot_file(obj_id) - if not res: - return False - - obj_correlations = get_screenshot_correlated_object(obj_id) - if 'domain' in obj_correlations: - for domain in obj_correlations['domain']: - r_serv_onion.srem('domain_screenshot:{}'.format(domain), obj_id) - r_serv_onion.delete('screenshot_domain:{}'.format(obj_id)) - - if 'paste' in obj_correlations: # TODO: handle item - for item_id in obj_correlations['paste']: - r_serv_metadata.hdel('paste_metadata:{}'.format(item_id), 'screenshot') - r_serv_onion.delete('screenshot:{}'.format(obj_id), item_id) - - return True diff --git a/bin/lib/Statistics.py b/bin/lib/Statistics.py index 7d9067e5..c5898760 100755 --- a/bin/lib/Statistics.py +++ b/bin/lib/Statistics.py @@ -3,15 +3,17 @@ import datetime import os -import redis import sys -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_statistics = config_loader.get_redis_conn("ARDB_Statistics") -#r_serv_trend = ConfigLoader().get_redis_conn("ARDB_Trending") +# r_serv_trend = ConfigLoader().get_redis_conn("ARDB_Trending") config_loader = None PIE_CHART_MAX_CARDINALITY = 8 diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 5b2a60e1..79a695c3 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -5,7 +5,6 @@ import os import re import sys import time -import redis import uuid import yara import datetime @@ -14,13 +13,14 @@ import base64 from flask import escape -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Date - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader -import item_basic -import Tag +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from packages import Date +from lib import ConfigLoader +from lib import item_basic +from lib import Tag config_loader = ConfigLoader.ConfigLoader() r_cache = config_loader.get_redis_conn("Redis_Cache") diff --git a/bin/lib/Username.py b/bin/lib/Username.py deleted file mode 100755 index 731ac904..00000000 --- a/bin/lib/Username.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import redis - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Correlation - -config_loader = ConfigLoader.ConfigLoader() -r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion") -config_loader = None - -correlation = Correlation.Correlation('username', ['telegram', 'twitter', 'jabber']) - -def save_item_correlation(subtype, username, item_id, item_date): - correlation.save_item_correlation(subtype, username, item_id, item_date) diff --git a/bin/lib/Users.py b/bin/lib/Users.py index 77a6f31a..1e5afb68 100755 --- a/bin/lib/Users.py +++ b/bin/lib/Users.py @@ -17,7 +17,7 @@ from lib.ConfigLoader import ConfigLoader # Config config_loader = ConfigLoader() -#r_serv_db = config_loader.get_redis_conn("ARDB_DB") +# r_serv_db = 
config_loader.get_redis_conn("ARDB_DB") r_serv_db = config_loader.get_db_conn("Kvrocks_DB") config_loader = None @@ -87,10 +87,9 @@ def exists_user(user_id): return r_serv_db.exists(f'ail:user:metadata:{user_id}') def get_user_metadata(user_id): - user_metadata = {} - user_metadata['email'] = user_id - user_metadata['role'] = r_serv_db.hget(f'ail:user:metadata:{user_id}', 'role') - user_metadata['api_key'] = r_serv_db.hget(f'ail:user:metadata:{user_id}', 'token') + user_metadata = {'email': user_id, + 'role': r_serv_db.hget(f'ail:user:metadata:{user_id}', 'role'), + 'api_key': r_serv_db.hget(f'ail:user:metadata:{user_id}', 'token')} return user_metadata def get_users_metadata(list_users): @@ -100,7 +99,7 @@ def get_users_metadata(list_users): return users def create_user(user_id, password=None, chg_passwd=True, role=None): - # # TODO: check password strenght + # # TODO: check password strength if password: new_password = password else: @@ -137,7 +136,7 @@ def edit_user_password(user_id, password_hash, chg_passwd=False): else: r_serv_db.hdel(f'ail:user:metadata:{user_id}', 'change_passwd') # remove default user password file - if user_id=='admin@admin.test': + if user_id == 'admin@admin.test': default_passwd_file = os.path.join(os.environ['AIL_HOME'], 'DEFAULT_PASSWORD') if os.path.isfile(default_passwd_file): os.remove(default_passwd_file) @@ -149,7 +148,7 @@ def edit_user_password(user_id, password_hash, chg_passwd=False): def delete_user(user_id): if exists_user(user_id): for role_id in get_all_roles(): - r_serv_db.srem('ail:users:role:{role_id}', user_id) + r_serv_db.srem(f'ail:users:role:{role_id}', user_id) user_token = get_user_token(user_id) r_serv_db.hdel('ail:users:tokens', user_token) r_serv_db.delete(f'ail:user:metadata:{user_id}') @@ -183,7 +182,7 @@ def get_user_role_by_range(inf, sup): def get_all_user_role(user_role): current_role_val = get_role_level(user_role) - return r_serv_db.zrange('ail:roles:all', current_role_val -1, -1) + return r_serv_db.zrange('ail:roles:all', current_role_val - 1, -1) def get_all_user_upper_role(user_role): current_role_val = get_role_level(user_role) @@ -203,12 +202,12 @@ def edit_user_role(user_id, role): current_role = get_role_level(current_role) if current_role < request_level: - role_to_remove = get_user_role_by_range(current_role -1, request_level - 2) + role_to_remove = get_user_role_by_range(current_role - 1, request_level - 2) for role_id in role_to_remove: r_serv_db.srem(f'ail:users:role:{role_id}', user_id) r_serv_db.hset(f'ail:user:metadata:{user_id}', 'role', role) else: - role_to_add = get_user_role_by_range(request_level -1, current_role) + role_to_add = get_user_role_by_range(request_level - 1, current_role) for role_id in role_to_add: r_serv_db.sadd(f'ail:users:role:{role_id}', user_id) r_serv_db.hset(f'ail:user:metadata:{user_id}', 'role', role) @@ -238,10 +237,10 @@ class User(UserMixin): self.id = "__anonymous__" # return True or False - #def is_authenticated(): + # def is_authenticated(): # return True or False - #def is_anonymous(): + # def is_anonymous(): @classmethod def get(self_class, id): diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py index 7b555afb..50c48dc2 100755 --- a/bin/lib/ail_core.py +++ b/bin/lib/ail_core.py @@ -34,3 +34,41 @@ def get_object_all_subtypes(obj_type): return ['telegram', 'twitter', 'jabber'] ##-- AIL OBJECTS --## + +def paginate_iterator(iter_elems, nb_obj=50, page=1): + dict_page = {} + dict_page['nb_all_elem'] = len(iter_elems) + nb_pages = dict_page['nb_all_elem'] / nb_obj + if not 
nb_pages.is_integer(): + nb_pages = int(nb_pages)+1 + else: + nb_pages = int(nb_pages) + if page > nb_pages: + page = nb_pages + + # multiple pages + if nb_pages > 1: + dict_page['list_elem'] = [] + start = nb_obj*(page - 1) + stop = (nb_obj*page) - 1 + current_index = 0 + for elem in iter_elems: + if current_index > stop: + break + if start <= current_index <= stop: + dict_page['list_elem'].append(elem) + current_index += 1 + stop += 1 + if stop > dict_page['nb_all_elem']: + stop = dict_page['nb_all_elem'] + + else: + start = 0 + stop = dict_page['nb_all_elem'] + dict_page['list_elem'] = list(iter_elems) + dict_page['page'] = page + dict_page['nb_pages'] = nb_pages + # UI + dict_page['nb_first_elem'] = start+1 + dict_page['nb_last_elem'] = stop + return dict_page diff --git a/bin/lib/ail_users.py b/bin/lib/ail_users.py index e31fca6e..67d252ef 100755 --- a/bin/lib/ail_users.py +++ b/bin/lib/ail_users.py @@ -3,11 +3,9 @@ import os import sys -import uuid -import redis -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader +sys.path.append(os.environ['AIL_BIN']) +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_db = config_loader.get_redis_conn("ARDB_DB") diff --git a/bin/lib/btc_ail.py b/bin/lib/btc_ail.py index 25beb8a7..e17e257c 100755 --- a/bin/lib/btc_ail.py +++ b/bin/lib/btc_ail.py @@ -3,13 +3,12 @@ import os import sys -import json import requests -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Cryptocurrency +sys.path.append(os.environ['AIL_BIN']) +from lib.objects.CryptoCurrencies import CryptoCurrency -blockchain_all='https://blockchain.info/rawaddr' +blockchain_all = 'https://blockchain.info/rawaddr' # pre-alpha script @@ -18,7 +17,6 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50): dict_btc = {} set_btc_in = set() set_btc_out = set() - req = None try: req = requests.get('{}/{}?limit={}'.format(blockchain_all, bitcoin_address, nb_transaction)) jreq = req.json() @@ -26,7 +24,7 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50): print(e) return dict_btc - #print(json.dumps(jreq)) + # print(json.dumps(jreq)) dict_btc['n_tx'] = jreq['n_tx'] dict_btc['total_received'] = float(jreq['total_received'] / 100000000) dict_btc['total_sent'] = float(jreq['total_sent'] / 100000000) @@ -50,6 +48,7 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50): def filter_btc_seen(btc_addr_set): list_seen_btc = [] for btc_addr in btc_addr_set: - if Cryptocurrency.cryptocurrency._exist_corelation_field('bitcoin', btc_addr): + cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin') + if cryptocurrency.exists(): list_seen_btc.append(btc_addr) return list_seen_btc diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py index 0db0ccdb..aceda52a 100755 --- a/bin/lib/correlations_engine.py +++ b/bin/lib/correlations_engine.py @@ -3,7 +3,6 @@ import os import sys -import redis sys.path.append(os.environ['AIL_BIN']) ################################## @@ -42,12 +41,12 @@ config_loader = None ################################## CORRELATION_TYPES_BY_OBJ = { - "cryptocurrency" : ["domain", "item"], + "cryptocurrency": ["domain", "item"], "cve": ["domain", "item"], "decoded": ["domain", "item"], "domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"], "item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"], - "pgp" : ["domain", "item"], + "pgp": ["domain", "item"], "username": ["domain", "item"], "screenshot": ["domain", 
"item"], } @@ -97,7 +96,7 @@ def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id): subtype = '' if subtype2 is None: subtype2 = '' - return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', '{subtype2}:{obj2_id}') + return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}') def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id): if subtype1 is None: @@ -113,15 +112,11 @@ def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, ob subtype1 = '' if subtype2 is None: subtype2 = '' - r_metadata.srem(f'correlation:obj:{obj1_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}') - r_metadata.srem(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype}:{obj_id}') + r_metadata.srem(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}') + r_metadata.srem(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}') - -# # TODO: CORRELATION GRAPH - - -def get_obj_str_id(obj_type, subtype, obj_id): ################ REPLACE BY : ????????????????????????? +def get_obj_str_id(obj_type, subtype, obj_id): if subtype is None: subtype = '' return f'{obj_type};{subtype};{obj_id}' @@ -141,7 +136,7 @@ def _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, nodes.add(obj_str_id) obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types) - #print(obj_correlations) + # print(obj_correlations) for correl_type in obj_correlations: for str_obj in obj_correlations[correl_type]: subtype2, obj2_id = str_obj.split(':', 1) @@ -158,566 +153,3 @@ def _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, if level > 0: next_level = level - 1 _get_correlations_graph_node(links, nodes, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id) - - - - -########################################################## -########################################################## -########################################################## -########################################################## -########################################################## -########################################################## - - - - - - - - - - - - - - - - - - - - - - - -# get_correlations_fcts = { -# "cryptocurrency" : ["domain", "item"], -# "decoded" : ["domain", "item"], -# "domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"], -# "item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"], -# "pgp" : ["domain", "item"], -# "username" : ["domain", "item"], -# "screenshot" :{ -# "domain": get_correl_screenshot_domain, -# "item": get_correl_screenshot_item, -# }, -# } -# } -# -# def build_lsets_obj_types(obj1_type, obj_types): -# return [set(obj1_type, x) for x in subtypes_obj] -# -# ########################## -# subtypes_obj = ['cryptocurrency', 'pgp', 'username'] -# lsets_subtypes_obj_domain = build_lsets_obj_types('domain', subtypes_obj) -# lsets_subtypes_obj_item = build_lsets_obj_types('item', subtypes_obj) -# ########################## - -# TODO HANDLE CRAWLED ITEMS -def add_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id): - set_type = set(ob1_type, ob2_type) - - # domain - subtypes objs - if set_type in lsets_subtypes_obj_domain: - if ob1_type == 'domain': - domain = obj1_id - 
obj_type = obj2_type - obj_subtype = obj2_subtype - obj_id = obj2_id - else: - domain = obj2_id - obj_type = obj1_type - obj_subtype = obj1_subtype - obj_id = obj1_id - r_metadata.sadd(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id) - r_metadata.sadd(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain) - - # TODO HANDLE CRAWLED ITEMS - # item - subtypes objs - elif set_type in lsets_subtypes_obj_item: - if ob1_type == 'item': - item_id = obj1_id - obj_type = obj2_type - obj_subtype = obj2_subtype - obj_id = obj2_id - else: - item_id = obj2_id - obj_type = obj1_type - obj_subtype = obj1_subtype - obj_id = obj1_id - r_metadata.sadd(f'set_{obj_type}_{obj_subtype}:{obj_id}', item_id) - r_metadata.sadd(f'item_{obj_type}_{obj_subtype}:{item_id}', obj_id) - - # domain - decoded - elif set_type == set('domain', 'decoded'): - if ob1_type == 'decoded': - decoded_id = ob1_id - domain = obj2_id - else: - decoded_id = obj2_id - domain = ob1_id - r_metadata.sadd(f'hash_domain:{domain}', decoded_id) # domain - hash map - r_metadata.sadd(f'domain_hash:{decoded_id}', domain) # hash - domain map - - # item - decoded - elif set_type == set('item', 'decoded'): - if ob1_type == 'decoded': - decoded_id = ob1_id - item_id = obj2_id - else: - decoded_id = obj2_id - item_id = ob1_id - - ############################################################ - - - # domain - screenshot - elif set_type == set('domain', 'screenshot'): - if ob1_type == 'screenshot': - screenshot_id = ob1_id - domain = obj2_id - else: - screenshot_id = obj2_id - domain = ob1_id - r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id) - r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain) - - # item - screenshot - elif set_type == set('item', 'screenshot'): - if ob1_type == 'screenshot': - screenshot_id = ob1_id - item_id = obj2_id - else: - screenshot_id = obj2_id - item_id = ob1_id - r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) - r_crawler.sadd(f'screenshot:{screenshot_id}', item_id) - - # domain - item - elif set_type == set('domain', 'item'): - if ob1_type == 'item': - item_id = ob1_id - domain = obj2_id - else: - item_id = obj2_id - domain = ob1_id - - ############################################################ - - - -# TODO ADD COMPLETE DELETE -# TODO: Handle items crawled -def delete_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id): - set_type = set(ob1_type, ob2_type) - - # domain - subtypes objs - if set_type in lsets_subtypes_obj_domain: - if ob1_type == 'domain': - domain = obj1_id - obj_type = obj2_type - obj_subtype = obj2_subtype - obj_id = obj2_id - else: - domain = obj2_id - obj_type = obj1_type - obj_subtype = obj1_subtype - obj_id = obj1_id - r_metadata.srem(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id) - r_metadata.srem(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain) - - - - # TODO ADD COMPLETE DELETE - # item - subtypes objs - elif set_type in lsets_subtypes_obj_item: - if ob1_type == 'item': - item_id = obj1_id - obj_type = obj2_type - obj_subtype = obj2_subtype - obj_id = obj2_id - else: - item_id = obj2_id - obj_type = obj1_type - obj_subtype = obj1_subtype - obj_id = obj1_id - # TODO ADD COMPLETE DELETE - r_metadata.srem(f'set_{obj_type}_{subtype}:{obj_id}', item_id) - r_metadata.srem(f'item_{obj_type}_{subtype}:{item_id}', obj_id) - # TODO ADD COMPLETE DELETE - - # domain - decoded - elif set_type == set('domain', 'decoded'): - if ob1_type == 'decoded': - decoded_id = ob1_id - domain = obj2_id - else: - decoded_id = obj2_id 
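##############################################################################
# Usage sketch for the paginate_iterator() helper added to bin/lib/ail_core.py
# earlier in this patch (anchored here, between removed blocks, for
# readability). Assumes AIL_BIN is set, as the lib itself requires:
import os
import sys

sys.path.append(os.environ['AIL_BIN'])
from lib.ail_core import paginate_iterator

elems = [f'dom{i:02d}.onion' for i in range(70)]   # 70 ids -> 3 pages of 28
page2 = paginate_iterator(elems, nb_obj=28, page=2)
assert page2['nb_pages'] == 3
assert page2['nb_first_elem'] == 29 and page2['nb_last_elem'] == 56  # 1-based UI bounds
assert page2['list_elem'][0] == 'dom28.onion'
##############################################################################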
- domain = ob1_id - r_metadata.srem(f'hash_domain:{domain}', decoded_id) - r_metadata.srem(f'domain_hash:{decoded_id}', domain) - - # item - decoded - elif set_type == set('item', 'decoded'): - if ob1_type == 'decoded': - decoded_id = ob1_id - item_id = obj2_id - else: - decoded_id = obj2_id - item_id = ob1_id - - #################################################################### - - - # domain - screenshot - elif set_type == set('domain', 'screenshot'): - if ob1_type == 'screenshot': - screenshot_id = ob1_id - domain = obj2_id - else: - screenshot_id = obj2_id - domain = ob1_id - r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id) - r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain) - - # item - screenshot - elif set_type == set('item', 'screenshot'): - if ob1_type == 'screenshot': - screenshot_id = ob1_id - item_id = obj2_id - else: - screenshot_id = obj2_id - item_id = ob1_id - r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) - r_crawler.srem(f'screenshot:{screenshot_id}', item_id) - - # domain - item - -# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # - -## Subtypes - Cryptocurrency Pgp Username ## - -def get_correl_subtypes_obj_domain(obj_type, obj_subtype, obj_id): - r_serv_metadata.smembers(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}') - -def get_correl_subtypes_obj_item(): - pass - -def delete_subtype_domain_correlation(domain, obj_type, subtype, obj_id): - r_metadata.srem(f'domain_{obj_type}_{subtype}:{domain}', obj_id) - r_metadata.srem(f'set_domain_{obj_type}_{subtype}:{obj_id}', domain) - -# TODO ADD COMPLETE DELETE -def delete_subtype_item_correlation(obj_type, subtype, obj_id, item_id, item_date): - #self.update_correlation_daterange(subtype, obj_id, item_date) update daterange ! 
# # TODO: - r_metadata.srem(f'set_{obj_type}_{subtype}:{obj_id}', item_id) - r_metadata.srem(f'item_{obj_type}_{subtype}:{item_id}', obj_id) - - # # TODO: FIXME HANDLE SUB Objects Metadata # WARNING: - # res = r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, -1) - # if int(res) < 0: # remove last - # r_serv_metadata.hdel('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id) - # - # res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id) - # if int(res) > 0: - # r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, -1) - -## Screenshot ## - -##-- Screenshot - Domain --## -def add_correl_screenshot_domain(screenshot_id, domain): - r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id) - r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain) - -def get_correl_screenshot_domain(screenshot_id): - return r_crawler.smembers(f'screenshot_domain:{screenshot_id}') - -# def delete_correl_screenshot_domain(screenshot_id, domain): -# r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id) -# r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain) - -##-- Screenshot - Item --## -def add_correl_screenshot_item(screenshot_id, item_id): - r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) - r_crawler.sadd(f'screenshot:{screenshot_id}', item_id) - -def get_correl_screenshot_item(screenshot_id): - r_crawler.smembers(f'screenshot:{screenshot_id}') - -# def delete_correl_screenshot_item(screenshot_id, item_id): -# r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) -# r_crawler.srem(f'screenshot:{screenshot_id}', item_id) - -## -- ## - - -def get_correl_item_screenshot(item_id): - res = r_metadata.hget(f'paste_metadata:{item_id}', 'screenshot') - if res: - return set(res) - else: - return set() - -## Domain ## - -def get_correl_domain_subtypes_obj(domain_id, obj_type, obj_subtype): - return r_serv_metadata.smembers(f'domain_{obj_type}_{obj_subtype}:{domain_id}') - -## -- ## - -## Item ## - -def get_correl_item_subtypes_obj(): - pass - -## -- ## war game stinger - stranger thing - - -def _get_object_correlations(obj_type, obj_subtype, obj_id, filter_types=[]): # # TODO: , filter_subtypes=[] - obj_relationships = get_obj_relationships(obj_type) - correlations = [] - for correlation_fct in obj_relationship_fcts[obj_type]: - correlations - - - - - - -def get_object_correlations(filter_types, filter_subtypes, lvl=0): - pass - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#################################################################### -#################################################################### -#################################################################### -#################################################################### -#################################################################### -#################################################################### - -def get_object_correlation(object_type, value, correlation_names=None, correlation_objects=None, requested_correl_type=None): - if object_type == 'domain': - return Domain.get_domain_all_correlation(value, correlation_names=correlation_names) - elif object_type == 'paste' or object_type == 'item': - return Item.get_item_all_correlation(value, correlation_names=correlation_names) - elif object_type == 'decoded': - return Decoded.get_decoded_correlated_object(value, correlation_objects=correlation_objects) - elif 
object_type == 'pgp': - return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) - elif object_type == 'cryptocurrency': - return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) - elif object_type == 'username': - return Username.correlation.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) - elif object_type == 'screenshot' or object_type == 'image': - return Screenshot.get_screenshot_correlated_object(value, correlation_objects=correlation_objects) - return {} - -def get_obj_tag_table_keys(object_type): - ''' - Warning: use only in flask (dynamic templates) - ''' - if object_type=="domain": - return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot - -def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None): - if obj1_type == 'domain': - pass - elif obj1_type == 'item': - pass # son/father + duplicate + domain - elif obj1_type == 'pgp': - Pgp.pgp.save_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id) - elif obj1_type == 'cryptocurrency': - Cryptocurrency.cryptocurrency.save_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id) - elif obj1_type == 'decoded': - Decoded.save_obj_relationship(obj1_id, obj2_type, obj2_id) - elif obj1_type == 'image': - Screenshot.save_obj_relationship(obj1_id, obj2_type, obj2_id) - -def delete_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None): - if obj1_type == 'domain': - pass - elif obj1_type == 'item': - pass # son/father + duplicate + domain - elif obj1_type == 'pgp': - Pgp.pgp.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id) - elif obj1_type == 'cryptocurrency': - Cryptocurrency.cryptocurrency.delete_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id) - elif obj1_type == 'decoded': - Decoded.delete_obj_relationship(obj1_id, obj2_type, obj2_id) - elif obj1_type == 'image': - Screenshot.delete_obj_relationship(obj1_id, obj2_type, obj2_id) - -def create_graph_links(links_set): - graph_links_list = [] - for link in links_set: - graph_links_list.append({"source": link[0], "target": link[1]}) - return graph_links_list - -def create_graph_nodes(nodes_set, root_node_id, flask_context=True): - graph_nodes_list = [] - for node_id in nodes_set: - correlation_name, correlation_type, value = node_id.split(';', 3) - dict_node = {"id": node_id} - dict_node['style'] = get_correlation_node_icon(correlation_name, correlation_type, value) - dict_node['text'] = value - if node_id == root_node_id: - dict_node["style"]["node_color"] = 'orange' - dict_node["style"]["node_radius"] = 7 - dict_node['url'] = get_item_url(correlation_name, value, correlation_type, flask_context=flask_context) - graph_nodes_list.append(dict_node) - return graph_nodes_list - -def create_node_id(correlation_name, value, correlation_type=''): - if correlation_type is None: - correlation_type = '' - return '{};{};{}'.format(correlation_name, correlation_type, value) - - - -# # TODO: filter by correlation type => bitcoin, mail, ... 
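##############################################################################
# The removed graph builder below is superseded by the recursive
# _get_correlations_graph_node() walker kept above. A thin public wrapper is
# all a view needs; this one is hypothetical but matches the walker signature:
def sketch_get_correlations_graph(obj_type, subtype, obj_id, level=2,
                                  max_nodes=300, filter_types=[]):
    links = set()   # filled with (source, target) tuples
    nodes = set()   # filled with 'type;subtype;id' strings from get_obj_str_id()
    _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id,
                                 level, max_nodes, filter_types=filter_types)
    # ready for a D3-style mapping: [{'source': s, 'target': t}, ...]
    return {'nodes': nodes, 'links': links}
##############################################################################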
-def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None, flask_context=True): - links = set() - nodes = set() - - root_node_id = create_node_id(object_type, root_value, requested_correl_type) - nodes.add(root_node_id) - - root_correlation = get_object_correlation(object_type, root_value, correlation_names, correlation_objects, requested_correl_type=requested_correl_type) - for correl in root_correlation: - if correl in ('pgp', 'cryptocurrency', 'username'): - for correl_type in root_correlation[correl]: - for correl_val in root_correlation[correl][correl_type]: - - # add correlation - correl_node_id = create_node_id(correl, correl_val, correl_type) - - if mode=="union": - if len(nodes) > max_nodes: - break - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - # get second correlation - res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects, requested_correl_type=correl_type) - if res: - for corr_obj in res: - for correl_key_val in res[corr_obj]: - #filter root value - if correl_key_val == root_value: - continue - - if len(nodes) > max_nodes: - break - new_corel_1 = create_node_id(corr_obj, correl_key_val) - new_corel_2 = create_node_id(correl, correl_val, correl_type) - nodes.add(new_corel_1) - nodes.add(new_corel_2) - links.add((new_corel_1, new_corel_2)) - - if mode=="inter": - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - if correl in ('decoded', 'screenshot', 'domain', 'paste'): - for correl_val in root_correlation[correl]: - - correl_node_id = create_node_id(correl, correl_val) - if mode=="union": - if len(nodes) > max_nodes: - break - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects) - if res: - for corr_obj in res: - if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'): - for correl_key_val in res[corr_obj]: - #filter root value - if correl_key_val == root_value: - continue - - if len(nodes) > max_nodes: - break - new_corel_1 = create_node_id(corr_obj, correl_key_val) - new_corel_2 = create_node_id(correl, correl_val) - nodes.add(new_corel_1) - nodes.add(new_corel_2) - links.add((new_corel_1, new_corel_2)) - - if mode=="inter": - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - if corr_obj in ('pgp', 'cryptocurrency', 'username'): - for correl_key_type in res[corr_obj]: - for correl_key_val in res[corr_obj][correl_key_type]: - #filter root value - if correl_key_val == root_value: - continue - - if len(nodes) > max_nodes: - break - new_corel_1 = create_node_id(corr_obj, correl_key_val, correl_key_type) - new_corel_2 = create_node_id(correl, correl_val) - nodes.add(new_corel_1) - nodes.add(new_corel_2) - links.add((new_corel_1, new_corel_2)) - - if mode=="inter": - nodes.add(correl_node_id) - links.add((root_node_id, correl_node_id)) - - - return {"nodes": create_graph_nodes(nodes, root_node_id, flask_context=flask_context), "links": create_graph_links(links)} - - - - - - - - - - - -#######################################################################################3 diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index c8d06a1a..21dd9cf3 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -17,8 +17,6 @@ import sys import time import uuid -import subprocess - from enum import IntEnum, unique from datetime import datetime, timedelta from urllib.parse 
import urlparse, urljoin diff --git a/bin/lib/d4.py b/bin/lib/d4.py index c66c0f27..f8e57d2f 100755 --- a/bin/lib/d4.py +++ b/bin/lib/d4.py @@ -4,11 +4,13 @@ import os import sys import time -import redis import d4_pyclient -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import ConfigLoader +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_db = config_loader.get_db_conn("Kvrocks_DB") diff --git a/bin/lib/data_retention_engine.py b/bin/lib/data_retention_engine.py index 6ccc33b7..190a58f0 100755 --- a/bin/lib/data_retention_engine.py +++ b/bin/lib/data_retention_engine.py @@ -4,8 +4,11 @@ import os import sys -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import ConfigLoader +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_db = config_loader.get_db_conn("Kvrocks_Objects") diff --git a/bin/lib/domain_basic.py b/bin/lib/domain_basic.py index 5bf24a72..54bf236d 100755 --- a/bin/lib/domain_basic.py +++ b/bin/lib/domain_basic.py @@ -9,10 +9,12 @@ import os import sys -import redis -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") diff --git a/bin/lib/index_whoosh.py b/bin/lib/index_whoosh.py index bd18a172..ec214b75 100755 --- a/bin/lib/index_whoosh.py +++ b/bin/lib/index_whoosh.py @@ -3,12 +3,14 @@ import os import sys -import redis from shutil import rmtree -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ConfigLoader config_loader = ConfigLoader.ConfigLoader() INDEX_PATH = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Indexer", "path")) diff --git a/bin/lib/objects/CryptoCurrencies.py b/bin/lib/objects/CryptoCurrencies.py index f9b2aede..f1eeb152 100755 --- a/bin/lib/objects/CryptoCurrencies.py +++ b/bin/lib/objects/CryptoCurrencies.py @@ -3,11 +3,16 @@ import os import sys -import redis from flask import url_for +from hashlib import sha256 + +from pymisp import MISPObject, MISPAttribute sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## from lib.ConfigLoader import ConfigLoader from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id @@ -15,6 +20,26 @@ config_loader = ConfigLoader() config_loader = None +digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' + + +# http://rosettacode.org/wiki/Bitcoin/address_validation#Python +def decode_base58(bc, length): + n = 0 + for char in bc: + n = n * 58 + digits58.index(char) + return n.to_bytes(length, 'big') + + +# http://rosettacode.org/wiki/Bitcoin/address_validation#Python +def check_base58_address(bc): + try: + bcbytes = decode_base58(bc, 25) + return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4] + except Exception: + return 
False
+
+
 class CryptoCurrency(AbstractSubtypeObject):
     """
     AIL CryptoCurrency Object. (strings)
@@ -33,20 +58,26 @@ class CryptoCurrency(AbstractSubtypeObject):
         # # TODO:
         pass

+    def is_valid_address(self):
+        if self.subtype == 'bitcoin' or self.subtype == 'dash' or self.subtype == 'litecoin':
+            return check_base58_address(self.id)
+        else:
+            return True
+
     def get_currency_symbol(self):
-        if self.subtype=='bitcoin':
+        if self.subtype == 'bitcoin':
             return 'BTC'
-        elif self.subtype=='ethereum':
+        elif self.subtype == 'ethereum':
             return 'ETH'
-        elif self.subtype=='bitcoin-cash':
+        elif self.subtype == 'bitcoin-cash':
             return 'BCH'
-        elif self.subtype=='litecoin':
+        elif self.subtype == 'litecoin':
             return 'LTC'
-        elif self.subtype=='monero':
+        elif self.subtype == 'monero':
             return 'XMR'
-        elif self.subtype=='zcash':
+        elif self.subtype == 'zcash':
             return 'ZEC'
-        elif self.subtype=='dash':
+        elif self.subtype == 'dash':
             return 'DASH'
         return None
@@ -70,7 +101,7 @@ class CryptoCurrency(AbstractSubtypeObject):
         else:
             style = 'fas'
             icon = '\uf51e'
-        return {'style': style, 'icon': icon, 'color': '#DDCC77', 'radius':5}
+        return {'style': style, 'icon': icon, 'color': '#DDCC77', 'radius': 5}

     def get_misp_object(self):
         obj_attrs = []
@@ -78,10 +109,10 @@ class CryptoCurrency(AbstractSubtypeObject):
         obj.first_seen = self.get_first_seen()
         obj.last_seen = self.get_last_seen()

-        obj_attrs.append( obj.add_attribute('address', value=self.id) )
+        obj_attrs.append(obj.add_attribute('address', value=self.id))
         crypto_symbol = self.get_currency_symbol()
         if crypto_symbol:
-            obj_attrs.append( obj.add_attribute('symbol', value=crypto_symbol) )
+            obj_attrs.append(obj.add_attribute('symbol', value=crypto_symbol))

         for obj_attr in obj_attrs:
             for tag in self.get_tags():
@@ -95,15 +126,15 @@ class CryptoCurrency(AbstractSubtypeObject):
             meta['tags'] = self.get_tags(r_list=True)
         return meta

-
 ############################################################################
 ############################################################################
+
 def get_all_subtypes():
-    #return ail_core.get_object_all_subtypes(self.type)
+    # return ail_core.get_object_all_subtypes(self.type)
     return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']

+
 # def build_crypto_regex(subtype, search_id):
 #     pass
 #
@@ -118,10 +149,25 @@ def get_all_subtypes():
 #     return abstract_object.search_subtype_obj_by_id('cryptocurrency', subtype, regex)

+def get_subtype_by_symbol(symbol):
+    if symbol == 'BTC':
+        return 'bitcoin'
+    elif symbol == 'ETH':
+        return 'ethereum'
+    elif symbol == 'BCH':
+        return 'bitcoin-cash'
+    elif symbol == 'LTC':
+        return 'litecoin'
+    elif symbol == 'XMR':
+        return 'monero'
+    elif symbol == 'ZEC':
+        return 'zcash'
+    elif symbol == 'DASH':
+        return 'dash'
+    return None

-
-# by days -> need first/last entry USEFULL FOR DATA RETENTION UI
+# by days -> need first/last entry USEFUL FOR DATA RETENTION UI
 def get_all_cryptocurrencies():
     cryptos = {}
@@ -129,9 +175,31 @@ def get_all_cryptocurrencies():
         cryptos[subtype] = get_all_cryptocurrencies_by_subtype(subtype)
     return cryptos

+
 def get_all_cryptocurrencies_by_subtype(subtype):
     return get_all_id('cryptocurrency', subtype)

+
+# TODO save object
+def import_misp_object(misp_obj):
+    """
+    :type misp_obj: MISPObject
+    """
+    obj_id = None
+    obj_subtype = None
+    for attribute in misp_obj.attributes:
+        if attribute.object_relation == 'address':  # TODO: handle xmr address field
+            obj_id = attribute.value
+        elif attribute.object_relation == 'symbol':
+            obj_subtype =
get_subtype_by_symbol(attribute.value)
+    if obj_id and obj_subtype:
+        obj = CryptoCurrency(obj_id, obj_subtype)
+        first_seen, last_seen = obj.get_misp_object_first_last_seen(misp_obj)
+        tags = obj.get_misp_object_tags(misp_obj)
+        # for tag in tags:
+        #     obj.add_tag()
+
+
 if __name__ == '__main__':
     res = get_all_cryptocurrencies()
     print(res)
diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py
index 1c4c389a..50f4685c 100755
--- a/bin/lib/objects/Domains.py
+++ b/bin/lib/objects/Domains.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 # -*-coding:UTF-8 -*
-
+import itertools
 import os
+import re
 import sys
 import time
 import zipfile
@@ -18,6 +19,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib import ConfigLoader
 from lib.objects.abstract_object import AbstractObject
+from lib.ail_core import paginate_iterator
 from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_har
 from lib import data_retention_engine
@@ -201,6 +203,14 @@ class Domain(AbstractObject):
             history.append(dict_history)
         return history

+    # TODO ADD RANDOM OPTION
+    def get_screenshot(self):
+        last_item = self.get_last_item_root()
+        if last_item:
+            screenshot = self._get_external_correlation('item', '', last_item, 'screenshot').get('screenshot')
+            if screenshot:
+                return screenshot.pop()[1:]
+
     def get_languages(self):
         return r_crawler.smembers(f'domain:language:{self.id}')
@@ -217,14 +227,14 @@ class Domain(AbstractObject):
             'tags': self.get_tags(r_list=True),
             'status': self.is_up()
         }
-        # meta['ports'] = self.get_ports()
-
         if 'last_origin' in options:
             meta['last_origin'] = self.get_last_origin(obj=True)
-        # meta['is_tags_safe'] = ##################################
         if 'languages' in options:
             meta['languages'] = self.get_languages()
-        # meta['screenshot'] =
+        if 'screenshot' in options:
+            meta['screenshot'] = self.get_screenshot()
+        if 'tags_safe' in options:
+            meta['is_tags_safe'] = self.is_tags_safe(meta['tags'])
         return meta

     # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
@@ -454,9 +464,59 @@ def _write_in_zip_buffer(zf, path, filename):
 def get_all_domains_types():
     return ['onion', 'web']  # i2p

+def sanitize_domains_types(types):
+    domains_types = get_all_domains_types()
+    if not types:
+        return domains_types
+    types_domains = []
+    for type_d in types:
+        if type_d in domains_types:
+            types_domains.append(type_d)
+    if not types_domains:
+        return domains_types
+    return types_domains
+
+
 def get_all_domains_languages():
     return r_crawler.smembers('all_domains_languages')

+# TODO sanitize type
+# TODO sanitize languages
+def get_domains_by_languages(languages, domain_types):
+    if len(languages) == 1:
+        if len(domain_types) == 1:
+            return r_crawler.smembers(f'language:domains:{domain_types[0]}:{languages[0]}')
+        else:
+            l_keys = []
+            for domain_type in domain_types:
+                l_keys.append(f'language:domains:{domain_type}:{languages[0]}')
+            return r_crawler.sunion(l_keys[0], *l_keys[1:])
+    else:
+        domains = []
+        for domain_type in domain_types:
+            l_keys = []
+            for language in languages:
+                l_keys.append(f'language:domains:{domain_type}:{language}')
+            res = r_crawler.sinter(l_keys[0], *l_keys[1:])
+            if res:
+                domains.append(res)
+        return list(itertools.chain.from_iterable(domains))
+
+def api_get_domains_by_languages(domains_types, languages, meta=False, page=1):
+    domains = sorted(get_domains_by_languages(languages, domains_types))
+    domains = paginate_iterator(domains, nb_obj=28, page=page)
+    if not meta:
+        return domains
+    else:
+        metas = []
+        for dom in domains['list_elem']:
+            domain = Domain(dom)
+
domain_meta = domain.get_meta(options={'languages', 'screenshot', 'tags_safe'}) + metas.append(domain_meta) + domains['list_elem'] = metas + return domains + + def get_domains_up_by_type(domain_type): return r_crawler.smembers(f'full_{domain_type}_up') @@ -488,9 +548,77 @@ def get_domains_meta(domains): metas.append(dom.get_meta()) return metas +# TODO HANDLE ALL MULTIPLE DOMAIN TYPES +# TODO ADD TAGS FILTER +def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], nb_obj=28, page=1): + if not tags: + if not date_from and not date_to: + domains = sorted(get_domains_up_by_type(domain_type)) + else: + domains = sorted(get_domains_by_daterange(date_from, date_to, domain_type)) + domains = paginate_iterator(domains, nb_obj=nb_obj, page=page) + meta = [] + for dom in domains['list_elem']: + domain = Domain(dom) + meta.append(domain.get_meta(options={'languages', 'screenshot', 'tags_safe'})) + domains['list_elem'] = meta + domains['domain_type'] = domain_type + if date_from: + domains['date_from'] = date_from + if date_to: + domains['date_to'] = date_to + return domains + else: + return None + +def sanitize_domain_name_to_search(name_to_search, domain_type): + if domain_type == 'onion': + r_name = r'[a-z0-9\.]+' + else: + r_name = r'[a-zA-Z0-9-_\.]+' + # invalid domain name + if not re.fullmatch(r_name, name_to_search): + res = re.match(r_name, name_to_search) + return {'search': name_to_search, 'error': res.string.replace( res[0], '')} + return name_to_search.replace('.', '\.') + +def search_domain_by_name(name_to_search, domain_types, r_pos=False): + domains = {} + for domain_type in domain_types: + r_name = sanitize_domain_name_to_search(name_to_search, domain_type) + if not name_to_search or isinstance(r_name, dict): + break + r_name = re.compile(r_name) + for domain in get_domains_up_by_type(domain_type): + res = re.search(r_name, domain) + if res: + domains[domain] = {} + if r_pos: + domains[domain]['hl-start'] = res.start() + domains[domain]['hl-end'] = res.end() + return domains + +def api_search_domains_by_name(name_to_search, domain_types, meta=False, page=1): + domain_types = sanitize_domains_types(domain_types) + domains_dict = search_domain_by_name(name_to_search, domain_types, r_pos=True) + domains = sorted(domains_dict.keys()) + domains = paginate_iterator(domains, nb_obj=28, page=page) + if not meta: + return domains + else: + metas = [] + for dom in domains['list_elem']: + domain = Domain(dom) + domain_meta = domain.get_meta(options={'languages', 'screenshot', 'tags_safe'}) + domain_meta = {**domains_dict[dom], **domain_meta} + metas.append(domain_meta) + domains['list_elem'] = metas + domains['search'] = name_to_search + return domains + ################################################################################ ################################################################################ -if __name__ == '__main__': - dom = Domain('') - dom.get_download_zip() +# if __name__ == '__main__': +# dom = Domain('') +# dom.get_download_zip() diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index e7f6932e..43d57d2f 100755 --- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -9,8 +9,9 @@ Base Class for AIL Objects import os import sys from abc import ABC, abstractmethod +from pymisp import MISPObject -#from flask import url_for +# from flask import url_for sys.path.append(os.environ['AIL_BIN']) ################################## @@ -22,7 +23,6 @@ from lib.correlations_engine import 
get_nb_correlations, get_correlations, add_o from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations from lib.Tracker import is_obj_tracked, get_obj_all_trackers, delete_obj_trackers -# # TODO: ADD CORRELATION ENGINE class AbstractObject(ABC): """ @@ -31,7 +31,7 @@ class AbstractObject(ABC): # first seen last/seen ?? # # TODO: - tags - # - handle + refactor coorelations + # - handle + refactor correlations # - creates others objects def __init__(self, obj_type, id, subtype=None): @@ -56,6 +56,9 @@ class AbstractObject(ABC): return '' return self.subtype + def get_global_id(self): + return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}' + def get_default_meta(self, tags=False): dict_meta = {'id': self.get_id(), 'type': self.get_type(), @@ -78,6 +81,11 @@ class AbstractObject(ABC): def add_tag(self, tag): Tag.add_object_tag(tag, self.type, self.id, subtype=self.get_subtype(r_str=True)) + def is_tags_safe(self, tags=None): + if not tags: + tags = self.get_tags() + return Tag.is_tags_safe(tags) + #- Tags -# ## Investigations ## @@ -99,10 +107,10 @@ class AbstractObject(ABC): def delete_investigations(self): if not self.subtype: - unregistred = delete_obj_investigations(self.id, self.type) + unregistered = delete_obj_investigations(self.id, self.type) else: - unregistred = delete_obj_investigations(self.id, self.type, self.subtype) - return unregistred + unregistered = delete_obj_investigations(self.id, self.type, self.subtype) + return unregistered #- Investigations -# @@ -117,11 +125,11 @@ class AbstractObject(ABC): def delete_trackers(self): return delete_obj_trackers(self.type, self.subtype, self.id) - #- Investigations -# + #- Trackers -# def _delete(self): # DELETE TAGS - Tag.delete_obj_all_tags(self.id, self.type) ############ # TODO: # TODO: # FIXME: + Tag.delete_obj_all_tags(self.id, self.type) # ########### # TODO: # TODO: # FIXME: # remove from tracker self.delete_trackers() # remove from investigations @@ -165,6 +173,29 @@ class AbstractObject(ABC): def get_misp_object(self): pass + @staticmethod + def get_misp_object_first_last_seen(misp_obj): + """ + :type misp_obj: MISPObject + """ + first_seen = misp_obj.get('first_seen') + last_seen = misp_obj.get('last_seen') + return first_seen, last_seen + + @staticmethod + def get_misp_object_tags(misp_obj): + """ + :type misp_obj: MISPObject + """ + if misp_obj.attributes: + misp_tags = misp_obj.attributes[0].tags + tags = [] + for tag in misp_tags: + tags.append(tag.name) + return tags + else: + return [] + def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type): """ Get object correlation diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index 29d3be4a..48c2589f 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -3,8 +3,6 @@ import os import sys -import uuid -import redis from abc import ABC from flask import url_for @@ -19,21 +17,21 @@ from lib import correlations_engine from lib import btc_ail from lib import Tag -from lib.objects.CryptoCurrencies import CryptoCurrency +from lib.objects import CryptoCurrencies from lib.objects.Cves import Cve from lib.objects.Decodeds import Decoded from lib.objects.Domains import Domain from lib.objects.Items import Item -from lib.objects.Pgps import Pgp +from lib.objects import Pgps from lib.objects.Screenshots import Screenshot -from lib.objects.Usernames import Username +from lib.objects import Usernames config_loader = ConfigLoader() r_serv_metadata 
= config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None

-class AILObjects(object): ## ??????????????????????
+class AILObjects(object):  ## ??????????????????????
     initial = 0
     ongoing = 1
     completed = 2
@@ -41,6 +39,14 @@ class AILObjects(object):  ## ??????????????????????
 def is_valid_object_type(obj_type):
     return obj_type in get_all_objects()

+def is_valid_object_subtype(obj_type, subtype):
+    if obj_type == 'cryptocurrency':
+        return subtype in CryptoCurrencies.get_all_subtypes()
+    elif obj_type == 'pgp':
+        return subtype in Pgps.get_all_subtypes()
+    elif obj_type == 'username':
+        return subtype in Usernames.get_all_subtypes()
+
 def sanitize_objs_types(objs):
     l_types = []
     for obj in objs:
@@ -62,11 +68,11 @@ def get_object(obj_type, subtype, id):
     elif obj_type == 'screenshot':
         return Screenshot(id)
     elif obj_type == 'cryptocurrency':
-        return CryptoCurrency(id, subtype)
+        return CryptoCurrencies.CryptoCurrency(id, subtype)
     elif obj_type == 'pgp':
-        return Pgp(id, subtype)
+        return Pgps.Pgp(id, subtype)
     elif obj_type == 'username':
-        return Username(id, subtype)
+        return Usernames.Username(id, subtype)

 def exists_obj(obj_type, subtype, obj_id):
     obj = get_object(obj_type, subtype, obj_id)
@@ -75,6 +81,14 @@ def exists_obj(obj_type, subtype, obj_id):
     else:
         return False

+def get_obj_global_id(obj_type, subtype, obj_id):
+    obj = get_object(obj_type, subtype, obj_id)
+    return obj.get_global_id()
+
+def get_obj_from_global_id(global_id):
+    obj = global_id.split(':', 2)
+    return get_object(obj[0], obj[1], obj[2])
+
 def get_object_link(obj_type, subtype, id, flask_context=False):
     obj = get_object(obj_type, subtype, id)
     return obj.get_link(flask_context=flask_context)
@@ -93,7 +107,8 @@ def get_object_meta(obj_type, subtype, id, options=[], flask_context=False):
 def get_objects_meta(objs, options=[], flask_context=False):
     metas = []
     for obj_dict in objs:
-        metas.append(get_object_meta(obj_dict['type'], obj_dict['subtype'], obj_dict['id'], options=options, flask_context=flask_context))
+        metas.append(get_object_meta(obj_dict['type'], obj_dict['subtype'], obj_dict['id'], options=options,
+                                     flask_context=flask_context))
     return metas

 def get_object_card_meta(obj_type, subtype, id, related_btc=False):
@@ -116,31 +131,18 @@ def get_ui_obj_tag_table_keys(obj_type):
     '''
     Warning: use only in flask (dynamic templates)
     '''
-    if obj_type=="domain":
-        return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot
-
-# # TODO: # FIXME:
-# def get_objects_meta(l_dict_objs, icon=False, url=False, flask_context=False):
-#     l_meta = []
-#     for dict_obj in l_dict_objs:
-#         object = get_object(dict_obj['type'], dict_obj['subtype'], dict_obj['id'])
-#         dict_meta = object.get_default_meta(tags=True)
-#         if icon:
-#             dict_meta['icon'] = object.get_svg_icon()
-#         if url:
-#             dict_meta['link'] = object.get_link(flask_context=flask_context)
-#         l_meta.append(dict_meta)
-#     return l_meta
+    if obj_type == "domain":
+        return ['id', 'first_seen', 'last_check', 'status']  # # TODO: add root screenshot

 # # TODO: CHECK IF object already have an UUID
 def get_misp_object(obj_type, subtype, id):
-    object = get_object(obj_type, subtype, id)
-    return object.get_misp_object()
+    obj = get_object(obj_type, subtype, id)
+    return obj.get_misp_object()

 # get misp relationship
 def get_objects_relationship(obj_1, obj2):
     relationship = {}
-    obj_types = ( obj_1.get_type(), obj2.get_type() )
+    obj_types = (obj_1.get_type(), obj2.get_type())
 ##############################################################
     # if
['cryptocurrency', 'pgp', 'username', 'decoded', 'screenshot']:
@@ -149,12 +151,12 @@ def get_objects_relationship(obj_1, obj2):
 ##############################################################
     if 'cryptocurrency' in obj_types:
         relationship['relation'] = 'extracted-from'
-        if obj1_type == 'cryptocurrency':
-            relationship['src'] = obj1_id
-            relationship['dest'] = obj2_id
+        if obj_1.get_type() == 'cryptocurrency':
+            relationship['src'] = obj_1.get_id()
+            relationship['dest'] = obj2.get_id()
         else:
-            relationship['src'] = obj2_id
-            relationship['dest'] = obj1_id
+            relationship['src'] = obj2.get_id()
+            relationship['dest'] = obj_1.get_id()

     elif 'pgp' in obj_types:
         relationship['relation'] = 'extracted-from'
@@ -175,16 +177,15 @@ def get_objects_relationship(obj_1, obj2):
     else:
         pass
-
-
-
-
-
     return relationship

 def api_sanitize_object_type(obj_type):
     if not is_valid_object_type(obj_type):
-        return ({'status': 'error', 'reason': 'Incorrect object type'}, 400)
+        return {'status': 'error', 'reason': 'Incorrect object type'}, 400
+
+def get_obj_correlations(obj_type, subtype, id):
+    obj = get_object(obj_type, subtype, id)
+    return obj.get_correlations()

 ################################################################################
 # DATA RETENTION
diff --git a/bin/lib/queues_modules.py b/bin/lib/queues_modules.py
index a3977e10..5a1e87c6 100755
--- a/bin/lib/queues_modules.py
+++ b/bin/lib/queues_modules.py
@@ -3,11 +3,13 @@
 import os
 import sys
-import redis
 import datetime

-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
-import ConfigLoader
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader

 config_loader = ConfigLoader.ConfigLoader()
 r_serv_queues = config_loader.get_redis_conn("Redis_Queues")
diff --git a/bin/lib/regex_helper.py b/bin/lib/regex_helper.py
index edaff949..1b7149c4 100755
--- a/bin/lib/regex_helper.py
+++ b/bin/lib/regex_helper.py
@@ -15,9 +15,12 @@ from multiprocessing import Process as Proc
 sys.path.append(os.environ['AIL_BIN'])
 from pubsublogger import publisher

-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
-import ConfigLoader
-import Statistics
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader
+from lib import Statistics

 ## LOAD CONFIG ##
 config_loader = ConfigLoader.ConfigLoader()
diff --git a/bin/modules/Cryptocurrencies.py b/bin/modules/Cryptocurrencies.py
new file mode 100755
index 00000000..f77551ff
--- /dev/null
+++ b/bin/modules/Cryptocurrencies.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+The Cryptocurrency Module
+============================
+
+It tries to extract cryptocurrency addresses and secret keys from items.
+
+    .. seealso:: Paste method (get_regex)
+
+Requirements
+------------
+
+*Need running Redis instances. (Redis).
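+
+Illustrative usage (hypothetical address; the module itself consumes items
+from its queue rather than raw strings)::
+
+    from lib.objects.CryptoCurrencies import CryptoCurrency
+
+    obj = CryptoCurrency('1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa', 'bitcoin')
+    obj.is_valid_address()     # True: passes the base58check test
+    obj.get_currency_symbol()  # 'BTC'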
+ +""" + +################################## +# Import External packages +################################## +import os +import sys +from abc import ABC + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from modules.abstract_module import AbstractModule +from lib.objects.CryptoCurrencies import CryptoCurrency +from lib.objects.Items import Item + +################################## +################################## +default_max_execution_time = 30 +CURRENCIES = { + 'bitcoin': { + 'name': 'bitcoin', # e.g. 1NbEPRwbBZrFDsx1QW19iDs8jQLevzzcms + 'regex': r'\b(? last_seen: - r_serv_metadata.hset('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'last_seen', date) - - def save_item_correlation(self, subtype, obj_id, item_id, item_date): - self.update_correlation_daterange(subtype, obj_id, item_date) - - # global set - r_serv_metadata.sadd('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), item_id) - - # daily - r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, 1) - - # all type - r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 1) - - ## object_metadata - # item - r_serv_metadata.sadd('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id) - - # domain - if item_basic.is_crawled(item_id): - domain = item_basic.get_item_domain(item_id) - self.save_domain_correlation(domain, subtype, obj_id) - - def delete_item_correlation(self, subtype, obj_id, item_id, item_date): - #self.update_correlation_daterange(subtype, obj_id, item_date) update daterange ! # # TODO: - r_serv_metadata.srem('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), item_id) - r_serv_metadata.srem('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id) - - res = r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, -1) - if int(res) < 0: # remove last - r_serv_metadata.hdel('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id) - - res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id) - if int(res) > 0: - r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, -1) - - def save_domain_correlation(self, domain, subtype, obj_id): - r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id) - r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), domain) - - def delete_domain_correlation(self, domain, subtype, obj_id): - r_serv_metadata.srem('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id) - r_serv_metadata.srem('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), domain) - - def save_correlation(self, subtype, obj_id, date_range): - r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 0) - self.update_correlation_daterange(subtype, obj_id, date_range['date_from']) - if date_range['date_from'] != date_range['date_to']: - self.update_correlation_daterange(subtype, obj_id, date_range['date_to']) - return True - - def save_obj_relationship(self, subtype, obj_id, obj2_type, obj2_id): - if obj2_type == 'domain': - self.save_domain_correlation(obj2_id, subtype, obj_id) - elif obj2_type == 'item': - self.save_item_correlation(subtype, obj_id, obj2_id, item_basic.get_item_date(obj2_id)) - - def delete_obj_relationship(self, subtype, obj_id, 
obj2_type, obj2_id): - if obj2_type == 'domain': - self.delete_domain_correlation(obj2_id, subtype, obj_id) - elif obj2_type == 'item': - self.delete_item_correlation(subtype, obj_id, obj2_id, item_basic.get_item_date(obj2_id)) - - def create_correlation(self, subtype, obj_id, obj_meta): - res = self.sanythise_correlation_types([subtype], r_boolean=True) - if not res: - print('invalid subtype') - return False - first_seen = obj_meta.get('first_seen', None) - last_seen = obj_meta.get('last_seen', None) - date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime') - res = self.save_correlation(subtype, obj_id, date_range) - if res and 'tags' in obj_meta: - # # TODO: handle mixed tags: taxonomies and Galaxies - pass - #Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type=self.get_correlation_obj_type()) - return True - - # # TODO: handle tags - def delete_correlation(self, subtype, obj_id): - res = self.sanythise_correlation_types([subtype], r_boolean=True) - if not res: - print('invalid subtype') - return False - if not self.exist_correlation(subtype, obj_id): - return False - - obj_correlations = self.get_correlation_all_object(subtype, obj_id) - if 'domain' in obj_correlations: - for domain in obj_correlations['domain']: - r_serv_metadata.srem('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id) - r_serv_metadata.delete('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id)) - - - if 'paste' in obj_correlations: # TODO: handle item - for item_id in obj_correlations['paste']: - - r_serv_metadata.srem('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id) - r_serv_metadata.delete('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id)) - - # delete daily correlation - first_seen = self.get_correlation_first_seen(subtype, obj_id) - last_seen = self.get_correlation_last_seen(subtype, obj_id) - meta_date = Date.sanitise_date_range(first_seen, last_seen) - date_range = Date.substract_date(meta_date['date_from'], meta_date['date_to']) - for date_day in date_range: - r_serv_metadata.hdel('{}:{}:{}'.format(self.correlation_name, subtype, date_day), obj_id) - - r_serv_metadata.delete('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id)) - r_serv_metadata.zrem('{}_all:{}'.format(self.correlation_name, subtype), obj_id) - - return True - - ######## API EXPOSED ######## - def api_check_objs_type(self, l_types): - for obj_type in l_types: - if not self.is_valid_obj_subtype(obj_type): - return ({"error": f"Invalid Type: {obj_type}"}, 400) - - ######## ######## diff --git a/bin/packages/Cryptocurrency.py b/bin/packages/Cryptocurrency.py deleted file mode 100755 index 9ed8ec59..00000000 --- a/bin/packages/Cryptocurrency.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import redis - -from hashlib import sha256 - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Correlation - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader - -config_loader = ConfigLoader.ConfigLoader() -r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -config_loader = None - -digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' - -cryptocurrency = Correlation.Correlation('cryptocurrency', ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash']) - -# http://rosettacode.org/wiki/Bitcoin/address_validation#Python -def decode_base58(bc, length): 
- n = 0 - for char in bc: - n = n * 58 + digits58.index(char) - return n.to_bytes(length, 'big') - -# http://rosettacode.org/wiki/Bitcoin/address_validation#Python -def check_base58_address(bc): - try: - bcbytes = decode_base58(bc, 25) - return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4] - except Exception: - return False - -def verify_cryptocurrency_address(cryptocurrency_type, cryptocurrency_address): - if cryptocurrency_type in ('bitcoin', 'litecoin', 'dash'): - return check_base58_address(cryptocurrency_address) - else: - return True - - -def get_cryptocurrency(request_dict, cryptocurrency_type): - # basic verification - res = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type) - if res: - return res - # cerify address - field_name = request_dict.get(cryptocurrency_type) - if not verify_cryptocurrency_address(cryptocurrency_type, field_name): - return ( {'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400 ) - - return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name) - - -def get_cryptocurrency_symbol(crypto_type): - if crypto_type=='bitcoin': - return 'BTC' - elif crypto_type=='ethereum': - return 'ETH' - elif crypto_type=='bitcoin-cash': - return 'BCH' - elif crypto_type=='litecoin': - return 'LTC' - elif crypto_type=='monero': - return 'XMR' - elif crypto_type=='zcash': - return 'ZEC' - elif crypto_type=='dash': - return 'DASH' - return None - -def get_cryptocurrency_type(crypto_symbol): - if crypto_symbol=='BTC': - return 'bitcoin' - elif crypto_symbol=='ETH': - return 'ethereum' - elif crypto_symbol=='BCH': - return 'bitcoin-cash' - elif crypto_symbol=='LTC': - return 'litecoin' - elif crypto_symbol=='XMR': - return 'monero' - elif crypto_symbol=='ZEC': - return 'zcash' - elif crypto_symbol=='DASH': - return 'dash' - return None diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py deleted file mode 100755 index 6cd59d51..00000000 --- a/bin/packages/HiddenServices.py +++ /dev/null @@ -1,342 +0,0 @@ -#!/usr/bin/python3 - -""" -The ``hiddenServices Class`` -=================== - -Use it to create an object from an existing paste or other random file. - -Conditions to fulfill to be able to use this class correctly: -------------------------------------------------------------- - -1/ The paste need to be saved on disk somewhere (have an accessible path) -2/ The paste need to be gziped. -3/ The filepath need to look like something like this: - /directory/source/year/month/day/paste.gz - -""" - -import os -import sys -import time -import gzip -import redis -import random - -from io import BytesIO -import zipfile - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -from Date import Date - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader - -class HiddenServices(object): - """ - This class representing a hiddenServices as an object. 
- When created, the object will have by default some "main attributes" - - :Example: - - PST = HiddenServices("xxxxxxxx.onion", "onion") - - """ - - def __init__(self, domain, type, port=80): - - config_loader = ConfigLoader.ConfigLoader() - self.r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - self.r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - - self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - - self.domain = domain - self.type = type - self.port = port - self.tags = {} - - if type == 'onion' or type == 'regular': - self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) - self.paste_crawled_directory = os.path.join(self.paste_directory, config_loader.get_config_str("Directories", "crawled")) - self.paste_crawled_directory_name = config_loader.get_config_str("Directories", "crawled") - self.screenshot_directory = config_loader.get_files_directory('screenshot') - elif type == 'i2p': - self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - self.screenshot_directory = config_loader.get_files_directory('screenshot') - else: - ## TODO: # FIXME: add error - pass - - config_loader = None - - #def remove_absolute_path_link(self, key, value): - # print(key) - # print(value) - - def update_item_path_children(self, key, children): - if self.PASTES_FOLDER in children: - self.r_serv_metadata.srem(key, children) - children = children.replace(self.PASTES_FOLDER, '', 1) - self.r_serv_metadata.sadd(key, children) - return children - - def get_origin_paste_name(self): - origin_item = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'paste_parent') - if origin_item is None: - return '' - elif origin_item == 'auto' or origin_item == 'manual': - return origin_item - return origin_item.replace(self.paste_directory+'/', '') - - def get_domain_tags(self, update=False): - if not update: - return self.tags - else: - self.get_last_crawled_pastes() - return self.tags - - def update_domain_tags(self, item): - if item: - - if self.r_serv_metadata.exists('tag:{}'.format(item)): - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item)) - # update path here - else: - # need to remove it - if self.paste_directory in item: - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item.replace(self.paste_directory+'/', ''))) - # need to remove it - else: - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item))) - for tag in p_tags: - self.tags[tag] = self.tags.get(tag, 0) + 1 - - def extract_epoch_from_history(self, crawled_history): - epoch_list = [] - for res, epoch_val in crawled_history: - epoch_val = int(epoch_val) # force int - try: - # domain down - if int(res) == epoch_val: - status = False - # domain up - else: - status = True - except ValueError: - status = True - epoch_val = int(epoch_val) # force int - epoch_list.append((epoch_val, time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val)), status)) - return epoch_list - - def get_domain_crawled_history(self): - return self.r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(self.type, self.domain, self.port), 0, -1, withscores=True) - - def get_first_crawled(self): - res = self.r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(self.type, self.domain, self.port), 0, 0, withscores=True) - if res: - res = res[0] - return {'root_item':res[0], 'epoch':int(res[1])} - 
-        else:
-            return {}
-
-    def get_last_crawled(self):
-        res = self.r_serv_onion.zrevrange('crawler_history_{}:{}:{}'.format(self.type, self.domain, self.port), 0, 0, withscores=True)
-        if res:
-            return {'root_item':res[0][0], 'epoch':res[0][1]}
-        else:
-            return {}
-
-    #todo use the right paste
-    def get_domain_crawled_core_item(self, epoch=None):
-        core_item = {}
-        if epoch:
-            list_root = self.r_serv_onion.zrevrangebyscore('crawler_history_{}:{}:{}'.format(self.type, self.domain, self.port), int(epoch), int(epoch))
-            if list_root:
-                core_item['root_item'] = list_root[0]
-                core_item['epoch'] = epoch
-            return core_item
-
-        # no history found for this epoch
-        if not core_item:
-            return self.get_last_crawled()
-
-    #todo use the right paste
-    def get_last_crawled_pastes(self, item_root=None):
-        if item_root is None:
-            item_root = self.get_domain_crawled_core_item()
-            if item_root:
-                item_root = item_root['root_item']
-        return self.get_all_pastes_domain(item_root)
-
-    def get_all_pastes_domain(self, root_item):
-        if root_item is None:
-            return []
-        l_crawled_pastes = []
-        l_crawled_pastes = self.get_item_crawled_children(root_item)
-        l_crawled_pastes.append(root_item)
-        self.update_domain_tags(root_item)
-        return l_crawled_pastes
-
-    def get_item_crawled_children(self, father):
-        if father is None:
-            return []
-        l_crawled_pastes = []
-        key = 'paste_children:{}'.format(father)
-        paste_childrens = self.r_serv_metadata.smembers(key)
-        for children in paste_childrens:
-            children = self.update_item_path_children(key, children)
-            if self.domain in children:
-                l_crawled_pastes.append(children)
-                self.update_domain_tags(children)
-                l_crawled_pastes.extend(self.get_item_crawled_children(children))
-        return l_crawled_pastes
-
-    def get_item_link(self, item):
-        link = self.r_serv_metadata.hget('paste_metadata:{}'.format(item), 'real_link')
-        if link is None:
-            if self.paste_directory in item:
-                link = self.r_serv_metadata.hget('paste_metadata:{}'.format(item.replace(self.paste_directory+'/', '')), 'real_link')
-            else:
-                key = os.path.join(self.paste_directory, item)
-                link = self.r_serv_metadata.hget('paste_metadata:{}'.format(key), 'real_link')
-                #if link:
-                    #self.remove_absolute_path_link(key, link)
-
-        return link
-
-    def get_all_links(self, l_items):
-        dict_links = {}
-        for item in l_items:
-            link = self.get_item_link(item)
-            if link:
-                dict_links[item] = link
-        return dict_links
-
-    # experimental
-    def get_domain_son(self, l_paste):
-        if l_paste is None:
-            return None
-
-        set_domain = set()
-        for paste in l_paste:
-            paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste))
-            for children in paste_childrens:
-                if self.domain not in children:
-                    set_domain.add((children.split('.onion')[0]+'.onion').split('/')[-1])
-
-        return set_domain
-
-    '''
-    def get_all_domain_son(self, father):
-        if father is None:
-            return []
-        l_crawled_pastes = []
-        paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(father))
-        for children in paste_childrens:
-            if not self.domain in children:
-                l_crawled_pastes.append(children)
-                #self.update_domain_tags(children)
-                l_crawled_pastes.extend(self.get_all_domain_son(children))
-
-        return l_crawled_pastes
-    '''
-
-    def get_item_screenshot(self, item):
-        screenshot = self.r_serv_metadata.hget('paste_metadata:{}'.format(item), 'screenshot')
-        if screenshot:
-            screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:])
-            return screenshot
-        return ''
-
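For reference, get_item_screenshot() above expands a flat screenshot hash into nested 2-character directories. A minimal standalone sketch of the same sharding (helper name and sample value are illustrative, not from the patch):

    import os

    def shard_screenshot_path(identifier):
        # '00aabbccddee1234' -> '00/aa/bb/cc/dd/ee/1234' (POSIX separators)
        parts = [identifier[i:i + 2] for i in range(0, 12, 2)]
        return os.path.join(*parts, identifier[12:])

    assert shard_screenshot_path('00aabbccddee1234') == '00/aa/bb/cc/dd/ee/1234'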
-    def get_domain_random_screenshot(self, l_crawled_pastes, num_screenshot=1):
-        l_screenshot_paste = []
-        for paste in l_crawled_pastes:
-            ## FIXME: # TODO: remove me
-            origin_paste = paste
-            paste = paste.replace(self.paste_directory+'/', '')
-
-            screenshot = self.get_item_screenshot(paste)
-            if screenshot:
-                l_screenshot_paste.append({'screenshot': screenshot, 'item': origin_paste})
-
-        if len(l_screenshot_paste) > num_screenshot:
-            l_random_screenshot = []
-            for index in random.sample(range(0, len(l_screenshot_paste)), num_screenshot):
-                l_random_screenshot.append(l_screenshot_paste[index])
-            return l_random_screenshot
-        else:
-            return l_screenshot_paste
-
-    def get_all_domain_screenshot(self, l_crawled_pastes, filename=False):
-        l_screenshot_paste = []
-        for paste in l_crawled_pastes:
-            ## FIXME: # TODO: remove me
-            origin_paste = paste
-            paste = paste.replace(self.paste_directory+'/', '')
-
-            screenshot = self.get_item_screenshot(paste)
-            if screenshot:
-                screenshot = screenshot + '.png'
-                screenshot_full_path = os.path.join(self.screenshot_directory, screenshot)
-                if filename:
-                    screen_file_name = os.path.basename(paste) + '.png'
-                    l_screenshot_paste.append((screenshot_full_path, screen_file_name))
-                else:
-                    l_screenshot_paste.append(screenshot_full_path)
-        return l_screenshot_paste
-
-    def get_all_item_full_path(self, l_items, filename=False):
-        l_full_items = []
-        for item in l_items:
-            item = os.path.join(self.PASTES_FOLDER, item)
-            if filename:
-                file_name = os.path.basename(item) + '.gz'
-                l_full_items.append((item, file_name))
-            else:
-                l_full_items.append(item)
-        return l_full_items
-
-    def get_crawled_pastes_by_date(self, date):
-
-        pastes_path = os.path.join(self.paste_crawled_directory, date[0:4], date[4:6], date[6:8])
-        paste_parent = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'last_check')
-
-        l_crawled_pastes = []
-        return l_crawled_pastes
-
-    def get_all_har(self, l_pastes, filename=False):
-        all_har = []
-        for item in l_pastes:
-            if filename:
-                all_har.append((self.get_item_har(item), os.path.basename(item) + '.json'))
-            else:
-                all_har.append(self.get_item_har(item))
-        return all_har
-
-
-    def get_item_har(self, item_path):
-        item_path = item_path.replace('{}/'.format(self.paste_crawled_directory_name), '', 1)
-        har_path = os.path.join(self.screenshot_directory, item_path) + '.json'
-        return har_path
-
-
-    def get_metadata_file(self, list_items):
-        file_content = ''
-        dict_url = self.get_all_links(list_items)
-        for key in dict_url:
-            file_content = '{}\n{} : {}'.format(file_content, os.path.basename(key), dict_url[key])
-        return file_content
-
-
-    '''
-    def get_last_crawled_pastes_fileSearch(self):
-
-        last_check = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'last_check')
-        return self.get_crawled_pastes_by_date_fileSearch(last_check)
-
-    def get_crawled_pastes_by_date_fileSearch(self, date):
-        pastes_path = os.path.join(self.paste_crawled_directory, date[0:4], date[4:6], date[6:8])
-        l_crawled_pastes = [f for f in os.listdir(pastes_path) if self.domain in f]
-        return l_crawled_pastes
-    '''
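Before its removal, the class above was typically driven by loading a domain and then walking its crawl history; a hedged usage sketch (domain name illustrative, behavior as defined by the methods above):

    from HiddenServices import HiddenServices

    h = HiddenServices('xxxxxxxx.onion', 'onion')
    history = h.get_domain_crawled_history()            # zset entries: (root_item, epoch)
    for epoch, date_str, is_up in h.extract_epoch_from_history(history):
        print(epoch, date_str, 'UP' if is_up else 'DOWN')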
diff --git a/bin/packages/Import_helper.py b/bin/packages/Import_helper.py
index 8a1dd840..b997d3e2 100755
--- a/bin/packages/Import_helper.py
+++ b/bin/packages/Import_helper.py
@@ -7,13 +7,12 @@
 import os
 import sys
 import uuid
 
-import redis
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
+from lib import ConfigLoader
 
 config_loader = ConfigLoader.ConfigLoader()
diff --git a/bin/packages/Item.py b/bin/packages/Item.py
deleted file mode 100755
index 336a565b..00000000
--- a/bin/packages/Item.py
+++ /dev/null
@@ -1,695 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import base64
-import os
-import re
-import sys
-import redis
-import cld3
-import html2text
-
-from io import BytesIO
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
-import Date
-import Tag
-import Cryptocurrency
-import Pgp
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import item_basic
-import domain_basic
-import ConfigLoader
-import Correlate_object
-import Decoded
-import Screenshot
-import Username
-
-from objects.abstract_object import AbstractObject
-from item_basic import *
-
-config_loader = ConfigLoader.ConfigLoader()
-# get and sanitize PASTE DIRECTORY
-# # TODO: rename PASTES_FOLDER
-PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
-
-r_cache = config_loader.get_redis_conn("Redis_Cache")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-screenshot_directory = config_loader.get_files_directory('screenshot')
-har_directory = config_loader.get_files_directory('har')
-
-config_loader = None
-
-def exist_item(item_id):
-    return item_basic.exist_item(item_id)
-
-def get_basename(item_id):
-    return os.path.basename(item_id)
-
-def get_item_id(full_path):
-    return full_path.replace(PASTES_FOLDER, '', 1)
-
-def get_item_filepath(item_id):
-    return item_basic.get_item_filepath(item_id)
-
-def get_item_date(item_id, add_separator=False):
-    return item_basic.get_item_date(item_id, add_separator=add_separator)
-
-def get_source(item_id):
-    return item_basic.get_source(item_id)
-
-def get_all_sources():
-    return item_basic.get_all_items_sources(r_list=True)
-
-def get_item_basename(item_id):
-    return os.path.basename(item_id)
-
-def get_item_size(item_id):
-    return round(os.path.getsize(os.path.join(PASTES_FOLDER, item_id))/1024.0, 2)
-
-def get_item_encoding(item_id):
-    return None
-
-def get_lines_info(item_id, item_content=None):
-    if not item_content:
-        item_content = get_item_content(item_id)
-    max_length = 0
-    line_id = 0
-    nb_line = 0
-    for line in item_content.splitlines():
-        length = len(line)
-        if length > max_length:
-            max_length = length
-        nb_line += 1
-    return {'nb': nb_line, 'max_length': max_length}
-
-
-def get_item_metadata(item_id, item_content=None):
-    ## TODO: FIXME ##performance
-    # encoding
-    # language
-    # lines info
-    item_metadata = {'date': get_item_date(item_id, add_separator=True),
-                     'source': get_source(item_id),
-                     'size': get_item_size(item_id),
-                     'encoding': get_item_encoding(item_id),
-                     'lines': get_lines_info(item_id, item_content=item_content)
-                     }
-    return item_metadata
-
-def get_item_parent(item_id):
-    return item_basic.get_item_parent(item_id)
-
-def add_item_parent(item_parent, item_id):
-    return item_basic.add_item_parent(item_parent, item_id)
-
-def get_item_content(item_id):
-    return item_basic.get_item_content(item_id)
-
-def get_item_content_html2text(item_id, item_content=None, ignore_links=False):
-    if not item_content:
-        item_content = get_item_content(item_id)
-    h = html2text.HTML2Text()
-    h.ignore_links = ignore_links
-    h.ignore_images = ignore_links
-    return h.handle(item_content)
-
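All of the helpers above rely on one convention: an item id is the file path relative to PASTES_FOLDER. A minimal sketch of the round trip, including the realpath/commonprefix guard used by the deleted code (the pastes root shown is hypothetical):

    import os

    PASTES_FOLDER = os.path.join(os.path.realpath('/opt/AIL/PASTES'), '')

    def to_item_id(full_path):
        # strip the pastes root once, keeping source/year/month/day/name
        return full_path.replace(PASTES_FOLDER, '', 1)

    def to_full_path(item_id):
        # resolve and reject ids that escape the pastes root (e.g. '../../etc/passwd')
        filename = os.path.realpath(os.path.join(PASTES_FOLDER, item_id))
        if os.path.commonprefix([filename, PASTES_FOLDER]) != PASTES_FOLDER:
            return None
        return filename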
-def remove_all_urls_from_content(item_id, item_content=None):
-    if not item_content:
-        item_content = get_item_content(item_id)
-    regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b'
-    url_regex = re.compile(regex)
-    urls = url_regex.findall(item_content)
-    urls = sorted(urls, key=len, reverse=True)
-    for url in urls:
-        item_content = item_content.replace(url, '')
-
-    regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
-    regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
-    regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
-    re.compile(regex_pgp_public_blocs)
-    re.compile(regex_pgp_signature)
-    re.compile(regex_pgp_message)
-
-    res = re.findall(regex_pgp_public_blocs, item_content)
-    for it in res:
-        item_content = item_content.replace(it, '')
-    res = re.findall(regex_pgp_signature, item_content)
-    for it in res:
-        item_content = item_content.replace(it, '')
-    res = re.findall(regex_pgp_message, item_content)
-    for it in res:
-        item_content = item_content.replace(it, '')
-
-    return item_content
-
-def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
-    all_languages = []
-
-    ## CLEAN CONTENT ##
-    content = get_item_content_html2text(item_id, ignore_links=True)
-    content = remove_all_urls_from_content(item_id, item_content=content)
-
-    # REMOVE USELESS SPACE
-    content = ' '.join(content.split())
-    #- CLEAN CONTENT -#
-
-    #print(content)
-    #print(len(content))
-    if len(content) >= min_len:
-        for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
-            if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
-                all_languages.append(lang)
-    return all_languages
-
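get_item_languages() above keeps only predictions that pycld3 marks reliable and that clear the proportion/probability floors; the filtering reduces to this sketch (thresholds copied from the defaults above):

    import cld3  # pycld3

    def detect_languages(text, num_langs=3, min_proportion=0.2, min_probability=0.7):
        languages = []
        for lang in cld3.get_frequent_languages(text, num_langs=num_langs):
            if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
                languages.append(lang.language)
        return languages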
-# API
-def get_item(request_dict):
-    if not request_dict:
-        return {'status': 'error', 'reason': 'Malformed JSON'}, 400
-
-    item_id = request_dict.get('id', None)
-    if not item_id:
-        return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
-    if not exist_item(item_id):
-        return {'status': 'error', 'reason': 'Item not found'}, 404
-
-    dict_item = {}
-    dict_item['id'] = item_id
-    date = request_dict.get('date', True)
-    if date:
-        add_separator = False
-        if request_dict.get('date_separator', False):
-            add_separator = True
-        dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
-    tags = request_dict.get('tags', True)
-    if tags:
-        dict_item['tags'] = Tag.get_obj_tag(item_id)
-
-    size = request_dict.get('size', False)
-    if size:
-        dict_item['size'] = get_item_size(item_id)
-
-    content = request_dict.get('content', False)
-    if content:
-        # UTF-8 output, # TODO: use base64
-        dict_item['content'] = get_item_content(item_id)
-
-    raw_content = request_dict.get('raw_content', False)
-    if raw_content:
-        dict_item['raw_content'] = get_raw_content(item_id)
-
-    lines_info = request_dict.get('lines', False)
-    if lines_info:
-        dict_item['lines'] = get_lines_info(item_id, dict_item.get('content', 'None'))
-
-    if request_dict.get('pgp'):
-        dict_item['pgp'] = {}
-        if request_dict['pgp'].get('key'):
-            dict_item['pgp']['key'] = get_item_pgp_key(item_id)
-        if request_dict['pgp'].get('mail'):
-            dict_item['pgp']['mail'] = get_item_pgp_mail(item_id)
-        if request_dict['pgp'].get('name'):
-            dict_item['pgp']['name'] = get_item_pgp_name(item_id)
-
-    if request_dict.get('cryptocurrency'):
-        dict_item['cryptocurrency'] = {}
-        if request_dict['cryptocurrency'].get('bitcoin'):
-            dict_item['cryptocurrency']['bitcoin'] = get_item_bitcoin(item_id)
-
-    return dict_item, 200
-
-
-
-def api_get_item_content_base64_utf8(request_dict):
-    item_id = request_dict.get('id', None)
-    if not request_dict:
-        return {'status': 'error', 'reason': 'Malformed JSON'}, 400
-    if not item_id:
-        return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
-    if not exist_item(item_id):
-        return {'status': 'error', 'reason': 'Item not found'}, 404
-
-    item_content = get_item_content(item_id)
-    item_content = base64.b64encode((item_content.encode('utf-8'))).decode('UTF-8')
-    return {'status': 'success', 'content': item_content}, 200
-
-
-def api_get_items_sources():
-    item_content = {'sources': get_all_sources()}
-    return item_content, 200
-
-# def check_item_source(request_dict):
-#     source = request_dict.get('source', None)
-#     if not request_dict:
-#         return {'status': 'error', 'reason': 'Malformed JSON'}, 400
-#     if not source:
-#         return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
-#
-#     all_sources = item_basic.get_all_items_sources()
-#
-#     if source not in all_sources:
-#         return {'status': 'error', 'reason': 'Invalid source', 'provide': source}, 400
-#     return {'status': 'success', 'reason': 'Valid source', 'provide': source}, 200
-
-###
-### correlation
-###
-def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False):
-    '''
-    Return all cryptocurrencies of a given item.
-
-    :param item_id: item id
-    :param currencies_type: list of cryptocurrencies type
-    :type currencies_type: list, optional
-    '''
-    return Cryptocurrency.cryptocurrency.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb)
-
-def get_item_pgp(item_id, currencies_type=None, get_nb=False):
-    '''
-    Return all pgp fields (key, mail, name) of a given item.
-
-    :param item_id: item id
-    :param currencies_type: list of pgp field types
-    :type currencies_type: list, optional
-    '''
-    return Pgp.pgp.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb)
-
-def get_item_username(item_id, sub_type=None, get_nb=False):
-    '''
-    Return all usernames of a given item.
-
-    :param item_id: item id
-    :param sub_type: list of username type
-    :type sub_type: list, optional
-    '''
-    return Username.correlation.get_item_correlation_dict(item_id, correlation_type=sub_type, get_nb=get_nb)
-
-def get_item_decoded(item_id):
-    '''
-    Return all decoded items of a given item.
-
-    :param item_id: item id
-    '''
-    return Decoded.get_item_decoded(item_id)
-
-def get_item_all_screenshot(item_id):
-    '''
-    Return all screenshots of a given item.
-
-    :param item_id: item id
-    '''
-    return Screenshot.get_item_screenshot_list(item_id)
-
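The request_dict consumed by get_item() above toggles each field individually; a representative payload (item id and values illustrative):

    request_dict = {
        'id': 'submitted/2022/11/28/example.gz',        # mandatory
        'date': True,
        'date_separator': False,
        'tags': True,
        'content': False,
        'lines': True,
        'pgp': {'key': True, 'mail': True, 'name': False},
        'cryptocurrency': {'bitcoin': True},
    }
    # get_item(request_dict) -> (dict_item, 200) on success,
    # or ({'status': 'error', ...}, 400/404) on a bad request.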
-def get_item_all_correlation(item_id, correlation_names=[], get_nb=False):
-    '''
-    Return all correlations of a given item id.
-
-    :param item_id: item id
-    :type item_id: str
-
-    :return: a dict of all correlation for a item id
-    :rtype: dict
-    '''
-    if not correlation_names:
-        correlation_names = Correlate_object.get_all_correlation_names()
-    item_correl = {}
-    for correlation_name in correlation_names:
-        if correlation_name=='cryptocurrency':
-            res = get_item_cryptocurrency(item_id, get_nb=get_nb)
-        elif correlation_name=='pgp':
-            res = get_item_pgp(item_id, get_nb=get_nb)
-        elif correlation_name=='username':
-            res = get_item_username(item_id, get_nb=get_nb)
-        elif correlation_name=='decoded':
-            res = get_item_decoded(item_id)
-        elif correlation_name=='screenshot':
-            res = get_item_all_screenshot(item_id)
-        else:
-            res = None
-        # add correlation to dict
-        if res:
-            item_correl[correlation_name] = res
-    return item_correl
-
-
-
-## TODO: REFACTOR
-def _get_item_correlation(correlation_name, correlation_type, item_id):
-    res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id))
-    if res:
-        return list(res)
-    else:
-        return []
-
-## TODO: REFACTOR
-def get_item_bitcoin(item_id):
-    return _get_item_correlation('cryptocurrency', 'bitcoin', item_id)
-
-## TODO: REFACTOR
-def get_item_pgp_key(item_id):
-    return _get_item_correlation('pgpdump', 'key', item_id)
-
-## TODO: REFACTOR
-def get_item_pgp_name(item_id):
-    return _get_item_correlation('pgpdump', 'name', item_id)
-
-## TODO: REFACTOR
-def get_item_pgp_mail(item_id):
-    return _get_item_correlation('pgpdump', 'mail', item_id)
-
-## TODO: REFACTOR
-def get_item_pgp_correlation(item_id):
-    pass
-
-###
-### GET Internal Module DESC
-###
-def get_item_list_desc(list_item_id):
-    desc_list = []
-    for item_id in list_item_id:
-        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} )
-    return desc_list
-
-def is_crawled(item_id):
-    return item_basic.is_crawled(item_id)
-
-def get_crawler_matadata(item_id, ltags=None):
-    dict_crawler = {}
-    if is_crawled(item_id):
-        dict_crawler['domain'] = get_item_domain(item_id)
-        if not ltags:
-            ltags = Tag.get_obj_tag(item_id)
-        dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags)
-        dict_crawler['url'] = get_item_link(item_id)
-        dict_crawler['screenshot'] = get_item_screenshot(item_id)
-        dict_crawler['har'] = get_item_har_name(item_id)
-    return dict_crawler
-
-def is_onion(item_id):
-    is_onion = False
-    if len(item_id) > 62:
-        if is_crawled(item_id) and item_id[-42:-36] == '.onion':
-            is_onion = True
-    return is_onion
-
-def is_item_in_domain(domain, item_id):
-    is_in_domain = False
-    domain_length = len(domain)
-    if len(item_id) > (domain_length+48):
-        if item_id[-36-domain_length:-36] == domain:
-            is_in_domain = True
-    return is_in_domain
-
-def get_item_domain(item_id):
-    return item_basic.get_item_domain(item_id)
-
-def get_domain(item_id):
-    item_id = item_id.split('/')
-    item_id = item_id[-1]
-    return item_id[:-36]
-
-def get_item_domain_with_port(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'domain')
-
-def get_item_link(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'real_link')
-
-def get_item_screenshot(item_id):
-    screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
-    if screenshot:
-        return os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:])
-    return ''
-
-def get_item_har_name(item_id):
-    har_path = os.path.join(har_directory, item_id) + '.json'
-    if os.path.isfile(har_path):
-        return har_path
-    else:
-        return None
-
-def get_item_har(har_path):
-    pass
-
-def get_item_filename(item_id):
-    # Creating the full filepath
-    filename = os.path.join(PASTES_FOLDER, item_id)
-    filename = os.path.realpath(filename)
-
-    # incorrect filename
-    if not os.path.commonprefix([filename, PASTES_FOLDER]) == PASTES_FOLDER:
-        return None
-    else:
-        return filename
-
-def get_item_duplicate(item_id, r_list=True):
-    res = r_serv_metadata.smembers('dup:{}'.format(item_id))
-    if r_list:
-        if res:
-            return list(res)
-        else:
-            return []
-    return res
-
-def get_item_nb_duplicates(item_id):
-    return r_serv_metadata.scard('dup:{}'.format(item_id))
-
-def get_item_duplicates_dict(item_id):
-    dict_duplicates = {}
-    for duplicate in get_item_duplicate(item_id):
-        duplicate = duplicate[1:-1].replace('\'', '').replace(' ', '').split(',')
-        duplicate_id = duplicate[1]
-        if not duplicate_id in dict_duplicates:
-            dict_duplicates[duplicate_id] = {'date': get_item_date(duplicate_id, add_separator=True), 'algo': {}}
-        algo = duplicate[0]
-        if algo == 'tlsh':
-            similarity = 100 - int(duplicate[2])
-        else:
-            similarity = int(duplicate[2])
-        dict_duplicates[duplicate_id]['algo'][algo] = similarity
-    return dict_duplicates
-
-def add_item_duplicate(item_id, l_dup):
-    for item_dup in l_dup:
-        r_serv_metadata.sadd('dup:{}'.format(item_dup), item_id)
-        r_serv_metadata.sadd('dup:{}'.format(item_id), item_dup)
-
-def delete_item_duplicate(item_id):
-    for item_dup in get_item_duplicate(item_id):
-        r_serv_metadata.srem('dup:{}'.format(item_dup), item_id)
-    r_serv_metadata.delete('dup:{}'.format(item_id))
-
-def get_raw_content(item_id):
-    filepath = get_item_filepath(item_id)
-    with open(filepath, 'rb') as f:
-        file_content = BytesIO(f.read())
-    return file_content
-
-def save_raw_content(item_id, io_content):
-    filepath = get_item_filename(item_id)
-    if os.path.isfile(filepath):
-        #print('File already exists')
-        return False
-    # create subdir
-    dirname = os.path.dirname(filepath)
-    if not os.path.exists(dirname):
-        os.makedirs(dirname)
-    # # TODO: check if is IO file
-    with open(filepath, 'wb') as f:
-        f.write(io_content.getvalue())
-    return True
-
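Duplicate entries read back by get_item_duplicates_dict() above are stored as stringified tuples; a worked example of the decoding (entry value illustrative):

    entry = "('tlsh', 'archive/2020/01/01/foo.gz', 30, '20200101')"
    algo, dup_id, score, date = entry[1:-1].replace("'", '').replace(' ', '').split(',')
    similarity = 100 - int(score) if algo == 'tlsh' else int(score)
    # tlsh stores a distance, so distance 30 -> similarity 70;
    # the other algorithms store the percentage directly.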
-# IDEA: send item to duplicate ?
-def create_item(obj_id, obj_metadata, io_content):
-    '''
-    Create a new Item (Import or Test only).
-
-    :param obj_id: item id
-    :type obj_metadata: dict - 'first_seen', 'tags'
-
-    :return: is item created
-    :rtype: boolean
-    '''
-    # check if datetime match ??
-
-
-    # # TODO: validate obj_id
-
-    res = save_raw_content(obj_id, io_content)
-    # item saved
-    if res:
-        # create tags
-        if 'tags' in obj_metadata:
-            # # TODO: handle mixed tags: taxonomies and Galaxies
-            Tag.api_add_obj_tags(tags=obj_metadata['tags'], object_id=obj_id, object_type="item")
-        return True
-
-    # Item not created
-    return False
-
-def delete_item(obj_id):
-    # check if item exists
-    if not exist_item(obj_id):
-        return False
-    else:
-        delete_item_duplicate(obj_id)
-        # delete MISP event
-        r_serv_metadata.delete('misp_events:{}'.format(obj_id))
-        r_serv_metadata.delete('hive_cases:{}'.format(obj_id))
-
-        os.remove(get_item_filename(obj_id))
-
-        # get all correlation
-        obj_correlations = get_item_all_correlation(obj_id)
-        for correlation in obj_correlations:
-            if correlation=='cryptocurrency' or correlation=='pgp':
-                for obj2_subtype in obj_correlations[correlation]:
-                    for obj2_id in obj_correlations[correlation][obj2_subtype]:
-                        Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id,
-                                                                 obj1_subtype=obj2_subtype)
-            else:
-                for obj2_id in obj_correlations[correlation]:
-                    Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id)
-
-        # delete father/child
-        delete_node(obj_id)
-
-        # delete item metadata
-        r_serv_metadata.delete('paste_metadata:{}'.format(obj_id))
-
-        return True
-
-    ### TODO in import V2
-    # delete from tracked items
-
-    # # # TODO: # FIXME: LATER
-    # delete from queue
-    ###
-    return False
-
-#### ####
-def delete_node(item_id):
-    if is_node(item_id):
-        if is_crawled(item_id):
-            delete_domain_node(item_id)
-        item_basic._delete_node(item_id)
-
-def delete_domain_node(item_id):
-    if is_domain_root(item_id):
-        # remove from domain history
-        domain, port = get_item_domain_with_port(item_id).split(':')
-        domain_basic.delete_domain_item_core(item_id, domain, port)
-    for child_id in get_all_domain_node_by_item_id(item_id):
-        delete_item(child_id)
-
-
-class Item(AbstractObject):
-    """
-    AIL Item Object. (strings)
-    """
-
-    def __init__(self, id):
-        super(Item, self).__init__('item', id)
-
-    def get_date(self, separator=False):
-        """
-        Returns Item date
-        """
-        return item_basic.get_item_date(self.id, add_separator=separator)
-
-    def get_source(self):
-        """
-        Returns Item source/feeder name
-        """
-        #return self.id.split('/')[-5]
-        l_source = self.id.split('/')[:-4]
-        return os.path.join(*l_source)
-
-    def get_basename(self):
-        return os.path.basename(self.id)
-
-    def get_filename(self):
-        # Creating the full filepath
-        filename = os.path.join(PASTES_FOLDER, self.id)
-        filename = os.path.realpath(filename)
-
-        # incorrect filename
-        if not os.path.commonprefix([filename, PASTES_FOLDER]) == PASTES_FOLDER:
-            return None
-        else:
-            return filename
-
-    def get_content(self):
-        """
-        Returns Item content
-        """
-        return item_basic.get_item_content(self.id)
-
-    def get_gzip_content(self, b64=False):
-        with open(self.get_filename(), 'rb') as f:
-            content = f.read()
-        if b64:
-            content = base64.b64encode(content)
-        return content.decode()
-
-    def get_ail_2_ail_payload(self):
-        payload = {'raw': self.get_gzip_content(b64=True)}
-        return payload
-
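get_ail_2_ail_payload() above ships the item as the base64 of its gzipped file; the receiving side can invert it like this (a sketch, not the sync module's actual code):

    import base64
    import gzip

    def decode_ail_2_ail_payload(payload):
        raw = base64.b64decode(payload['raw'])   # bytes of the original .gz file
        return gzip.decompress(raw).decode()     # item content as text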
-    # # TODO:
-    def create(self):
-        pass
-
-    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
-    # TODO: DELETE ITEM CORRELATION + TAGS + METADATA + ...
-    def delete(self):
-        try:
-            os.remove(self.get_filename())
-            return True
-        except FileNotFoundError:
-            return False
-
-    ############################################################################
-    ############################################################################
-    ############################################################################
-
-    def exist_correlation(self):
-        pass
-
-    def get_link(self, flask_context=False):
-        pass
-
-    def get_svg_icon(self):
-        pass
-
-    def get_misp_object(self):
-        pass
-
-    ############################################################################
-    ############################################################################
-    ############################################################################
-    ############################################################################
-    ############################################################################
-    ############################################################################
-    ############################################################################
-    ############################################################################
-
-#if __name__ == '__main__':
-
-
-#     import Domain
-#     domain = Domain.Domain('domain.onion')
-#     for domain_history in domain.get_domain_history():
-#         domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
-#         if "items" in domain_item:
-#             for item_dict in domain_item['items']:
-#                 item_id = item_dict['id']
-#                 print(item_id)
-#                 for lang in get_item_languages(item_id, min_proportion=0.2, min_probability=0.8):
-#                     print(lang)
-#                 print()
-#                 print(get_item_languages(item_id, min_proportion=0.2, min_probability=0.6)) # 0.7 ?
diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py
deleted file mode 100755
index 65c3ca46..00000000
--- a/bin/packages/Paste.py
+++ /dev/null
@@ -1,430 +0,0 @@
-#!/usr/bin/python3
-
-"""
-The ``Paste Class``
-===================
-
-Use it to create an object from an existing paste or other random file.
-
-Conditions to fulfill to be able to use this class correctly:
--------------------------------------------------------------
-
-1/ The paste needs to be saved on disk somewhere (have an accessible path)
-2/ The paste needs to be gzipped.
-3/ The filepath needs to look something like this:
-   /directory/source/year/month/day/paste.gz
-
-"""
-
-import os
-import re
-import sys
-import magic
-import gzip
-import redis
-import operator
-import string
-import json
-from io import StringIO
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
-from Date import Date
-from Hash import Hash
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
-
-from langid.langid import LanguageIdentifier, model
-
-from nltk.tokenize import RegexpTokenizer
-from textblob import TextBlob
-
-clean = lambda dirty: ''.join(filter(string.printable.__contains__, dirty))
-"""It filters out non-printable characters from the string it receives."""
-
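The path convention required above (/directory/source/year/month/day/paste.gz) is what the constructor below unpacks by position; a minimal illustration (path illustrative):

    p_path = '/opt/AIL/PASTES/alerts/2013/01/12/ZEeGaez5.gz'
    var = p_path.split('/')
    year, month, day = var[-4], var[-3], var[-2]   # '2013', '01', '12'
    source = var[-5]                               # 'alerts'
    name = var[-1]                                 # 'ZEeGaez5.gz'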
-class Paste(object):
-    """
-    This class represents a Paste as an object.
-    When created, the object will have by default some "main attributes"
-    such as the size or the date of the paste already calculated, whereas other
-    attributes are not set and need to be "asked to be calculated" by their
-    methods.
-    It was designed like this because some attributes take time to be calculated,
-    such as the language or the duplicates...
-
-    :Example:
-
-    PST = Paste("/home/2013/01/12/ZEeGaez5.gz")
-
-    """
-
-    def __init__(self, p_path):
-
-        config_loader = ConfigLoader.ConfigLoader()
-        self.cache = config_loader.get_redis_conn("Redis_Queues")
-        self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-
-        self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
-        if self.PASTES_FOLDER not in p_path:
-            self.p_rel_path = p_path
-            self.p_path = os.path.join(self.PASTES_FOLDER, p_path)
-        else:
-            self.p_path = p_path
-            self.p_rel_path = p_path.replace(self.PASTES_FOLDER+'/', '', 1)
-
-        self.p_name = os.path.basename(self.p_path)
-        self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2)
-        self.p_mime = magic.from_buffer("test", mime=True)
-        self.p_mime = magic.from_buffer(self.get_p_content(), mime=True)
-
-        # Assuming that the paste will always be in a day folder which is itself
-        # in a month folder which is itself in a year folder.
-        # /year/month/day/paste.gz
-
-        var = self.p_path.split('/')
-        self.p_date = Date(var[-4], var[-3], var[-2])
-        self.p_date_path = os.path.join(var[-4], var[-3], var[-2], self.p_name)
-        self.p_source = var[-5]
-        self.supposed_url = 'https://{}/{}'.format(self.p_source.replace('_pro', ''), var[-1].split('.gz')[0])
-
-        self.p_encoding = None
-        self.p_hash_kind = {}
-        self.p_hash = {}
-        self.p_langage = None
-        self.p_nb_lines = None
-        self.p_max_length_line = None
-        self.array_line_above_threshold = None
-        self.p_duplicate = None
-        self.p_tags = None
-
-    def get_item_dict(self):
-        dict_item = {}
-        dict_item['id'] = self.p_rel_path
-        dict_item['date'] = str(self.p_date)
-        dict_item['content'] = self.get_p_content()
-        tags = self._get_p_tags()
-        if tags:
-            dict_item['tags'] = tags
-        return dict_item
-
-
-    def get_p_content(self):
-        """
-        Returning the content of the Paste
-
-        :Example:
-
-        PST.get_p_content()
-
-        """
-
-        try:
-            paste = self.cache.get(self.p_path)
-        except UnicodeDecodeError:
-            paste = None
-        except Exception as e:
-            print("ERROR in: " + self.p_path)
-            print(e)
-            paste = None
-
-        if paste is None:
-            try:
-                with gzip.open(self.p_path, 'r') as f:
-                    paste = f.read()
-                    self.cache.set(self.p_path, paste)
-                    self.cache.expire(self.p_path, 300)
-            except:
-                paste = ''
-
-        return str(paste)
-
-    def get_p_content_as_file(self):
-        message = StringIO(self.get_p_content())
-        return message
-
-    def get_p_content_with_removed_lines(self, threshold):
-        num_line_removed = 0
-        line_length_threshold = threshold
-        string_content = ""
-        f = self.get_p_content_as_file()
-        line_id = 0
-        for line_id, line in enumerate(f):
-            length = len(line)
-
-            if length < line_length_threshold:
-                string_content += line
-            else:
-                num_line_removed+=1
-
-        return (num_line_removed, string_content)
-
-    def get_lines_info(self):
-        """
-        Returning and setting the number of lines and the maximum length of the
-        lines of the paste.
-
-        :return: tuple (#lines, max_length_line)
-
-        :Example: PST.get_lines_info()
-
-        """
-        if self.p_nb_lines is None or self.p_max_length_line is None:
-            max_length_line = 0
-            f = self.get_p_content_as_file()
-            line_id = 0
-            for line_id, line in enumerate(f):
-                length = len(line)
-                if length >= max_length_line:
-                    max_length_line = length
-
-            f.close()
-            self.p_nb_lines = line_id
-            self.p_max_length_line = max_length_line
-
-        return (self.p_nb_lines, self.p_max_length_line)
-
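get_p_content() above layers a short-lived Redis cache over the gzipped file; reduced to its core, the pattern looks like this (connection parameters hypothetical):

    import gzip
    import redis

    cache = redis.StrictRedis(host='127.0.0.1', port=6379, db=1)

    def read_paste(p_path):
        content = cache.get(p_path)
        if content is None:
            with gzip.open(p_path, 'r') as f:
                content = f.read()
            cache.setex(p_path, 300, content)   # same effect as set() + expire(300)
        return str(content)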
-    def _get_p_encoding(self):
-        """
-        Getting the encoding (mime type) of the paste.
-
-        :Example: PST._get_p_encoding()
-
-        """
-        return self.p_mime
-
-    def _set_p_hash_kind(self, hashkind):
-        """
-        Setting the hash (as an object) used for future operations on it.
-
-        :Example: PST._set_p_hash_kind("md5")
-
-        .. seealso:: Hash.py Object to get the available hashes.
-
-        """
-        self.p_hash_kind[hashkind] = (Hash(hashkind))
-
-    def _get_p_hash(self):
-        """
-        Setting the hash of the paste as a kind of "unique" identifier
-
-        :return: a dictionary of hash strings (md5, sha1....)
-
-        :Example: PST._get_p_hash()
-
-        .. note:: You first need to declare which kind of hash you want to use
-        before using this function
-        .. seealso:: _set_p_hash_kind("md5")
-
-        """
-        for hash_name, the_hash in self.p_hash_kind.items():
-            self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode())
-        return self.p_hash
-
-    def _get_p_language(self):
-        """
-        Returning and setting the language of the paste (guessing)
-
-        :Example: PST._get_p_language()
-
-        ..note:: The language returned is purely guessing and may not be accurate
-        if the paste doesn't contain any human dictionary words
-        ..seealso: git@github.com:saffsd/langid.py.git
-
-        FIXME: This procedure is using more than 20% of CPU
-
-        """
-        identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
-        return identifier.classify(self.get_p_content())
-
-    def _get_p_hash_kind(self):
-        return self.p_hash_kind
-
-    def _get_p_date(self):
-        return self.p_date
-
-    # used
-    def get_p_date(self):
-        return self.p_date
-
-    def get_item_source(self):
-        return self.p_source
-
-    def get_item_size(self):
-        return self.p_size
-
-    def _get_p_size(self):
-        return self.p_size
-
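_get_p_language() above delegates entirely to langid.py; the call pattern is simply (output pair shown for illustration):

    from langid.langid import LanguageIdentifier, model

    identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
    lang, prob = identifier.classify('This is an English sentence.')
    # e.g. ('en', 0.99...) -- a (language code, normalized probability) pair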
-    def is_duplicate(self, obj, min=1, percent=50, start=1, jump=10):
-        """
-        Returning the percent of similarity with another paste.
-        ( Using the previous hashing method )
-
-        :param obj: (Paste) The paste to compare with
-        :param min: -- (int) Minimum line length to be hashed.
-        :param percent: -- (int)
-        :param start: -- (int) Line number where to start.
-        :param jump: -- (int) Granularity of the hashing 0 or 1 means no jumps
-        (Maximum Granularity)
-
-        :return: (tuple) (bool, percent)
-
-        :Example:
-        PST.is_duplicate(PST)
-
-        >>> return (True, 100.0)
-
-        ..seealso: _get_hash_lines()
-
-        """
-
-        set1 = self._get_hash_lines(min, start, jump)
-        set2 = obj._get_hash_lines(min, start, jump)
-
-        inter = set.intersection(set1, set2)
-
-        numerator = len(inter)
-        denominator = float((len(set1) + len(set2)) / 2)
-
-        try:
-            var = round((numerator / denominator)*100, 2)
-        except ZeroDivisionError:
-            var = 0.0
-
-        if var >= percent:
-            return True, var
-        else:
-            return False, var
-
-    def _get_p_duplicate(self):
-        p_duplicate = self.store_metadata.smembers('dup:'+self.p_path)
-        # remove absolute path #fix-db
-        if p_duplicate:
-            for duplicate_string in p_duplicate:
-                self.store_metadata.srem('dup:'+self.p_path, duplicate_string)
-                self.store_metadata.sadd('dup:'+self.p_rel_path, duplicate_string.replace(self.PASTES_FOLDER+'/', '', 1))
-        self.p_duplicate = self.store_metadata.smembers('dup:'+self.p_rel_path)
-        if self.p_duplicate is not None:
-            return list(self.p_duplicate)
-        else:
-            return '[]'
-
-    def get_nb_duplicate(self):
-        # # TODO: FIXME use relative path
-        return self.store_metadata.scard('dup:'+self.p_path) + self.store_metadata.scard('dup:'+self.p_rel_path)
-
-    def _get_p_tags(self):
-        self.p_tags = self.store_metadata.smembers('tag:'+self.p_rel_path)
-        if self.p_tags is not None:
-            return list(self.p_tags)
-        else:
-            return '[]'
-
-    def get_p_rel_path(self):
-        return self.p_rel_path
-
-    def get_p_date_path(self):
-        return self.p_date_path
-
-    # def save_all_attributes_redis(self, key=None):
-    #     """
-    #     Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
-    #
-    #     :param r_serv: -- Connection to the Database.
-    #     :param key: -- Key of an additional set.
-    #
-    #     Example:
-    #     import redis
-    #
-    #     r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0)
-    #
-    #     PST = Paste("/home/Zkopkmlk.gz")
-    #     PST.save_all_attributes_redis(r_serv)
-    #
-    #     """
-    #
-    # def save_attribute_redis(self, attr_name, value):
-    #     """
-    #     Save an attribute as a field
-    #     """
-
-    def save_attribute_duplicate(self, value):
-        """
-        Save an attribute as a field
-        """
-        for tuple in value:
-            self.store_metadata.sadd('dup:'+self.p_path, tuple)
-
-    def save_others_pastes_attribute_duplicate(self, list_value):
-        """
-        Save a new duplicate on others pastes
-        """
-        for hash_type, path, percent, date in list_value:
-            path = path.replace(self.PASTES_FOLDER, '', 1)
-            to_add = (hash_type, self.p_rel_path, percent, date)
-            self.store_metadata.sadd('dup:'+path, to_add)
-
-    def _get_from_redis(self, r_serv):
-        ans = {}
-        for hash_name, the_hash in self.p_hash.items():
-            ans[hash_name] = r_serv.hgetall(the_hash)
-        return ans
-
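is_duplicate() above scores similarity as the intersection size over the average set size; a worked example with toy hash sets:

    set1 = {'h1', 'h2', 'h3', 'h4'}
    set2 = {'h2', 'h3', 'h4', 'h5', 'h6', 'h7'}
    inter = set.intersection(set1, set2)                       # {'h2', 'h3', 'h4'}
    percent = round(len(inter) / ((len(set1) + len(set2)) / 2) * 100, 2)
    # 3 / 5.0 * 100 = 60.0 -> reported as a duplicate only if >= the percent threshold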
-    def _get_top_words(self, sort=False):
-        """
-        Tokenising method: Returning a sorted list or a set of paste's words
-
-        :param sort: Selecting the output: sorted list or a set. (set by default)
-
-        :return: set or sorted list of tuples [(word, occurrence)...]
-
-        :Example: PST._get_top_words(False)
-
-        """
-        words = {}
-        tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
-                                    gaps=True, discard_empty=True)
-
-        blob = TextBlob(clean( (self.get_p_content()) ), tokenizer=tokenizer)
-
-        for word in blob.tokens:
-            if word in words.keys():
-                num = words[word]
-            else:
-                num = 0
-            words[word] = num + 1
-        if sort:
-            var = sorted(words.items(), key=operator.itemgetter(1), reverse=True)
-        else:
-            var = words
-
-        return var
-
-    def _get_word(self, word):
-        """
-        Returning a specific word and its occurrence if present in the paste
-
-        :param word: (str) The word
-
-        :return: (tuple) ("foo", 1337)
-
-        """
-        return [item for item in self._get_top_words() if item[0] == word]
-
-    def get_regex(self, regex):
-        """
-        Returning matches with the regex given as an argument.
-
-        :param regex: -- (str) a regex
-
-        :return: (list)
-
-        :Example: PST.get_regex("4[0-9]{12}(?:[0-9]{3})?")
-
-
-        """
-        matchs = []
-        for match in re.findall(regex, self.get_p_content()):
-            if match != '' and len(match) < 100:
-                matchs.append(match)
-        return matchs
diff --git a/bin/packages/Pgp.py b/bin/packages/Pgp.py
deleted file mode 100755
index 9012d1cc..00000000
--- a/bin/packages/Pgp.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import redis
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-import Correlation
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
-
-config_loader = ConfigLoader.ConfigLoader()
-serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-config_loader = None
-
-pgp = Correlation.Correlation('pgpdump', ['key', 'mail', 'name'])
-
-def get_pgp(request_dict, pgp_type):
-    # basic verification
-    res = pgp.verify_correlation_field_request(request_dict, pgp_type)
-    if res:
-        return res
-    # verify address
-    field_name = request_dict.get(pgp_type)
-
-    return pgp.get_correlation(request_dict, pgp_type, field_name)
diff --git a/bin/packages/Term.py b/bin/packages/Term.py
index 5bf9bf91..73d188d8 100755
--- a/bin/packages/Term.py
+++ b/bin/packages/Term.py
@@ -6,23 +6,22 @@
 import re
 import sys
 import time
 import uuid
-import redis
 import datetime
 
 from collections import defaultdict
+from flask import escape
 
 from nltk.tokenize import RegexpTokenizer
 from textblob import TextBlob
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
-import Tracker
-
-from flask import escape
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
-import Date
-import Item
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader
+from lib import Tracker
+from packages import Date
+from lib.objects import Items
 
 config_loader = ConfigLoader.ConfigLoader()
 r_serv_term = config_loader.get_db_conn("Kvrocks_DB")
@@ -50,7 +49,7 @@ def is_valid_uuid_v4(UUID):
 
 # # TODO: use new package => duplicate fct
 def is_in_role(user_id, role):
-    if r_serv_db.sismember('user_role:{}'.format(role), user_id):
+    if r_serv_term.sismember('user_role:{}'.format(role), user_id):
         return True
     else:
         return False
@@ -93,7 +92,7 @@ def get_text_word_frequency(item_content, filtering=True):
     words_dict = defaultdict(int)
 
     if filtering:
-        blob = TextBlob(item_content , tokenizer=tokenizer)
+        blob = TextBlob(item_content, tokenizer=tokenizer)
     else:
         blob = TextBlob(item_content)
     for word in blob.tokens:
@@ -132,7 +131,7 @@ def is_term_tracked_in_global_level(term, term_type):
     res = r_serv_term.smembers('all:tracker_uuid:{}:{}'.format(term_type, term))
     if res:
         for elem_uuid in res:
-            if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'level')=='1':
+            if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'level') == '1':
                 return True
     return False
@@ -140,8 +139,8 @@ def is_term_tracked_in_user_level(term, term_type, user_id):
     res = r_serv_term.smembers('user:tracker:{}'.format(user_id))
     if res:
         for elem_uuid in res:
-            if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'tracked')== term:
-                if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'type')== term_type:
+            if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'tracked') == term:
+                if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'type') == term_type:
                     return True
     return False
@@ -161,8 +160,8 @@ def parse_json_term_to_add(dict_input, user_id):
     webhook = dict_input.get('webhook', '')
     webhook = escape(webhook)
 
-    res = parse_tracked_term_to_add(term , term_type, nb_words=nb_words)
-    if res[1]!=200:
+    res = parse_tracked_term_to_add(term, term_type, nb_words=nb_words)
+    if res[1] != 200:
         return res
     term = res[0]['term']
     term_type = res[0]['type']
@@ -182,23 +181,23 @@ def parse_json_term_to_add(dict_input, user_id):
         level = 1
 
     # check if term already tracked in global
-    if level==1:
+    if level == 1:
         if is_term_tracked_in_global_level(term, term_type):
             return {"status": "error", "reason": "Term already tracked"}, 409
     else:
        if is_term_tracked_in_user_level(term, term_type, user_id):
            return {"status": "error", "reason": "Term already tracked"}, 409
 
-    term_uuid = add_tracked_term(term , term_type, user_id, level, tags, mails, description,webhook)
+    term_uuid = add_tracked_term(term, term_type, user_id, level, tags, mails, description, webhook)
 
     return {'term': term, 'type': term_type, 'uuid': term_uuid}, 200
 
-def parse_tracked_term_to_add(term , term_type, nb_words=1):
-    if term_type=='regex':
+def parse_tracked_term_to_add(term, term_type, nb_words=1):
+    if term_type == 'regex':
         if not is_valid_regex(term):
             return {"status": "error", "reason": "Invalid regex"}, 400
-    elif term_type=='word' or term_type=='set':
+    elif term_type == 'word' or term_type == 'set':
         # force lowercase
         term = term.lower()
         word_set = set(term)
@@ -207,16 +206,16 @@ def parse_tracked_term_to_add(term, term_type, nb_words=1):
             return {"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400
         words = term.split()
         # not a word
-        if term_type=='word' and len(words)>1:
+        if term_type == 'word' and len(words) > 1:
             term_type = 'set'
 
-        # ouput format: term1,term2,term3;2
-        if term_type=='set':
+        # output format: term1,term2,term3;2
+        if term_type == 'set':
            try:
                nb_words = int(nb_words)
            except:
                nb_words = 1
-            if nb_words==0:
+            if nb_words == 0:
                nb_words = 1
 
            words_set = set(words)
@@ -228,19 +227,19 @@ def parse_tracked_term_to_add(term, term_type, nb_words=1):
            term = ",".join(words_set)
            term = "{};{}".format(term, nb_words)
 
-    elif term_type=='yara_custom':
+    elif term_type == 'yara_custom':
        if not Tracker.is_valid_yara_rule(term):
            return {"status": "error", "reason": "Invalid custom Yara Rule"}, 400
-    elif term_type=='yara_default':
+    elif term_type == 'yara_default':
        if not Tracker.is_valid_default_yara_rule(term):
            return {"status": "error", "reason": "The Yara Rule doesn't exist"}, 400
    else:
        return {"status": "error", "reason": "Incorrect type"}, 400
    return {"status": "success", "term": term, "type": term_type}, 200
 
-def add_tracked_term(term , term_type, user_id, level, tags, mails, description,webhook, dashboard=0):
+def add_tracked_term(term, term_type, user_id, level, tags, mails, description, webhook, dashboard=0):
 
-    term_uuid = str(uuid.uuid4()) 
+    term_uuid = str(uuid.uuid4())
 
     # YARA
     if term_type == 'yara_custom' or term_type == 'yara_default':
@@ -248,7 +247,7 @@ def add_tracked_term(term, term_type, user_id, level, tags, mails, description,
         term_type = 'yara'
 
     # create metadata
-    r_serv_term.hset('tracker:{}'.format(term_uuid), 'tracked',term) # # TODO: use hash
+    r_serv_term.hset('tracker:{}'.format(term_uuid), 'tracked', term)  # # TODO: use hash
     r_serv_term.hset('tracker:{}'.format(term_uuid), 'type', term_type)
     r_serv_term.hset('tracker:{}'.format(term_uuid), 'date', datetime.date.today().strftime("%Y%m%d"))
     r_serv_term.hset('tracker:{}'.format(term_uuid), 'user_id', user_id)
@@ -268,20 +267,20 @@ def add_tracked_term(term, term_type, user_id, level, tags, mails, description,
     r_serv_term.sadd('all:tracker_uuid:{}:{}'.format(term_type, term), term_uuid)
 
     # add display level set
-    if level == 0: # user only
+    if level == 0:  # user only
         r_serv_term.sadd('user:tracker:{}'.format(user_id), term_uuid)
         r_serv_term.sadd('user:tracker:{}:{}'.format(user_id, term_type), term_uuid)
-    elif level == 1: # global
+    elif level == 1:  # global
         r_serv_term.sadd('global:tracker', term_uuid)
         r_serv_term.sadd('global:tracker:{}'.format(term_type), term_uuid)
 
     # create term tags list
     for tag in tags:
-        r_serv_term.sadd('tracker:tags:{}'.format(term_uuid), escape(tag) )
+        r_serv_term.sadd('tracker:tags:{}'.format(term_uuid), escape(tag))
 
     # create term tags mail notification list
     for mail in mails:
-        r_serv_term.sadd('tracker:mail:{}'.format(term_uuid), escape(mail) )
+        r_serv_term.sadd('tracker:mail:{}'.format(term_uuid), escape(mail))
 
     # toggle refresh module tracker list/set
     r_serv_term.set('tracker:refresh:{}'.format(term_type), time.time())
@@ -315,11 +314,11 @@ def delete_term(term_uuid):
     # toggle refresh module tracker list/set
     r_serv_term.set('tracker:refresh:{}'.format(term_type), time.time())
 
-    if level == '0': # user only
+    if level == '0':  # user only
         user_id = term_type = r_serv_term.hget('tracker:{}'.format(term_uuid), 'user_id')
         r_serv_term.srem('user:tracker:{}'.format(user_id), term_uuid)
         r_serv_term.srem('user:tracker:{}:{}'.format(user_id, term_type), term_uuid)
-    elif level == '1': # global
+    elif level == '1':  # global
         r_serv_term.srem('global:tracker', term_uuid)
         r_serv_term.srem('global:tracker:{}'.format(term_type), term_uuid)
 
@@ -415,7 +414,6 @@ def parse_get_tracker_term_item(dict_input, user_id):
     if res:
         return res
 
-
     date_from = dict_input.get('date_from', None)
     date_to = dict_input.get('date_to', None)
 
@@ -431,7 +429,7 @@ def parse_get_tracker_term_item(dict_input, user_id):
         date_from = date_to
 
     all_item_id = Tracker.get_tracker_items_by_daterange(term_uuid, date_from, date_to)
-    all_item_id = Item.get_item_list_desc(all_item_id)
+    all_item_id = Items.get_item_list_desc(all_item_id)
 
     res_dict = {'uuid': term_uuid, 'date_from': date_from, 'date_to': date_to, 'items': all_item_id}
     return res_dict, 200
@@ -487,8 +485,8 @@ def get_list_tracked_term_stats_by_day(list_tracker_uuid, num_day=31, date_from=
             nb_seen_this_day = r_serv_term.scard('tracker:item:{}:{}'.format(tracker_uuid, date_day))
             if nb_seen_this_day is None:
                 nb_seen_this_day = 0
-            dict_tracker_data.append({"date": date_day,"value": int(nb_seen_this_day)})
-        list_tracker_stats.append({"name": tracker,"Data": dict_tracker_data})
+            dict_tracker_data.append({"date": date_day, "value": int(nb_seen_this_day)})
+        list_tracker_stats.append({"name": tracker, "Data": dict_tracker_data})
     return list_tracker_stats
 
 def get_list_trackeed_term_tags(term_uuid):
@@ -507,7 +505,7 @@ def get_list_trackeed_term_mails(term_uuid):
 
 def get_user_tracked_term_uuid(user_id, filter_type=None):
     if filter_type:
-        return list(r_serv_term.smembers('user:tracker:{}:{}'.format(user_id,filter_type)))
+        return list(r_serv_term.smembers('user:tracker:{}:{}'.format(user_id, filter_type)))
     else:
         return list(r_serv_term.smembers('user:tracker:{}'.format(user_id)))
diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py
index 21fec078..eb4ea50f 100644
--- a/bin/packages/lib_refine.py
+++ b/bin/packages/lib_refine.py
@@ -1,7 +1,6 @@
 #!/usr/bin/python3
 
 import os
-import re
 import sys
 import dns.resolver
 import dns.exception
@@ -10,8 +9,11 @@
 from pubsublogger import publisher
 
 from datetime import timedelta
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader
 
 config_loader = ConfigLoader.ConfigLoader()
 dns_server = config_loader.get_config_str("Web", "dns")
diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg
index 00ffdb4f..5369cb1b 100644
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@@ -150,7 +150,7 @@ publish = Redis_Tags
 subscribe = Redis_Global
 publish = Redis_Tags
 
-[Bitcoin]
+[Cryptocurrencies]
 subscribe = Redis_Global
 publish = Redis_Tags
diff --git a/bin/update-background.py b/bin/update-background.py
index 3a38fc82..f5a3c58e 100755
--- a/bin/update-background.py
+++ b/bin/update-background.py
@@ -28,7 +28,7 @@ def launch_background_upgrade(version, l_script_name):
         update_file = ail_updates.get_current_background_update_script_path(version, script_name)
 
         # # TODO: Get error output
-        process = subprocess.run(['python' ,update_file])
+        process = subprocess.run(['python', update_file])
 
         update_progress = ail_updates.get_current_background_update_progress()
         if update_progress == 100:
@@ -37,15 +37,16 @@ def launch_background_upgrade(version, l_script_name):
             # 'Please relaunch the bin/update-background.py script'
 
             # # TODO: Create Class background update
-            ail_updates.end_background_update()
+            ail_updates.end_background_update(version)
+
 
 if __name__ == "__main__":
 
     if not ail_updates.exits_background_update_to_launch():
         ail_updates.clear_background_update()
     else:
-        launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py', 'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py'])
-        launch_background_upgrade('v2.4', ['Update_domain.py'])
+        launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py',
+                                           'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py'])
         launch_background_upgrade('v2.6', ['Update_screenshots.py'])
         launch_background_upgrade('v2.7', ['Update_domain_tags.py'])
         launch_background_upgrade('v3.4', ['Update_domain.py'])
diff --git a/tests/testApi.py b/tests/testApi.py
index 89e2b167..18ec9eef 100644
--- a/tests/testApi.py
+++ b/tests/testApi.py
@@ -6,15 +6,15 @@
 import sys
 import time
 import unittest
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'bin'))
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import Tag
+from packages import Import_helper
+
 sys.path.append(os.environ['AIL_FLASK'])
-sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
-
-import Import_helper
-import Tag
-
-from Flask_server import app
+from var.www.Flask_server import app
 
 def parse_response(obj, ail_response):
     res_json = ail_response.get_json()
@@ -36,6 +36,7 @@ def get_api_key():
         apikey = sys.argv[1]
     return apikey
 
+
 APIKEY = get_api_key()
 
 class TestApiV1(unittest.TestCase):
diff --git a/tests/test_modules.py b/tests/test_modules.py
index 0731b26d..2528cd43 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -24,7 +24,7 @@ from modules.Telegram import Telegram
 # project packages
 from lib.ConfigLoader import ConfigLoader
 import lib.crawlers as crawlers
-import packages.Item as Item
+import lib.objects.Items as Items
 
 #### COPY SAMPLES ####
 config_loader = ConfigLoader()
@@ -75,11 +75,11 @@ class Test_Module_CreditCards(unittest.TestCase):
 
     def test_module(self):
         item_id = 'tests/2021/01/01/credit_cards.gz 7'
-        test_cards = ['341039324930797', # American Express
-                      '6011613905509166', # Discover Card
-                      '3547151714018657', # Japan Credit Bureau (JCB)
-                      '5492981206527330', # 16 digits MasterCard
-                      '4024007132849695', # '4532525919781' # 16-digit VISA, with separators
+        test_cards = ['341039324930797',  # American Express
+                      '6011613905509166',  # Discover Card
+                      '3547151714018657',  # Japan Credit Bureau (JCB)
+                      '5492981206527330',  # 16 digits MasterCard
+                      '4024007132849695',  # '4532525919781' # 16-digit VISA, with separators
                      ]
 
         result = self.module_obj.compute(item_id, r_result=True)
@@ -107,7 +107,7 @@ class Test_Module_Global(unittest.TestCase):
     def test_module(self):
         # # TODO: delete item
         item_id = 'tests/2021/01/01/global.gz'
-        item = Item.Item(item_id)
+        item = Items.Item(item_id)
         item.delete()
 
         item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
@@ -126,7 +126,7 @@ class Test_Module_Global(unittest.TestCase):
         self.assertIsNone(result)
 
         # Test same id with != content
-        item = Item.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz')
+        item = Items.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz')
         item.delete()
         message = f'{item_id} {item_content_2}'
         result = self.module_obj.compute(message, r_result=True)
@@ -135,7 +135,7 @@ class Test_Module_Global(unittest.TestCase):
         self.assertNotEqual(result, item_id)
 
         # cleanup
-        # item = Item.Item(result)
+        # item = Items.Item(result)
         # item.delete()
         # # TODO: remove from queue
diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py
index 99cd362a..c6183725 100755
--- a/update/v3.4/Update_domain.py
+++ b/update/v3.4/Update_domain.py
@@ -11,7 +11,6 @@
 sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
 from lib import ConfigLoader
 from lib.objects.Items import Item
-from lib import Domain
 
 def get_domain_type(domain_name):
     if str(domain_name).endswith('.onion'):
@@ -109,7 +108,6 @@ if __name__ == '__main__':
             print(domain)
             domain = str(domain)
             domain_t = get_domain_type(domain)
-            domain = Domain.Domain(domain)
             for domain_history in get_domain_history(domain_t, domain):
                 domain_items = get_domain_items_crawled(domain, domain_t, domain_history[1])
                 for id_item in domain_items:
diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py
index 1b5a5aeb..3d0db8ac 100644
--- a/var/www/blueprints/correlation.py
+++ b/var/www/blueprints/correlation.py
@@ -8,7 +8,6 @@
 import os
 import sys
 import json
-import random
 
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
 from flask_login import login_required, current_user, login_user, logout_user
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index bd9b43d1..7cdbab4e 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -34,8 +34,6 @@
 from lib import Tag
 
 from packages import Date
 
-from lib import Domain # # # # # # # # # # # # # # # # TODO:
-
 #import Config_DB
 
 bootstrap_label = Flask_config.bootstrap_label
@@ -261,12 +259,13 @@ def domains_explorer_post_filter():
         date_from = None
         date_to = None
 
-    if domain_onion and domain_regular:
-        if date_from and date_to:
-            return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
-        else:
-            return redirect(url_for('crawler_splash.domains_explorer_all'))
-    elif domain_regular:
+    # TODO SEARCH BOTH
+    # if domain_onion and domain_regular:
+    #     if date_from and date_to:
+    #         return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
+    #     else:
+    #         return redirect(url_for('crawler_splash.domains_explorer_all'))
+    if domain_regular:
         if date_from and date_to:
             return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
         else:
@@ -277,20 +276,22 @@ def domains_explorer_post_filter():
         else:
             return redirect(url_for('crawler_splash.domains_explorer_onion'))
 
-@crawler_splash.route('/domains/explorer/all', methods=['GET'])
-@login_required
-@login_read_only
-def domains_explorer_all():
-    page = request.args.get('page')
-    date_from = request.args.get('date_from')
-    date_to = request.args.get('date_to')
-    try:
-        page = int(page)
-    except:
-        page = 1
-
-    dict_data = Domain.get_domains_up_by_filers('all', page=page, date_from=date_from, date_to=date_to)
-    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')
+# TODO TEMP DISABLE
+# @crawler_splash.route('/domains/explorer/all', methods=['GET'])
+# @login_required
+# @login_read_only
+# def domains_explorer_all():
+#     page = request.args.get('page')
+#     date_from = request.args.get('date_from')
+#     date_to = request.args.get('date_to')
+#     try:
+#         page = int(page)
+#     except:
+#         page = 1
+#
+#     dict_data = Domain.get_domains_up_by_filers('all', page=page, date_from=date_from, date_to=date_to)
+#     return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')
+#
 
 @crawler_splash.route('/domains/explorer/onion', methods=['GET'])
 @login_required
@@ -304,7 +305,7 @@ def domains_explorer_onion():
     except:
         page = 1
 
-    dict_data = Domain.get_domains_up_by_filers('onion', page=page, date_from=date_from, date_to=date_to)
+    dict_data = Domains.get_domains_up_by_filers('onion', page=page, date_from=date_from, date_to=date_to)
     return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='onion')
 
 @crawler_splash.route('/domains/explorer/web', methods=['GET'])
@@ -319,7 +320,7 @@ def domains_explorer_web():
     except:
         page = 1
 
-    dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
+    dict_data = Domains.get_domains_up_by_filers('web', page=page, date_from=date_from, date_to=date_to)
     return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
diff --git a/var/www/blueprints/hunters.py b/var/www/blueprints/hunters.py
index e5ded14a..5ac5feb3 100644
--- a/var/www/blueprints/hunters.py
+++ b/var/www/blueprints/hunters.py
@@ -8,7 +8,6 @@
 import os
 import sys
 import json
-import random
 
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response
 from flask_login import login_required, current_user, login_user, logout_user
diff --git a/var/www/blueprints/import_export.py b/var/www/blueprints/import_export.py
index 76681f9b..6dac8d0d 100644
--- a/var/www/blueprints/import_export.py
+++ b/var/www/blueprints/import_export.py
@@ -9,7 +9,6 @@
 import os
 import sys
 import uuid
 import json
-import random
 
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, send_file
 from flask_login import login_required, current_user, login_user, logout_user
diff --git a/var/www/blueprints/objects_item.py b/var/www/blueprints/objects_item.py
index 8edf49e5..2ca37c05 100644
--- a/var/www/blueprints/objects_item.py
+++ b/var/www/blueprints/objects_item.py
@@ -22,6 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib import ConfigLoader
 from lib import item_basic
 from lib.objects.Items import Item
+from lib.objects.Screenshots import Screenshot
 from lib import Tag
 from export import Export
@@ -47,7 +48,13 @@ config_loader = None
 @login_required
 @login_read_only
 @no_cache
 def screenshot(filename):
-    return send_from_directory(SCREENSHOT_FOLDER, f'{filename}.png', as_attachment=True)
+    if not filename:
+        abort(404)
+    if not 64 <= len(filename) <= 70:
+        abort(404)
+    filename = filename.replace('/', '')
+    s = Screenshot(filename)
+    return send_from_directory(SCREENSHOT_FOLDER, s.get_rel_path(add_extension=True), as_attachment=True)
 
 @objects_item.route("/object/item")
 @login_required
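The rewritten screenshot route above no longer interpolates the raw request filename into a filesystem path: it rejects empty or implausibly sized names, strips '/' so the name cannot traverse out of SCREENSHOT_FOLDER, and lets the Screenshot object resolve its own relative path. The same checks restated in isolation, with the 64-70 length window taken from the diff (a SHA-256 hex digest is 64 characters, plus a small margin for path-encoding characters):

    def sanitize_screenshot_name(filename):
        """Return a safe screenshot name, or None if the input looks wrong."""
        if not filename:
            return None
        # A SHA-256 based name is 64 hex chars; allow a small margin, as above.
        if not 64 <= len(filename) <= 70:
            return None
        # Remove directory separators so the name cannot escape the
        # screenshot folder once it is joined into a path.
        return filename.replace('/', '')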
diff --git a/var/www/blueprints/old_endpoints.py b/var/www/blueprints/old_endpoints.py
index 09f6bfaa..72f29768 100644
--- a/var/www/blueprints/old_endpoints.py
+++ b/var/www/blueprints/old_endpoints.py
@@ -7,7 +7,6 @@
 import os
 import sys
-import json
 
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
 from flask_login import login_required, current_user
diff --git a/var/www/blueprints/settings_b.py b/var/www/blueprints/settings_b.py
index d5440fd4..0b5b1eff 100644
--- a/var/www/blueprints/settings_b.py
+++ b/var/www/blueprints/settings_b.py
@@ -15,9 +15,6 @@ from flask_login import login_required, current_user
 # Import Role_Manager
 from Role_Manager import login_admin, login_analyst, login_read_only
 
-# sys.path.append('modules')
-# import Flask_config
-
 sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
@@ -29,7 +26,7 @@ from packages import git_status
 settings_b = Blueprint('settings_b', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/settings'))
 
 # ============ VARIABLES ============
-#bootstrap_label = Flask_config.bootstrap_label
+# bootstrap_label = Flask_config.bootstrap_label
 
 # ============ FUNCTIONS ============
 def create_json_response(data, status_code):
diff --git a/var/www/blueprints/tags_ui.py b/var/www/blueprints/tags_ui.py
index fd0eb04d..f7451afb 100644
--- a/var/www/blueprints/tags_ui.py
+++ b/var/www/blueprints/tags_ui.py
@@ -7,7 +7,6 @@
 import os
 import sys
-import json
 
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, abort
 from flask_login import login_required, current_user, login_user, logout_user
@@ -21,6 +20,7 @@ from Role_Manager import login_admin, login_analyst, login_read_only
 sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
+##################################
 from packages import Date
 from lib import Tag
 from lib.objects import ail_objects
diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py
index f33b0daa..6468d2a4 100644
--- a/var/www/modules/Flask_config.py
+++ b/var/www/modules/Flask_config.py
@@ -14,8 +14,8 @@ import sys
 ##################################
 # Import Project packages
 ##################################
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
+sys.path.append(os.environ['AIL_BIN'])
+from lib import ConfigLoader
 
 from pubsublogger import publisher
 
 # FLASK #
diff --git a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py
index 5af36456..43a4216b 100644
--- a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py
+++ b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py
@@ -13,7 +13,6 @@ import sys
 import json
 import string
 import datetime
-import redis
 import unicodedata
 import uuid
 from io import BytesIO
@@ -33,8 +32,8 @@ from lib import Tag
 from lib.objects.Items import Item
 from packages import Import_helper
 
+
 from pytaxonomies import Taxonomies
-from pymispgalaxies import Galaxies, Clusters
 
 try:
     from pymisp.mispevent import MISPObject
diff --git a/var/www/modules/PasteSubmit/templates/edit_tag_export.html b/var/www/modules/PasteSubmit/templates/edit_tag_export.html
index 94980787..45f56e5c 100644
--- a/var/www/modules/PasteSubmit/templates/edit_tag_export.html
+++ b/var/www/modules/PasteSubmit/templates/edit_tag_export.html
@@ -280,7 +280,7 @@
-
diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py
index 11e67986..544e9afb 100644
--- a/var/www/modules/Tags/Flask_Tags.py
+++ b/var/www/modules/Tags/Flask_Tags.py
@@ -18,7 +18,6 @@ from pymispgalaxies import Galaxies, Clusters
 
 # ============ VARIABLES ============
 import Flask_config
-from lib import Tag
 
 app = Flask_config.app
 baseUrl = Flask_config.baseUrl
diff --git a/var/www/modules/dashboard/Flask_dashboard.py b/var/www/modules/dashboard/Flask_dashboard.py
index 7a9f3247..cb33b268 100644
--- a/var/www/modules/dashboard/Flask_dashboard.py
+++ b/var/www/modules/dashboard/Flask_dashboard.py
@@ -11,16 +11,19 @@ import datetime
 import time
 import flask
 
-from Date import Date
-
 from flask import Flask, render_template, jsonify, request, Blueprint, url_for
 
 from Role_Manager import login_admin, login_analyst, login_read_only
 from flask_login import login_required
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
-import queues_modules
-import ail_updates
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import queues_modules
+from lib import ail_updates
+
+from packages.Date import Date
 
 # ============ VARIABLES ============
 import Flask_config
diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py b/var/www/modules/hashDecoded/Flask_hashDecoded.py
index 6463d033..1260a3ba 100644
--- a/var/www/modules/hashDecoded/Flask_hashDecoded.py
+++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py
@@ -4,24 +4,19 @@
 '''
     Flask functions and routes for the trending modules page
 '''
-import redis
 import os
 import sys
 import datetime
-import json
-from Date import Date
-from io import BytesIO
-
-from hashlib import sha256
-
-import requests
 
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, send_file
 
 from Role_Manager import login_admin, login_analyst, login_read_only
 from flask_login import login_required
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
-import Decoded
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from packages.Date import Date
 
 # ============ VARIABLES ============
 import Flask_config
diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index 9c3e316d..c8d099ad 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -4,21 +4,21 @@
 '''
     Flask functions and routes for the trending modules page
 '''
-import redis
 import datetime
 import sys
 import os
 import time
-import json
 from pyfaup.faup import Faup
 
 from flask import Flask, render_template, jsonify, request, send_file, Blueprint, redirect, url_for
 
 from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
 from flask_login import login_required
-from Date import Date
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
-import crawlers
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import crawlers
 
 # ============ VARIABLES ============
 import Flask_config
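dashboard, hashDecoded and hiddenServices above (like sentiment, trendingcharts and trendingmodules further down) all trade the old bare `from Date import Date` for the packaged path. A one-line sketch of the new spelling; the YYYYMMDD argument is an assumption based on how AIL passes dates elsewhere in this patch:

    from packages.Date import Date  # was: from Date import Date, with packages/ on sys.path

    date = Date('20221128')         # assumed: AIL date strings are YYYYMMDD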
diff --git a/var/www/modules/hunter/Flask_hunter.py b/var/www/modules/hunter/Flask_hunter.py
index 208b1708..c1be3fe6 100644
--- a/var/www/modules/hunter/Flask_hunter.py
+++ b/var/www/modules/hunter/Flask_hunter.py
@@ -7,24 +7,21 @@
 import os
 import sys
 import json
-import redis
-import datetime
-import calendar
 import flask
 from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect, Response, escape
 
 from Role_Manager import login_admin, login_analyst, login_read_only
 from flask_login import login_required, current_user
 
-# ---------------------------------------------------------------
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import item_basic
+from lib import Tracker
+from lib import Tag
+from packages import Term
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
-import Term
-import Tracker
-import item_basic
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-import Tag
 
 # ============ VARIABLES ============
 import Flask_config
diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py
index 1a5b2251..85484000 100644
--- a/var/www/modules/restApi/Flask_restApi.py
+++ b/var/www/modules/restApi/Flask_restApi.py
@@ -10,20 +10,19 @@ import re
 import sys
 import uuid
 import json
-import redis
 import datetime
 
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import Domain
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.objects.Items import Item
+from lib import Tag
+from lib import Tracker
 
-import Import_helper
-import Cryptocurrency
-import Pgp
-import Item
-import Paste
-import Tag
-import Term
-import Tracker
+from packages import Term
+
+from packages import Import_helper
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import'))
 import importer
@@ -419,7 +418,8 @@ def get_cryptocurrency_bitcoin_metadata():
     data = request.get_json()
     crypto_address = data.get('bitcoin', None)
     req_data = {'bitcoin': crypto_address, 'metadata': True}
-    res = Cryptocurrency.get_cryptocurrency(req_data, 'bitcoin')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 @restApi.route("api/v1/get/cryptocurrency/bitcoin/item", methods=['POST'])
@@ -428,7 +428,8 @@ def get_cryptocurrency_bitcoin_item():
     data = request.get_json()
     bitcoin_address = data.get('bitcoin', None)
     req_data = {'bitcoin': bitcoin_address, 'items': True}
-    res = Cryptocurrency.get_cryptocurrency(req_data, 'bitcoin')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@@ -440,7 +441,8 @@ def get_pgp_key_metadata():
     data = request.get_json()
     pgp_field = data.get('key', None)
     req_data = {'key': pgp_field, 'metadata': True}
-    res = Pgp.get_pgp(req_data, 'key')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 @restApi.route("api/v1/get/pgp/mail/metadata", methods=['POST'])
@@ -449,7 +451,8 @@ def get_pgp_mail_metadata():
     data = request.get_json()
     pgp_field = data.get('mail', None)
     req_data = {'mail': pgp_field, 'metadata': True}
-    res = Pgp.get_pgp(req_data, 'mail')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 @restApi.route("api/v1/get/pgp/name/metadata", methods=['POST'])
@@ -458,7 +461,8 @@ def get_pgp_name_metadata():
     data = request.get_json()
     pgp_field = data.get('name', None)
     req_data = {'name': pgp_field, 'metadata': True}
-    res = Pgp.get_pgp(req_data, 'name')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 @restApi.route("api/v1/get/pgp/key/item", methods=['POST'])
@@ -467,7 +471,8 @@ def get_pgp_key_item():
     data = request.get_json()
     pgp_field = data.get('key', None)
     req_data = {'key': pgp_field, 'items': True}
-    res = Pgp.get_pgp(req_data, 'key')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 @restApi.route("api/v1/get/pgp/mail/item", methods=['POST'])
@@ -476,7 +481,8 @@ def get_pgp_mail_item():
     data = request.get_json()
     pgp_mail = data.get('mail', None)
     req_data = {'mail': pgp_mail, 'items': True}
-    res = Pgp.get_pgp(req_data, 'mail')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 @restApi.route("api/v1/get/pgp/name/item", methods=['POST'])
@@ -485,7 +491,8 @@ def get_pgp_name_item():
     data = request.get_json()
     pgp_name = data.get('name', None)
     req_data = {'name': pgp_name, 'items': True}
-    res = Pgp.get_pgp(req_data, 'name')
+    raise Exception('TO MIGRATE')
+    res = 0
     return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
 
 '''
@@ -553,10 +560,14 @@ def get_domain_status_minimal():
     data = request.get_json()
     domain = data.get('domain', None)
     # error handler
-    res = Domain.api_verify_if_domain_exist(domain)
+    # TODO TO MIGRATE
+    raise Exception('TO MIGRATE')
+    # res = Domain.api_verify_if_domain_exist(domain)
     if res:
         return create_json_response(res[0], res[1])
-    res = Domain.api_get_domain_up_range(domain)
+    # TODO TO MIGRATE
+    raise Exception('TO MIGRATE')
+    # res = Domain.api_get_domain_up_range(domain)
     res[0]['domain'] = domain
     return create_json_response(res[0], res[1])
 
@@ -572,7 +583,9 @@ def get_crawled_domain_list():
     date_to = data.get('date_to', None)
     domain_type = data.get('domain_type', None)
     domain_status = 'UP'
-    res = Domain.api_get_domains_by_status_daterange(date_from, date_to, domain_type)
+    # TODO TO MIGRATE
+    raise Exception('TO MIGRATE')
+    # res = Domain.api_get_domains_by_status_daterange(date_from, date_to, domain_type)
     dict_res = res[0]
     dict_res['date_from'] = date_from
     dict_res['date_to'] = date_to
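Every endpoint above that relied on the removed Cryptocurrency, Pgp and Domain packages now fails fast with raise Exception('TO MIGRATE') until it is ported to the new objects API; the res = 0 after each raise is unreachable and only keeps the name bound for the old return statement. If a cleaner interim behaviour were wanted, a small helper along these lines (hypothetical, not part of this patch) would answer 501 instead of an unhandled 500:

    import json
    from flask import Response

    def not_migrated_response():
        # Hypothetical stop-gap: report the endpoint as not yet migrated
        # to the new lib.objects API, instead of raising an unhandled exception.
        data = {'status': 'error', 'reason': 'endpoint not migrated yet'}
        return Response(json.dumps(data, indent=2, sort_keys=True),
                        mimetype='application/json'), 501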
diff --git a/var/www/modules/search/Flask_search.py b/var/www/modules/search/Flask_search.py
index 361da03f..306a9443 100644
--- a/var/www/modules/search/Flask_search.py
+++ b/var/www/modules/search/Flask_search.py
@@ -4,8 +4,8 @@
 '''
     Flask functions and routes for the trending modules page
 '''
-import json
 import os
+import sys
 import datetime
 import flask
 from flask import Flask, render_template, jsonify, request, Blueprint
@@ -17,6 +17,10 @@ from whoosh import index
 from whoosh.fields import Schema, TEXT, ID
 from whoosh.qparser import QueryParser
 
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
 from lib.objects.Items import Item
 
 import time
diff --git a/var/www/modules/sentiment/Flask_sentiment.py b/var/www/modules/sentiment/Flask_sentiment.py
index c4e88841..663f6f02 100644
--- a/var/www/modules/sentiment/Flask_sentiment.py
+++ b/var/www/modules/sentiment/Flask_sentiment.py
@@ -4,17 +4,21 @@
 '''
     Flask functions and routes for the trending modules page
 '''
-import redis
+import os
+import sys
 import datetime
 import calendar
-from Date import Date
 import flask
 from flask import Flask, render_template, jsonify, request, Blueprint
 
 from Role_Manager import login_admin, login_analyst, login_read_only
 from flask_login import login_required
 
-import Paste
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from packages.Date import Date
 
 # ============ VARIABLES ============
 import Flask_config
diff --git a/var/www/modules/settings/Flask_settings.py b/var/www/modules/settings/Flask_settings.py
index b9e50186..4316d490 100644
--- a/var/www/modules/settings/Flask_settings.py
+++ b/var/www/modules/settings/Flask_settings.py
@@ -4,16 +4,20 @@
 '''
     Flask functions and routes for the settings modules page
 '''
+import os
+import sys
+
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for
 from flask_login import login_required, current_user
 from Role_Manager import login_admin, login_analyst, login_user, login_read_only
 
-import json
-import datetime
-
-import d4
-import Users
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import d4
+from lib import Users
 
 # ============ VARIABLES ============
diff --git a/var/www/modules/terms/Flask_terms.py b/var/www/modules/terms/Flask_terms.py
index c92d22c2..e0c4c745 100644
--- a/var/www/modules/terms/Flask_terms.py
+++ b/var/www/modules/terms/Flask_terms.py
@@ -6,10 +6,8 @@
     note: The matching of credential against supplied credential is done using Levenshtein distance
 '''
-import json
-import redis
-import datetime
-import calendar
+import os
+import sys
 
 import flask
 from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect, Response
@@ -17,10 +15,11 @@ from Role_Manager import login_admin, login_analyst, login_user_no_api, login_read_only
 from flask_login import login_required, current_user
 import Levenshtein
 
-# ---------------------------------------------------------------
-
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
 from lib.objects.Items import Item
-import Term
 
 # ============ VARIABLES ============
 import Flask_config
diff --git a/var/www/modules/trendingcharts/Flask_trendingcharts.py b/var/www/modules/trendingcharts/Flask_trendingcharts.py
index 3041347a..52761bb9 100644
--- a/var/www/modules/trendingcharts/Flask_trendingcharts.py
+++ b/var/www/modules/trendingcharts/Flask_trendingcharts.py
@@ -4,15 +4,21 @@
 '''
     Flask functions and routes for the trending charts page
 '''
-import redis
+import os
+import sys
 import datetime
-from Date import Date
 import flask
 from flask import Flask, render_template, jsonify, request, Blueprint
 
 from Role_Manager import login_admin, login_analyst, login_read_only
 from flask_login import login_required
 
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from packages.Date import Date
+
 # ============ VARIABLES ============
 import Flask_config
diff --git a/var/www/modules/trendingmodules/Flask_trendingmodules.py b/var/www/modules/trendingmodules/Flask_trendingmodules.py
index 128618e6..7305a8f8 100644
--- a/var/www/modules/trendingmodules/Flask_trendingmodules.py
+++ b/var/www/modules/trendingmodules/Flask_trendingmodules.py
@@ -4,15 +4,21 @@
 '''
     Flask functions and routes for the trending modules page
 '''
-import redis
+import os
+import sys
 import datetime
-from Date import Date
 import flask
 from flask import Flask, render_template, jsonify, request, Blueprint
 
 from Role_Manager import login_admin, login_analyst, login_read_only
 from flask_login import login_required
 
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from packages.Date import Date
+
 # ============ VARIABLES ============
 import Flask_config
diff --git a/var/www/templates/crawler/crawler_splash/domain_explorer.html b/var/www/templates/crawler/crawler_splash/domain_explorer.html
index c7ecc5d9..3caaebc0 100644
--- a/var/www/templates/crawler/crawler_splash/domain_explorer.html
+++ b/var/www/templates/crawler/crawler_splash/domain_explorer.html
@@ -32,65 +32,62 @@
-
-
+
+
-        {% with domain_type=dict_data['domain_type'], date_from=dict_data['date_from'], date_to=dict_data['date_to'], domain_type=domain_type%}
-            {% include 'domains/filter_domains.html' %}
-        {% endwith %}
+            {% with domain_type=dict_data['domain_type'], date_from=dict_data['date_from'], date_to=dict_data['date_to'], domain_type=domain_type%}
+                {% include 'domains/filter_domains.html' %}
+            {% endwith %}
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
-        {% with l_dict_domains=dict_data['list_elem'], bootstrap_label=bootstrap_label %}
-            {% include 'domains/card_img_domain.html' %}
-        {% endwith %}
-
-
-
-    {%if 'list_elem' in dict_data%}
-        {% with page=dict_data['page'], nb_page_max=dict_data['nb_pages'], nb_first_elem=dict_data['nb_first_elem'], nb_last_elem=dict_data['nb_last_elem'], nb_all_elem=dict_data['nb_all_elem'] %}
-            {% set object_name="domain" %}
-            {%if domain_type=='onion'%}
-                {% set target_url=url_for('crawler_splash.domains_explorer_onion') + "?domain_type=onion" %}
-            {%else%}
-                {% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
-            {%endif%}
-            {%if 'date_from' in dict_data %}
-                {% set target_url = target_url + '&date_from=' + dict_data['date_from'] + '&date_to=' + dict_data['date_to'] %}
-            {%endif%}
-            {% include 'pagination.html' %}
-        {% endwith %}
-    {%endif%}
+            {% with l_dict_domains=dict_data['list_elem'], bootstrap_label=bootstrap_label %}
+                {% include 'domains/card_img_domain.html' %}
+            {% endwith %}
+
+
+    {%if 'list_elem' in dict_data%}
+        {% with page=dict_data['page'], nb_page_max=dict_data['nb_pages'], nb_first_elem=dict_data['nb_first_elem'], nb_last_elem=dict_data['nb_last_elem'], nb_all_elem=dict_data['nb_all_elem'] %}
+            {% set object_name="domain" %}
+            {%if domain_type=='onion'%}
+                {% set target_url=url_for('crawler_splash.domains_explorer_onion') + "?domain_type=onion" %}
+            {%else%}
+                {% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
+            {%endif%}
+            {%if 'date_from' in dict_data %}
+                {% set target_url = target_url + '&date_from=' + dict_data['date_from'] + '&date_to=' + dict_data['date_to'] %}
+            {%endif%}
+            {% include 'pagination.html' %}
+        {% endwith %}
+    {%endif%}
diff --git a/var/www/templates/domains/card_img_domain.html b/var/www/templates/domains/card_img_domain.html
index fb5480f1..4634a3ec 100644
--- a/var/www/templates/domains/card_img_domain.html
+++ b/var/www/templates/domains/card_img_domain.html
@@ -47,7 +47,6 @@

- Ports: {{dict_domain["ports"]}}
 {% if dict_domain['languages'] %}
     Languages:
     {% for language in dict_domain['languages'] %}
diff --git a/var/www/templates/domains/domains_result_list.html b/var/www/templates/domains/domains_result_list.html
index 5eefc2e0..3ef5b18f 100644
--- a/var/www/templates/domains/domains_result_list.html
+++ b/var/www/templates/domains/domains_result_list.html
@@ -1,22 +1,22 @@
-    Domain Search - AIL
+    Domain Search - AIL
-
+
-
-
+
+
-
+
diff --git a/var/www/templates/import_export/block_add_user_object_to_export_small.html b/var/www/templates/import_export/block_add_user_object_to_export_small.html
index f18561e7..b1c9cb74 100644
--- a/var/www/templates/import_export/block_add_user_object_to_export_small.html
+++ b/var/www/templates/import_export/block_add_user_object_to_export_small.html
@@ -5,7 +5,7 @@

-
+
diff --git a/var/www/templates/modals/add_tags.html b/var/www/templates/modals/add_tags.html
index 5c6ce227..17d80c3b 100644
--- a/var/www/templates/modals/add_tags.html
+++ b/var/www/templates/modals/add_tags.html
@@ -47,12 +47,12 @@