From 61701e2fcc341b7664b47e5bf62679454189e7c1 Mon Sep 17 00:00:00 2001 From: terrtia Date: Tue, 9 Apr 2024 14:22:11 +0200 Subject: [PATCH] chg: [perf] reduce memory usage --- bin/lib/ConfigLoader.py | 1 - bin/lib/Tag.py | 60 +++++++++++++++++++++++++--------- bin/lib/Tracker.py | 38 +++++++++++---------- bin/lib/btc_ail.py | 5 +-- bin/lib/objects/Chats.py | 7 ++-- bin/lib/objects/Ocrs.py | 8 ++--- bin/lib/objects/ail_objects.py | 4 +-- bin/modules/Mail.py | 1 - 8 files changed, 77 insertions(+), 47 deletions(-) diff --git a/bin/lib/ConfigLoader.py b/bin/lib/ConfigLoader.py index 6ecd4b02..0b1d258d 100755 --- a/bin/lib/ConfigLoader.py +++ b/bin/lib/ConfigLoader.py @@ -9,7 +9,6 @@ The ``Domain`` import os import sys -import time import redis import configparser diff --git a/bin/lib/Tag.py b/bin/lib/Tag.py index 15edcd0a..ae0cb020 100755 --- a/bin/lib/Tag.py +++ b/bin/lib/Tag.py @@ -32,6 +32,9 @@ config_loader = None # # # # UNSAFE TAGS # # # # +# set of unsafe tags +UNSAFE_TAGS = None + def build_unsafe_tags(): tags = set() # CE content @@ -52,12 +55,12 @@ def is_tags_safe(ltags): :return: is a tag in the set unsafe :rtype: boolean """ - return unsafe_tags.isdisjoint(ltags) + global UNSAFE_TAGS + if UNSAFE_TAGS is None: + UNSAFE_TAGS = build_unsafe_tags() + return UNSAFE_TAGS.isdisjoint(ltags) -# set of unsafe tags -unsafe_tags = build_unsafe_tags() - # - - - UNSAFE TAGS - - - # # # TODO: verify tags + object_type @@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag): #### Taxonomies #### -TAXONOMIES = {} +TAXONOMIES = None def load_taxonomies(): global TAXONOMIES manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json') TAXONOMIES = Taxonomies(manifest_path=manifest) - -load_taxonomies() - def get_taxonomies(): + if TAXONOMIES is None: + load_taxonomies() return TAXONOMIES.keys() # TODO rename me to get enabled_taxonomies @@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy): r_tags.srem('taxonomies:enabled', taxonomy) def exists_taxonomy(taxonomy): + if TAXONOMIES is None: + load_taxonomies() return TAXONOMIES.get(taxonomy) is not None def get_taxonomy_description(taxonomy): + if TAXONOMIES is None: + load_taxonomies() return TAXONOMIES.get(taxonomy).description def get_taxonomy_name(taxonomy): + if TAXONOMIES is None: + load_taxonomies() return TAXONOMIES.get(taxonomy).name def get_taxonomy_predicates(taxonomy): @@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy): return meta def get_taxonomy_refs(taxonomy): + if TAXONOMIES is None: + load_taxonomies() return TAXONOMIES.get(taxonomy).refs def get_taxonomy_version(taxonomy): + if TAXONOMIES is None: + load_taxonomies() return TAXONOMIES.get(taxonomy).version def get_taxonomy_tags(taxonomy, enabled=False): + if TAXONOMIES is None: + load_taxonomies() taxonomy_obj = TAXONOMIES.get(taxonomy) tags = [] for p, content in taxonomy_obj.items(): @@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ meta = {} if not exists_taxonomy(taxonomy_name): return meta + if TAXONOMIES is None: + load_taxonomies() taxonomy = TAXONOMIES.get(taxonomy_name) meta['description'] = taxonomy.description meta['name'] = taxonomy.name @@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data): if not exists_taxonomy(taxonomy): return {'error': f'taxonomy {taxonomy} not found'}, 404 tags = data.get('tags', []) + if TAXONOMIES is None: + load_taxonomies() taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags()) for tag in tags: if tag not in taxonomy_tags: @@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data): def enable_taxonomy_tags(taxonomy): enable_taxonomy(taxonomy) + if TAXONOMIES is None: + load_taxonomies() for tag in TAXONOMIES.get(taxonomy).machinetags(): add_taxonomy_tag_enabled(taxonomy, tag) @@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data): # # TODO Synonyms - -GALAXIES = {} -CLUSTERS = {} +GALAXIES = None +CLUSTERS = None def load_galaxies(): global GALAXIES galaxies = [] @@ -298,11 +317,10 @@ def load_galaxies(): clusters.append(json.load(f)) CLUSTERS = Clusters(clusters) - -# LOAD GALAXY + CLUSTERS -load_galaxies() - def get_galaxies(): + if GALAXIES is None: + # LOAD GALAXY + CLUSTERS + load_galaxies() return GALAXIES.keys() # TODO RENAME ME @@ -310,9 +328,15 @@ def get_active_galaxies(): return r_tags.smembers('galaxies:enabled') def get_galaxy(galaxy_name): + if GALAXIES is None: + # LOAD GALAXY + CLUSTERS + load_galaxies() return GALAXIES.get(galaxy_name) def exists_galaxy(galaxy): + if CLUSTERS is None: + # LOAD GALAXY + CLUSTERS + load_galaxies() return CLUSTERS.get(galaxy) is not None def is_galaxy_enabled(galaxy): @@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag): def get_clusters(): + if CLUSTERS is None: + # LOAD GALAXY + CLUSTERS + load_galaxies() return CLUSTERS.keys() def get_cluster(cluster_type): + if CLUSTERS is None: + # LOAD GALAXY + CLUSTERS + load_galaxies() return CLUSTERS.get(cluster_type) def get_galaxy_tags(galaxy_type): diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 1bc7ea2f..a4e41c90 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -12,7 +12,6 @@ import yara import datetime import base64 -from ail_typo_squatting import runAll import math from collections import defaultdict @@ -38,24 +37,22 @@ logger = logging.getLogger() config_loader = ConfigLoader.ConfigLoader() r_cache = config_loader.get_redis_conn("Redis_Cache") - r_tracker = config_loader.get_db_conn("Kvrocks_Trackers") - -items_dir = config_loader.get_config_str("Directories", "pastes") -if items_dir[-1] == '/': - items_dir = items_dir[:-1] config_loader = None -email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' -email_regex = re.compile(email_regex) - -special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') -special_characters.add('\\s') - # NLTK tokenizer -tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', +TOKENIZER = None + +def init_tokenizer(): + global TOKENIZER + TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', gaps=True, discard_empty=True) +def get_special_characters(): + special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') + special_characters.add('\\s') + return special_characters + ############### #### UTILS #### def is_valid_uuid_v4(curr_uuid): @@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex): return False def is_valid_mail(email): + email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' + email_regex = re.compile(email_regex) result = email_regex.match(email) if result: return True @@ -400,6 +399,9 @@ class Tracker: tracker_type = 'yara' elif tracker_type == 'typosquatting': + + from ail_typo_squatting import runAll + domain = to_track.split(" ")[0] typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1 for typo in typo_generation: @@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1): # force lowercase to_track = to_track.lower() word_set = set(to_track) - set_inter = word_set.intersection(special_characters) + set_inter = word_set.intersection(get_special_characters()) if set_inter: return {"status": "error", "reason": f'special character(s) not allowed: {set_inter}', @@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True): words_dict = defaultdict(int) if filtering: - blob = TextBlob(content, tokenizer=tokenizer) + if TOKENIZER is None: + init_tokenizer() + blob = TextBlob(content, tokenizer=TOKENIZER) else: blob = TextBlob(content) for word in blob.tokens: @@ -1800,9 +1804,9 @@ def _fix_db_custom_tags(): #### -- #### -if __name__ == '__main__': +# if __name__ == '__main__': - _fix_db_custom_tags() + # _fix_db_custom_tags() # fix_all_tracker_uuid_list() # res = get_all_tracker_uuid() # print(len(res)) diff --git a/bin/lib/btc_ail.py b/bin/lib/btc_ail.py index 34ddd9d5..51f4c207 100755 --- a/bin/lib/btc_ail.py +++ b/bin/lib/btc_ail.py @@ -8,7 +8,6 @@ import sys import requests sys.path.append(os.environ['AIL_BIN']) -from lib.objects.CryptoCurrencies import CryptoCurrency logger = logging.getLogger() @@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50): # filter btc seen in ail def filter_btc_seen(btc_addr_set): + from lib.objects import CryptoCurrencies + list_seen_btc = [] for btc_addr in btc_addr_set: - cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin') + cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin') if cryptocurrency.exists(): list_seen_btc.append(btc_addr) return list_seen_btc diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py index f7c82d5f..941228ce 100755 --- a/bin/lib/objects/Chats.py +++ b/bin/lib/objects/Chats.py @@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects -from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id -from lib.data_retention_engine import update_obj_date -from lib.objects import ail_objects +from lib.objects.abstract_subtype_object import get_all_id +# from lib.data_retention_engine import update_obj_date from lib.timeline_engine import Timeline -from lib.correlations_engine import get_correlation_by_correl_type - config_loader = ConfigLoader() baseurl = config_loader.get_config_str("Notifications", "ail_domain") r_object = config_loader.get_db_conn("Kvrocks_Objects") diff --git a/bin/lib/objects/Ocrs.py b/bin/lib/objects/Ocrs.py index 52d58808..5dc0edab 100755 --- a/bin/lib/objects/Ocrs.py +++ b/bin/lib/objects/Ocrs.py @@ -213,10 +213,10 @@ class Ocr(AbstractObject): draw = ImageDraw.Draw(img) for bbox in self.get_coords(): c1, c2, c3, c4 = bbox - draw.line((tuple(c1), tuple(c2)), fill="yellow") - draw.line((tuple(c2), tuple(c3)), fill="yellow") - draw.line((tuple(c3), tuple(c4)), fill="yellow") - draw.line((tuple(c4), tuple(c1)), fill="yellow") + draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2) + draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2) + draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2) + draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2) # img.show() buff = BytesIO() img.save(buff, "PNG") diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index 9d901a1a..59b632e0 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -37,7 +37,7 @@ from lib.objects import Ocrs from lib.objects import Pgps from lib.objects.Screenshots import Screenshot from lib.objects import Titles -from lib.objects.UsersAccount import UserAccount +from lib.objects import UsersAccount from lib.objects import Usernames config_loader = ConfigLoader() @@ -113,7 +113,7 @@ def get_object(obj_type, subtype, obj_id): elif obj_type == 'pgp': return Pgps.Pgp(obj_id, subtype) elif obj_type == 'user-account': - return UserAccount(obj_id, subtype) + return UsersAccount.UserAccount(obj_id, subtype) elif obj_type == 'username': return Usernames.Username(obj_id, subtype) else: diff --git a/bin/modules/Mail.py b/bin/modules/Mail.py index a87aec46..29477ed2 100755 --- a/bin/modules/Mail.py +++ b/bin/modules/Mail.py @@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages # ################################## from modules.abstract_module import AbstractModule -from lib.objects.Items import Item from lib.ConfigLoader import ConfigLoader # from lib import Statistics