From 7ed7b4c63ea4d94ae5fe4d5dd0b45982e9a573f3 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 20 Apr 2023 16:05:58 +0200
Subject: [PATCH] fix: [module extractor] fix cache + fix yara match

---
 bin/lib/module_extractor.py                   | 65 +++++++++++++------
 var/www/templates/objects/item/show_item.html |  1 +
 var/www/update_thirdparty.sh                  | 10 ++-
 3 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/bin/lib/module_extractor.py b/bin/lib/module_extractor.py
index 38f9a7e2..8ad532aa 100755
--- a/bin/lib/module_extractor.py
+++ b/bin/lib/module_extractor.py
@@ -71,30 +71,34 @@ def get_correl_match(extract_type, obj_id, content):
     return extracted
 
 def _get_yara_match(data):
-    for row in data.get('strings'):
-        start, i, value = row
-        value = value.decode()
-        end = start + len(value)
-        r_cache.sadd(f'extractor:yara:match:{r_key}', f'{start}:{end}:{value}')
-        r_cache.expire(f'extractor:yara:match:{r_key}', 300)
+    for string_match in data.get('strings'):
+        for string_match_instance in string_match.instances:
+            start = string_match_instance.offset
+            value = string_match_instance.matched_data.decode()
+            end = start + string_match_instance.matched_length
+            r_cache.sadd(f'extractor:yara:match:{r_key}', f'{start}:{end}:{value}')
+            r_cache.expire(f'extractor:yara:match:{r_key}', 300)
     return yara.CALLBACK_CONTINUE
 
 def _get_word_regex(word):
     return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
 
+def convert_byte_offset_to_string(b_content, offset):
+    byte_chunk = b_content[:offset + 1]
+    string_chunk = byte_chunk.decode()
+    offset = len(string_chunk) - 1
+    return offset
+
 
 # TODO RETRO HUNTS
 # TODO TRACKER TYPE IN UI
 def get_tracker_match(obj_id, content):
-    cached = r_cache.get(f'extractor:cache:{obj_id}')
-    if cached:
-        r_cache.expire(f'extractor:cache:{obj_id}', 300)
-        return json.loads(cached)
-    extracted = []
+    extracted = []
+    extracted_yara = []
     trackers = Tracker.get_obj_all_trackers('item', '', obj_id)
     for tracker_uuid in trackers:
         tracker_type = Tracker.get_tracker_type(tracker_uuid)
-        print(tracker_type)
+        # print(tracker_type)
         tracker = Tracker.get_tracker_by_uuid(tracker_uuid)
         if tracker_type == 'regex':  # TODO Improve word detection -> word delimiter
             regex_match = regex_helper.regex_finditer(r_key, tracker, obj_id, content)
@@ -102,14 +106,14 @@
                 extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker_uuid}'])
         elif tracker_type == 'yara':
             rule = Tracker.get_yara_rule_by_uuid(tracker_uuid)
-            rule.match(data=content, callback=_get_yara_match,
+            rule.match(data=content.encode(), callback=_get_yara_match,
                        which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
             yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
             r_cache.delete(f'extractor:yara:match:{r_key}')
             extracted = []
             for match in yara_match:
                 start, end, value = match.split(':', 2)
-                extracted.append([int(start), int(end), value, f'tracker:{tracker_uuid}'])
+                extracted_yara.append([int(start), int(end), value, f'tracker:{tracker_uuid}'])
 
         elif tracker_type == 'word' or tracker_type == 'set':
             if tracker_type == 'set':
@@ -120,15 +124,20 @@
             for word in words:
                 regex = _get_word_regex(word)
                 regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
-                print(regex_match)
+                # print(regex_match)
                 for match in regex_match:
                     extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker_uuid}'])
 
-    # Save In Cache
-    if extracted:
-        extracted_dump = json.dumps(extracted)
-        r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
-        r_cache.expire(f'extractor:cache:{obj_id}', 300)    # TODO Reduce CACHE ???????????????
+    # Convert byte offset to string offset
+    if extracted_yara:
+        b_content = content.encode()
+        if len(b_content) == len(content):
+            extracted[0:0] = extracted_yara
+        else:
+            for yara_m in extracted_yara:
+                start = convert_byte_offset_to_string(b_content, yara_m[0])
+                end = convert_byte_offset_to_string(b_content, yara_m[1])
+                extracted.append([int(start), int(end), yara_m[2], yara_m[3]])
 
     return extracted
 
@@ -138,6 +147,15 @@ def get_tracker_match(obj_id, content):
 
 def extract(obj_id, content=None):
     item = Item(obj_id)
+    if not item.exists():
+        return []
+
+    # CHECK CACHE
+    cached = r_cache.get(f'extractor:cache:{obj_id}')
+    if cached:
+        r_cache.expire(f'extractor:cache:{obj_id}', 300)
+        return json.loads(cached)
+
     if not content:
         content = item.get_content()
 
@@ -160,6 +178,13 @@
     # SORT By Start Pos
     extracted = sorted(extracted, key=itemgetter(0))
     # print(extracted)
+
+    # Save In Cache
+    if extracted:
+        extracted_dump = json.dumps(extracted)
+        r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
+        r_cache.expire(f'extractor:cache:{obj_id}', 300)    # TODO Reduce CACHE ???????????????
+
     return extracted
 
 # TODO ADD LINK UI
diff --git a/var/www/templates/objects/item/show_item.html b/var/www/templates/objects/item/show_item.html
index 18499a78..1b65ee97 100644
--- a/var/www/templates/objects/item/show_item.html
+++ b/var/www/templates/objects/item/show_item.html
@@ -476,6 +476,7 @@
         }
     });
 
+    $('[data-toggle="popover"]').popover();
 
     {% if meta['crawler'] %}
 
diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh
index dcd1e508..4033bc79 100755
--- a/var/www/update_thirdparty.sh
+++ b/var/www/update_thirdparty.sh
@@ -99,11 +99,15 @@ mv temp/jquery-ui-1.12.1/jquery-ui.min.js ./static/js/jquery-ui.min.js
 mv temp/jquery-ui-1.12.1/jquery-ui.min.css ./static/css/jquery-ui.min.css
 
 # INSTALL YARA
-YARA_VERSION="4.0.2"
-wget -q https://github.com/VirusTotal/yara/archive/v${YARA_VERSION}.zip -O temp/yara.zip
-unzip -qq temp/yara.zip -d temp/
+YARA_VERSION="4.3.0"
+wget https://github.com/VirusTotal/yara/archive/v${YARA_VERSION}.zip -O temp/yara.zip
+unzip temp/yara.zip -d temp/
 pushd temp/yara-${YARA_VERSION}
 ./bootstrap.sh
+./configure
+make
+sudo make install
+make check
 popd
 
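
Note on the yara changes above: yara-python 4.3 changed the match callback payload, so data['strings'] now yields StringMatch objects whose .instances carry per-occurrence offsets, and rule.match() is fed bytes, which is why the patch converts byte offsets back to str indexes before caching. The sketch below is a minimal, standalone illustration of that flow, not code from this patch; it assumes yara-python >= 4.3 is installed, and the rule, the sample text and the byte_to_str_offset helper are made up for the example.

    import yara

    # Hypothetical rule and sample text, used only to exercise the API.
    RULES = yara.compile(source='rule demo { strings: $s = "secret" condition: $s }')
    TEXT = 'café café secret token'   # the multi-byte 'é' makes byte and str offsets differ
    DATA = TEXT.encode()

    def byte_to_str_offset(b_content, offset):
        # Length of the decoded prefix == index of the same position in the str.
        # Assumes the offset falls on a UTF-8 character boundary, which holds here
        # because the matched needle is ASCII.
        return len(b_content[:offset].decode())

    def on_match(data):
        # yara-python >= 4.3: each entry in data['strings'] is a StringMatch,
        # each occurrence a StringMatchInstance with offset/matched_data/matched_length.
        for string_match in data['strings']:
            for instance in string_match.instances:
                b_start = instance.offset
                b_end = b_start + instance.matched_length
                s_start = byte_to_str_offset(DATA, b_start)
                s_end = byte_to_str_offset(DATA, b_end)
                print(instance.matched_data.decode(), (b_start, b_end), '->', (s_start, s_end))
        return yara.CALLBACK_CONTINUE

    RULES.match(data=DATA, callback=on_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=30)

With this sample text the match sits at bytes 12..18 but at str indexes 10..16; that is the same mismatch the patch handles with convert_byte_offset_to_string() whenever len(content.encode()) != len(content).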