Mirror of https://github.com/ail-project/ail-framework.git (synced 2025-02-07 09:56:24 +00:00)
chg: [module extractor] signal timeout global extraction + reduce regex and yara timeout
parent f27acbf185
commit bc23518713
1 changed file with 43 additions and 29 deletions

This change wraps the whole extraction pass in a 60-second SIGALRM guard and tightens the per-call budgets: regex_helper.regex_finditer() now runs with max_time=5, and yara rule matching with timeout=5 (down from 30).
@@ -17,6 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib.ail_users import get_user_org
 from lib.objects import ail_objects
 from lib.objects.Titles import Title
+from lib.exceptions import TimeoutException
 from lib import correlations_engine
 from lib import regex_helper
 from lib.ConfigLoader import ConfigLoader
@@ -38,6 +39,15 @@ config_loader = None
 
 r_key = regex_helper.generate_redis_cache_key('extractor')
 
 
+# SIGNAL ALARM
+import signal
+
+def timeout_handler(signum, frame):
+    raise TimeoutException
+
+
+signal.signal(signal.SIGALRM, timeout_handler)
+
 # TODO UI Link
 CORRELATION_TO_EXTRACT = {
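For readers unfamiliar with the pattern: signal.alarm(n) asks the kernel to deliver SIGALRM after n seconds, and the handler registered above turns that signal into a TimeoutException that an ordinary try/except can catch. Below is a minimal standalone sketch of the same mechanism, assuming a POSIX system (signal.alarm is Unix-only and must run in the main thread); TimeoutException here is a local stand-in for lib.exceptions.TimeoutException.

import signal
import time

class TimeoutException(Exception):  # stand-in for lib.exceptions.TimeoutException
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

signal.alarm(2)      # ask the kernel to deliver SIGALRM in 2 seconds
try:
    time.sleep(10)   # placeholder for a slow extraction
except TimeoutException:
    print('extraction timed out')
else:
    signal.alarm(0)  # happy path: cancel the pending alarm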
@@ -98,7 +108,7 @@ def get_correl_match(extract_type, obj, content):
             sha256_val = sha256(value.encode()).hexdigest()
             map_value_id[sha256_val] = value
     if to_extract:
-        objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content)
+        objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content, max_time=5)
         if extract_type == 'title' and objs:
             objs = [objs[0]]
         for ob in objs:
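The new max_time=5 argument caps how long a single regex scan may run. This diff does not show regex_helper's internals, but one common way to enforce such a budget is to run the regex in a child process and kill it if it overruns (e.g. on catastrophic backtracking). A hypothetical sketch, with all names illustrative:

import multiprocessing
import re
from queue import Empty

def _finditer_worker(pattern, content, queue):
    # same (start, end, matched_text) shape the extractor consumes
    queue.put([(m.start(), m.end(), m.group()) for m in re.finditer(pattern, content)])

def regex_finditer_with_budget(pattern, content, max_time=5):
    queue = multiprocessing.Queue()
    proc = multiprocessing.Process(target=_finditer_worker, args=(pattern, content, queue))
    proc.start()
    try:
        return queue.get(timeout=max_time)  # wait at most max_time seconds for results
    except Empty:
        proc.terminate()                    # still matching: give up, return nothing
        return []
    finally:
        proc.join()

Terminating the worker rather than the whole process keeps one pathological pattern from stalling every tracker.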
@@ -154,13 +164,13 @@ def get_tracker_match(user_org, user_id, obj, content):
         # print(tracker_type)
         tracked = tracker.get_tracked()
         if tracker_type == 'regex':  # TODO Improve word detection -> word delimiter
-            regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content)
+            regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content, max_time=5)
             for match in regex_match:
                 extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
         elif tracker_type == 'yara':
             rule = tracker.get_rule()
             rule.match(data=content.encode(), callback=_get_yara_match,
-                       which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
+                       which_callbacks=yara.CALLBACK_MATCHES, timeout=5)
             yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
             r_cache.delete(f'extractor:yara:match:{r_key}')
             extracted = []
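yara-python enforces the timeout= argument natively: when a scan exceeds the budget it raises yara.TimeoutError. A small self-contained sketch of that behaviour (the rule and data below are illustrative only):

import yara

rules = yara.compile(source='rule demo { strings: $a = "secret" condition: $a }')

def on_match(data):
    print('matched rule:', data['rule'])
    return yara.CALLBACK_CONTINUE

try:
    rules.match(data=b'... some secret content ...', callback=on_match,
                which_callbacks=yara.CALLBACK_MATCHES, timeout=5)
except yara.TimeoutError:
    pass  # scan blew the 5 s budget; treat as no matches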
@@ -176,7 +186,7 @@ def get_tracker_match(user_org, user_id, obj, content):
             words = [tracked]
         for word in words:
             regex = _get_word_regex(word)
-            regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content)
+            regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content, max_time=5)
             # print(regex_match)
             for match in regex_match:
                 extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
@@ -194,7 +204,7 @@ def get_tracker_match(user_org, user_id, obj, content):
         retro_hunt.delete_objs()
 
         rule.match(data=content.encode(), callback=_get_yara_match,
-                   which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
+                   which_callbacks=yara.CALLBACK_MATCHES, timeout=5)
         yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
         r_cache.delete(f'extractor:yara:match:{r_key}')
         extracted = []
@@ -234,35 +244,39 @@ def extract(user_id, obj_type, subtype, obj_id, content=None):
         r_cache.expire(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', 300)
         return json.loads(cached)
 
-    if not content:
-        content = obj.get_content()
+    signal.alarm(60)
+    try:
+        if not content:
+            content = obj.get_content()
 
-    extracted = get_tracker_match(user_org, user_id, obj, content)
+        extracted = get_tracker_match(user_org, user_id, obj, content)
 
-    # print(item.get_tags())
-    for tag in obj.get_tags():
-        if MODULES.get(tag):
-            # print(tag)
-            module = MODULES.get(tag)
-            matches = module.extract(obj, content, tag)
-            if matches:
-                extracted = extracted + matches
+        # print(item.get_tags())
+        for tag in obj.get_tags():
+            if MODULES.get(tag):
+                # print(tag)
+                module = MODULES.get(tag)
+                matches = module.extract(obj, content, tag)
+                if matches:
+                    extracted = extracted + matches
 
-    for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
-        matches = get_correl_match(obj_t, obj, content)
-        if matches:
-            extracted = extracted + matches
+        for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
+            matches = get_correl_match(obj_t, obj, content)
+            if matches:
+                extracted = extracted + matches
 
-    # SORT By Start Pos
-    if extracted:
-        extracted = sorted(extracted, key=itemgetter(0))
-        extracted = merge_overlap(extracted)
+        # SORT By Start Pos
+        if extracted:
+            extracted = sorted(extracted, key=itemgetter(0))
+            extracted = merge_overlap(extracted)
 
-    # Save In Cache
-    if extracted:
-        extracted_dump = json.dumps(extracted)
-        r_cache.set(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', extracted_dump)
-        r_cache.expire(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', 300)  # TODO Reduce CACHE ???????????????
+        # Save In Cache
+        if extracted:
+            extracted_dump = json.dumps(extracted)
+            r_cache.set(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', extracted_dump)
+            r_cache.expire(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', 300)  # TODO Reduce CACHE ???????????????
+    except TimeoutException:
+        extracted = []
+    else:
+        signal.alarm(0)
 
     return extracted
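The guard in extract() (alarm, try, except TimeoutException, else: signal.alarm(0)) can be generalized. A purely illustrative packaging of the same idea as a context manager, not part of this commit:

import signal
from contextlib import contextmanager

class TimeoutException(Exception):  # stand-in for lib.exceptions.TimeoutException
    pass

@contextmanager
def global_timeout(seconds):
    def _handler(signum, frame):
        raise TimeoutException
    old_handler = signal.signal(signal.SIGALRM, _handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)                             # always cancel the pending alarm
        signal.signal(signal.SIGALRM, old_handler)  # restore the previous handler

# usage, mirroring extract():
extracted = []
try:
    with global_timeout(2):
        while True:   # placeholder for a long extraction pass
            pass
except TimeoutException:
    extracted = []    # same fallback extract() uses on timeout

Cancelling the alarm in finally, rather than only on the success path, guarantees no stale SIGALRM fires after the block exits for any other reason.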