mirror of
https://github.com/ail-project/ail-framework.git
synced 2025-01-18 08:26:15 +00:00
chg: [ocr] add cache + correlation ocr-chats-messages + launch ocr extractor by default
This commit is contained in:
parent
8bd1ae3815
commit
c25ccb8618
9 changed files with 122 additions and 26 deletions
|
@ -275,8 +275,11 @@ function launching_scripts {
|
|||
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
# IMAGES
|
||||
screen -S "Script_AIL" -X screen -t "Exif" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Exif.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "OcrExtractor" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./OcrExtractor.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
##################################
|
||||
# TRACKERS MODULES #
|
||||
|
|
|
@ -41,26 +41,26 @@ config_loader = None
|
|||
##################################
|
||||
|
||||
# Maps each object type to the list of object types it may be correlated with.
# Every key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last occurrence, so only the effective entries are kept.
CORRELATION_TYPES_BY_OBJ = {
    "chat": ["chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"],  # message or direct correlation like cve, bitcoin, ... ???
    "chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
    "chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"],  # TODO user account
    "cookie-name": ["domain"],
    "cryptocurrency": ["domain", "item", "message", "ocr"],
    "cve": ["domain", "item", "message", "ocr"],
    "decoded": ["domain", "item", "message", "ocr"],
    "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
    "etag": ["domain"],
    "favicon": ["domain", "item"],  # TODO Decoded
    "file-name": ["chat", "message"],
    "hhhash": ["domain"],
    "image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "user-account"],  # TODO subchannel + threads ????
    "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],  # chat ???
    "message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
    "ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
    "pgp": ["domain", "item", "message", "ocr"],
    "screenshot": ["domain", "item"],
    "title": ["domain", "item"],
    "user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username"],
    "username": ["domain", "item", "message", "user-account"],
}
|
||||
|
||||
|
|
|
@ -209,7 +209,7 @@ class Domain(AbstractObject):
|
|||
def get_screenshot(self):
|
||||
last_item = self.get_last_item_root()
|
||||
if last_item:
|
||||
screenshot = self._get_external_correlation('item', '', last_item, 'screenshot').get('screenshot')
|
||||
screenshot = self.get_obj_correlations('item', '', last_item, ['screenshot']).get('screenshot')
|
||||
if screenshot:
|
||||
return screenshot.pop()[1:]
|
||||
|
||||
|
@ -392,7 +392,7 @@ class Domain(AbstractObject):
|
|||
print(har)
|
||||
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
|
||||
# Screenshot
|
||||
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
|
||||
screenshot = self.get_obj_correlations('item', '', item_id, ['screenshot'])
|
||||
if screenshot and screenshot['screenshot']:
|
||||
screenshot = screenshot['screenshot'].pop()[1:]
|
||||
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
|
||||
|
|
|
@ -141,7 +141,7 @@ class Message(AbstractObject):
|
|||
# TODO get thread ID
|
||||
|
||||
def _get_image_ocr(self, obj_id):
|
||||
return bool(self._get_external_correlation('image', '', obj_id, 'ocr').get('ocr'))
|
||||
return bool(self.get_correlation('ocr').get('ocr'))
|
||||
|
||||
def get_images(self):
|
||||
images = []
|
||||
|
|
|
@ -228,6 +228,14 @@ class Ocr(AbstractObject):
|
|||
def remove(self, val):
|
||||
return r_object.srem(f'ocr:{self.id}', val)
|
||||
|
||||
def update_correlation(self):
    """
    Copy the correlations of the image sharing this object's id onto this object.

    Every correlation returned for the image is added to this object, except
    the image's own 'ocr' entries, which are skipped.
    """
    correlations_by_type = self.get_obj_correlations('image', '', self.id)
    for correlated_type in correlations_by_type:
        if correlated_type == 'ocr':
            continue
        for raw_ref in correlations_by_type[correlated_type]:
            # Entries have the form '<subtype>:<id>'; split on the first ':'
            # only, so an id that itself contains ':' stays intact.
            subtype, correlated_id = raw_ref.split(':', 1)
            self.add_correlation(correlated_type, subtype, correlated_id)
|
||||
|
||||
def create(self, extracted_texts, tags=[]):
|
||||
r_object.sadd(f'{self.type}:all', self.id)
|
||||
for extracted in extracted_texts:
|
||||
|
@ -235,7 +243,10 @@ class Ocr(AbstractObject):
|
|||
if len(text) > 1:
|
||||
str_coords = self.create_coord_str(bbox)
|
||||
self.add(str_coords, text)
|
||||
self.add_correlation('image', '', self.id)
|
||||
|
||||
# Correlations
|
||||
self.update_correlation()
|
||||
self.add_correlation('image', '', self.id)
|
||||
|
||||
for tag in tags:
|
||||
self.add_tag(tag)
|
||||
|
|
|
@ -225,11 +225,11 @@ class AbstractObject(ABC):
|
|||
|
||||
## Correlation ##
|
||||
|
||||
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
|
||||
def get_obj_correlations(self, obj_type, obj_subtype, obj_id, filter_types=[]):
|
||||
"""
|
||||
Get object correlation
|
||||
"""
|
||||
return get_correlations(req_type, req_subtype, req_id, filter_types=[obj_type])
|
||||
return get_correlations(obj_type, obj_subtype, obj_id, filter_types=filter_types)
|
||||
|
||||
def get_correlation(self, obj_type):
|
||||
"""
|
||||
|
|
|
@ -17,6 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import chats_viewer
|
||||
from lib.objects import Messages
|
||||
from lib.objects import Ocrs
|
||||
|
@ -68,30 +69,45 @@ class OcrExtractor(AbstractModule):
|
|||
# Waiting time in seconds between two processed messages
|
||||
self.pending_seconds = 1
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
|
||||
# Send module state to logs
|
||||
self.logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
def is_cached(self):
    """
    Tell whether the cache holds an 'ocr:no:<id>' entry for the current object.

    :return: the result of the cache ``exists`` lookup for that key
    """
    cache_key = f'ocr:no:{self.obj.id}'
    return self.r_cache.exists(cache_key)
|
||||
|
||||
def add_to_cache(self):
    """
    Store an 'ocr:no:<id>' entry for the current object in the cache.

    The entry is written with value 0 and expires after 86400 seconds.
    """
    cache_key = f'ocr:no:{self.obj.id}'
    ttl_seconds = 24 * 60 * 60  # 86400
    self.r_cache.setex(cache_key, ttl_seconds, 0)
|
||||
|
||||
def compute(self, message):
|
||||
image = self.get_obj()
|
||||
path = image.get_filepath()
|
||||
print(image)
|
||||
|
||||
languages = get_model_languages(image)
|
||||
print(languages)
|
||||
print(image.id)
|
||||
|
||||
ocr = Ocrs.Ocr(image.id)
|
||||
ocr.delete()
|
||||
if self.is_cached():
|
||||
return None
|
||||
|
||||
if not ocr.exists():
|
||||
path = image.get_filepath()
|
||||
languages = get_model_languages(image)
|
||||
print(languages)
|
||||
texts = Ocrs.extract_text(path, languages)
|
||||
if texts:
|
||||
print('create')
|
||||
ocr = Ocrs.create(image.id, texts)
|
||||
self.add_message_to_queue(ocr)
|
||||
# Save in cache
|
||||
else:
|
||||
print('no text detected')
|
||||
self.add_to_cache()
|
||||
else:
|
||||
print('update correlation')
|
||||
ocr.update_correlation()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = OcrExtractor()
|
||||
module.run()
|
||||
# from lib.objects import Images
|
||||
# module.obj = Images.Image('')
|
||||
# module.compute('')
|
||||
|
|
26
update/v5.5/Update.py
Executable file
26
update/v5.5/Update.py
Executable file
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_HOME'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from update.bin.ail_updater import AIL_Updater
|
||||
from lib import ail_updates
|
||||
from lib import chats_viewer
|
||||
|
||||
class Updater(AIL_Updater):
    """Updater for this release; adds no behaviour on top of AIL_Updater."""

    def __init__(self, version):
        # Only forwards the target version to the base updater.
        super().__init__(version)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run the chats_viewer correlation fix first, then the v5.5 version update.
    chats_viewer.fix_correlations_subchannel_message()
    Updater('v5.5').run_update()
|
||||
|
40
update/v5.5/Update.sh
Executable file
40
update/v5.5/Update.sh
Executable file
|
@ -0,0 +1,40 @@
|
|||
#!/bin/bash
# AIL v5.5 update script: shuts AIL down, refreshes git submodules, upgrades
# easyocr, runs two LAUNCH.sh start steps and then the v5.5 Python updater.

# Abort early when a required environment variable is missing; each message
# names the variable that was actually tested.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escapes, interpreted by `echo -e` below.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e "${GREEN}Shutting down AIL ...${DEFAULT}"
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e "${GREEN}Updating python packages ...${DEFAULT}"
echo ""
pip install -U easyocr

# NOTE(review): -lrv / -lkv presumably start the Redis and Kvrocks back-ends
# needed by the updater -- confirm against LAUNCH.sh.
bash "${AIL_BIN}/LAUNCH.sh" -lrv
bash "${AIL_BIN}/LAUNCH.sh" -lkv

echo ""
echo -e "${GREEN}Updating AIL VERSION ...${DEFAULT}"
echo ""
python "${AIL_HOME}/update/v5.5/Update.py"
wait
echo ""
echo ""

exit 0
|
Loading…
Add table
Reference in a new issue