mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-26 07:47:17 +00:00
chg: [ocr] extract text from image + add ocr object
This commit is contained in:
parent
dbde04caa3
commit
6ca4b29329
13 changed files with 478 additions and 8 deletions
|
@ -18,14 +18,14 @@ config_loader = None
|
||||||
|
|
||||||
AIL_OBJECTS = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cookie-name', 'cve', 'cryptocurrency', 'decoded',
|
AIL_OBJECTS = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cookie-name', 'cve', 'cryptocurrency', 'decoded',
|
||||||
'domain', 'etag', 'favicon', 'file-name', 'hhhash',
|
'domain', 'etag', 'favicon', 'file-name', 'hhhash',
|
||||||
'item', 'image', 'message', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
||||||
|
|
||||||
AIL_OBJECTS_WITH_SUBTYPES = {'chat', 'chat-subchannel', 'cryptocurrency', 'pgp', 'username', 'user-account'}
|
AIL_OBJECTS_WITH_SUBTYPES = {'chat', 'chat-subchannel', 'cryptocurrency', 'pgp', 'username', 'user-account'}
|
||||||
|
|
||||||
# TODO by object TYPE ????
|
# TODO by object TYPE ????
|
||||||
AIL_OBJECTS_CORRELATIONS_DEFAULT = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cve', 'cryptocurrency', 'decoded',
|
AIL_OBJECTS_CORRELATIONS_DEFAULT = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cve', 'cryptocurrency', 'decoded',
|
||||||
'domain', 'favicon', 'file-name',
|
'domain', 'favicon', 'file-name',
|
||||||
'item', 'image', 'message', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
|
||||||
|
|
||||||
def get_ail_uuid():
|
def get_ail_uuid():
|
||||||
ail_uuid = r_serv_db.get('ail:uuid')
|
ail_uuid = r_serv_db.get('ail:uuid')
|
||||||
|
@ -105,7 +105,7 @@ def unpack_obj_global_id(global_id, r_type='tuple'):
|
||||||
obj = global_id.split(':', 2)
|
obj = global_id.split(':', 2)
|
||||||
return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
|
return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
|
||||||
else: # tuple(type, subtype, id)
|
else: # tuple(type, subtype, id)
|
||||||
return global_id.split(':', 2)
|
return global_id.split(':', 2) # TODO REPLACE get_obj_type_subtype_id_from_global_id(global_id)
|
||||||
|
|
||||||
def unpack_objs_global_id(objs_global_id, r_type='tuple'):
|
def unpack_objs_global_id(objs_global_id, r_type='tuple'):
|
||||||
objs = []
|
objs = []
|
||||||
|
|
|
@ -53,9 +53,10 @@ CORRELATION_TYPES_BY_OBJ = {
|
||||||
"favicon": ["domain", "item"], # TODO Decoded
|
"favicon": ["domain", "item"], # TODO Decoded
|
||||||
"file-name": ["chat", "message"],
|
"file-name": ["chat", "message"],
|
||||||
"hhhash": ["domain"],
|
"hhhash": ["domain"],
|
||||||
"image": ["chat", "message", "user-account"],
|
"image": ["chat", "message", "ocr", "user-account"],
|
||||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
|
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
|
||||||
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "pgp", "user-account"], # chat ??
|
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "pgp", "user-account"], # chat ??
|
||||||
|
"ocr": ["image"],
|
||||||
"pgp": ["domain", "item", "message"],
|
"pgp": ["domain", "item", "message"],
|
||||||
"screenshot": ["domain", "item"],
|
"screenshot": ["domain", "item"],
|
||||||
"title": ["domain", "item"],
|
"title": ["domain", "item"],
|
||||||
|
|
244
bin/lib/objects/Ocrs.py
Executable file
244
bin/lib/objects/Ocrs.py
Executable file
|
@ -0,0 +1,244 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from io import BytesIO
|
||||||
|
from PIL import Image
|
||||||
|
from PIL import ImageDraw
|
||||||
|
|
||||||
|
from pymisp import MISPObject
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.objects.abstract_object import AbstractObject
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
# from lib import Language
|
||||||
|
# from lib.data_retention_engine import update_obj_date, get_obj_date_first
|
||||||
|
|
||||||
|
from flask import url_for
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||||
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||||
|
IMAGE_FOLDER = config_loader.get_files_directory('images')
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
# SET x1,y1:x2,y2:x3,y3:x4,y4:extracted_text
|
||||||
|
|
||||||
|
class Ocr(AbstractObject):
    """
    AIL Ocr Object: text regions extracted from an image.

    The object id is also used to locate the source image on disk and to
    correlate the OCR with that image. Every detected region is stored as one
    member of the ``ocr:<id>`` set, serialized as
    ``x1,y1:x2,y2:x3,y3:x4,y4:extracted_text``.
    """

    def __init__(self, id):
        super(Ocr, self).__init__('ocr', id)

    def exists(self):
        """Return a truthy value if at least one region is stored for this id."""
        return r_object.exists(f'ocr:{self.id}')

    @staticmethod
    def _split_member(extracted):
        """Split one stored set member into (list of 'x,y' strings, text).

        Only the first four ':' are separators, so the extracted text may
        itself contain ':' characters.
        """
        *coords, text = extracted.split(':', 4)
        return coords, text

    def get_content(self, r_type='str'):
        """
        Return the extracted text of all regions, one region per line.

        The result is read from the cache when available, otherwise it is
        rebuilt from the stored regions and cached for 300 seconds.

        :param r_type: 'str' (default) or 'bytes'
        :return: the content as str; for 'bytes' the encoded content, or None
                 when there is no content. Any other r_type returns None.
        """
        cache_key = f'content:{self.get_global_id()}'
        content = r_cache.get(cache_key)
        if not content:
            # Every text is prefixed with a newline (same layout as before,
            # so freshly built and already cached contents stay identical).
            content = ''.join(f'\n{self._split_member(extracted)[1]}'
                              for extracted in r_object.smembers(f'ocr:{self.id}'))
            # Set Cache
            if content:
                # Value and TTL are written in one command so the key can
                # never be left without an expiration.
                r_cache.set(cache_key, content, ex=300)

        if r_type == 'str':
            return content
        elif r_type == 'bytes':
            if content:
                return content.encode()
        return None

    def get_date(self):  # TODO
        """Return the object date formatted as YYYYMMDD (UTC)."""
        # NOTE(review): get_timestamp() is not defined in this class;
        # presumably inherited or still to be implemented - confirm.
        timestamp = self.get_timestamp()
        return datetime.utcfromtimestamp(float(timestamp)).strftime('%Y%m%d')

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation page of this object.

        :param flask_context: build the URL with flask url_for() when True,
                              otherwise from the configured AIL domain.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    def get_svg_icon(self):
        """Return the icon description used to draw this object in graphs."""
        return {'style': 'fas', 'icon': '\uf20a', 'color': 'yellow', 'radius': 5}

    def get_image_path(self):
        """Return the resolved path of the source image inside IMAGE_FOLDER.

        The id is split into six 2-character directories followed by the
        remaining characters as file name.
        """
        rel_path = os.path.join(self.id[0:2], self.id[2:4], self.id[4:6], self.id[6:8], self.id[8:10], self.id[10:12], self.id[12:])
        filename = os.path.join(IMAGE_FOLDER, rel_path)
        return os.path.realpath(filename)

    def get_misp_object(self):  # TODO
        """Return a MISP object for this OCR (attributes are not exported yet)."""
        obj = MISPObject('instant-message', standalone=True)
        obj_date = self.get_date()
        if obj_date:
            obj.first_seen = obj_date
        else:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')

        # obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
        #              obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
        #              obj.add_attribute('sensor', value=get_ail_uuid())]
        obj_attrs = []
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    # options: set of optional meta fields
    def get_meta(self, options=None, timestamp=None, translation_target=''):
        """
        Return the metadata dict of this object.

        The default meta (with tags) and the text content are always included.
        Supported optional fields: 'investigations', 'link', 'icon', 'img'
        (PNG bytes with the bounding boxes drawn) and 'map' (image-map
        coordinates with their text).

        :type options: set
        :type timestamp: float
        :param timestamp: currently unused
        :param translation_target: currently unused (translation is a TODO)
        """
        if options is None:
            options = set()
        meta = self.get_default_meta(tags=True)
        meta['content'] = self.get_content()

        # optional meta fields
        if 'investigations' in options:
            meta['investigations'] = self.get_investigations()
        if 'link' in options:
            meta['link'] = self.get_link(flask_context=True)
        if 'icon' in options:
            meta['icon'] = self.get_svg_icon()
        if 'img' in options:
            meta['img'] = self.draw_bounding_boxs()
        if 'map' in options:
            meta['map'] = self.get_img_map_coords()

        # # TODO
        # if 'language' in options:
        #     meta['language'] = self.get_language()
        # if 'translation' in options and translation_target:
        #     if meta.get('language'):
        #         source = meta['language']
        #     else:
        #         source = None
        #     meta['translation'] = self.translate(content=meta.get('content'), source=source, target=translation_target)
        # if 'language' in options:
        #     meta['language'] = self.get_language()
        return meta

    def get_objs_container(self):  # TODO
        pass
        # objs_containers = set()
        # # chat
        # objs_containers.add(self.get_chat())
        # subchannel = self.get_subchannel()
        # if subchannel:
        #     objs_containers.add(subchannel)
        # thread = self.get_current_thread()
        # if thread:
        #     objs_containers.add(thread)
        # return objs_containers

    def create_coord_str(self, bbox):
        """
        Serialize a bounding box as 'x1,y1:x2,y2:x3,y3:x4,y4'.

        :param bbox: exactly four (x, y) points; values are truncated to int
        :raises ValueError: if bbox does not hold four 2-value points
        """
        c1, c2, c3, c4 = bbox  # enforce exactly four corners
        return ':'.join(f'{int(x)},{int(y)}' for x, y in (c1, c2, c3, c4))

    def _unpack_coord(self, coord):
        """Split an 'x,y' string into its two components (as strings)."""
        return coord.split(',', 1)

    def get_coords(self):
        """Return, for every stored region, its corners as a list of (x, y) int tuples."""
        coords = []
        for extracted in r_object.smembers(f'ocr:{self.id}'):
            bbox, _ = self._split_member(extracted)
            coord = []
            for c in bbox:
                x, y = self._unpack_coord(c)
                coord.append((int(x), int(y)))
            coords.append(coord)
        return coords

    def get_img_map_coords(self):
        """Return a list of ('x1,y1,x2,y2,x3,y3,x4,y4', text) tuples, one per
        stored region, suitable for an HTML image map."""
        coords = []
        for extracted in r_object.smembers(f'ocr:{self.id}'):
            extract = extracted.split(':', 4)
            x1, y1 = self._unpack_coord(extract[0])
            x2, y2 = self._unpack_coord(extract[1])
            x3, y3 = self._unpack_coord(extract[2])
            x4, y4 = self._unpack_coord(extract[3])
            coords.append((f'{x1},{y1},{x2},{y2},{x3},{y3},{x4},{y4}', extract[4]))
        return coords

    def edit(self, coordinates, text, new_text, new_coordinates=None):
        pass

    def add(self, coordinates, text):
        """Store one region; coordinates is the string built by create_coord_str()."""
        val = f'{coordinates}:{text}'
        return r_object.sadd(f'ocr:{self.id}', val)

    def remove(self, val):
        """Remove one stored region, given its full serialized value."""
        return r_object.srem(f'ocr:{self.id}', val)

    def create(self, extracted_texts, tags=None):
        """
        Store the extracted regions, correlate the OCR with its image and tag it.

        :param extracted_texts: iterable of (bbox, text), bbox being four (x, y) points
        :param tags: optional iterable of tags to add to the object
        """
        # None instead of a shared mutable default list
        if tags is None:
            tags = []
        for extracted in extracted_texts:
            bbox, text = extracted
            str_coords = self.create_coord_str(bbox)
            self.add(str_coords, text)
        # The OCR and its source image share the same id
        self.add_correlation('image', '', self.id)

        for tag in tags:
            self.add_tag(tag)

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        pass

    def draw_bounding_boxs(self):
        """Return the source image as PNG bytes with every region outlined in yellow."""
        # convert() returns an independent copy, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(self.get_image_path()) as source:
            img = source.convert("RGBA")
        draw = ImageDraw.Draw(img)
        for bbox in self.get_coords():
            c1, c2, c3, c4 = bbox
            # Closed polyline: c1 -> c2 -> c3 -> c4 -> c1
            draw.line([c1, c2, c3, c4, c1], fill="yellow")
        # img.show()
        buff = BytesIO()
        img.save(buff, "PNG")
        return buff.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def create(obj_id, detections, tags=None):
    """
    Return the Ocr object for obj_id, creating it first if it does not exist.

    :param obj_id: id of the OCR object
    :param detections: iterable of (bbox, text) passed to Ocr.create()
    :param tags: optional iterable of tags added on creation
    :return: the Ocr object (left untouched if it already existed)
    """
    obj = Ocr(obj_id)
    if not obj.exists():
        # Always hand over a fresh list instead of a shared mutable default
        obj.create(detections, tags=list(tags) if tags else [])
    # TODO Edit
    return obj
|
||||||
|
|
||||||
|
# easyocr readers already built in this process, keyed by their language tuple.
# Building a reader is the expensive step, so each one is built once and reused.
_OCR_READERS = {}


# TODO preload languages
def extract_text(image_path, languages, threshold=0.2):
    """
    Run OCR on an image and return the detections above a confidence threshold.

    :param image_path: image handed to easyocr's Reader.readtext()
    :param languages: list of language codes handed to easyocr.Reader()
    :param threshold: detections with a score lower or equal to it are dropped
    :return: list of (bbox, text) tuples
    """
    import easyocr

    reader_key = tuple(languages)
    reader = _OCR_READERS.get(reader_key)
    if reader is None:
        reader = easyocr.Reader(languages)
        _OCR_READERS[reader_key] = reader

    return [(bbox, text)
            for bbox, text, score in reader.readtext(image_path)
            if score > threshold]
|
||||||
|
|
||||||
|
# TODO OCRS Class
|
|
@ -9,8 +9,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from lib.exceptions import AILObjectUnknown
|
from lib.exceptions import AILObjectUnknown
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.ail_core import get_all_objects, get_object_all_subtypes, get_objects_with_subtypes, get_default_correlation_objects
|
from lib.ail_core import get_all_objects, get_object_all_subtypes, get_objects_with_subtypes, get_default_correlation_objects
|
||||||
from lib import correlations_engine
|
from lib import correlations_engine
|
||||||
|
@ -35,6 +33,7 @@ from lib.objects import HHHashs
|
||||||
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
||||||
from lib.objects import Images
|
from lib.objects import Images
|
||||||
from lib.objects import Messages
|
from lib.objects import Messages
|
||||||
|
from lib.objects import Ocrs
|
||||||
from lib.objects import Pgps
|
from lib.objects import Pgps
|
||||||
from lib.objects.Screenshots import Screenshot
|
from lib.objects.Screenshots import Screenshot
|
||||||
from lib.objects import Titles
|
from lib.objects import Titles
|
||||||
|
@ -93,6 +92,8 @@ def get_object(obj_type, subtype, obj_id):
|
||||||
return Images.Image(obj_id)
|
return Images.Image(obj_id)
|
||||||
elif obj_type == 'message':
|
elif obj_type == 'message':
|
||||||
return Messages.Message(obj_id)
|
return Messages.Message(obj_id)
|
||||||
|
elif obj_type == 'ocr':
|
||||||
|
return Ocrs.Ocr(obj_id)
|
||||||
elif obj_type == 'screenshot':
|
elif obj_type == 'screenshot':
|
||||||
return Screenshot(obj_id)
|
return Screenshot(obj_id)
|
||||||
elif obj_type == 'title':
|
elif obj_type == 'title':
|
||||||
|
@ -254,7 +255,7 @@ def get_objects_meta(objs, options=set(), flask_context=False):
|
||||||
|
|
||||||
def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
||||||
obj = get_object(obj_type, subtype, id)
|
obj = get_object(obj_type, subtype, id)
|
||||||
meta = obj.get_meta(options={'chat', 'chats', 'created_at', 'icon', 'info', 'nb_messages', 'nb_participants', 'threads', 'username'})
|
meta = obj.get_meta(options={'chat', 'chats', 'created_at', 'icon', 'info', 'map', 'nb_messages', 'nb_participants', 'threads', 'username'})
|
||||||
# meta['icon'] = obj.get_svg_icon()
|
# meta['icon'] = obj.get_svg_icon()
|
||||||
meta['svg_icon'] = obj.get_svg_icon()
|
meta['svg_icon'] = obj.get_svg_icon()
|
||||||
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
|
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
|
||||||
|
|
|
@ -89,7 +89,7 @@ class Categ(AbstractModule):
|
||||||
# Search for pattern categories in obj content
|
# Search for pattern categories in obj content
|
||||||
for categ, pattern in self.categ_words:
|
for categ, pattern in self.categ_words:
|
||||||
|
|
||||||
if obj.type == 'message':
|
if obj.type == 'message' or obj.type == 'ocr':
|
||||||
self.add_message_to_queue(message='0', queue=categ)
|
self.add_message_to_queue(message='0', queue=categ)
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
57
bin/modules/OcrExtractor.py
Executable file
57
bin/modules/OcrExtractor.py
Executable file
|
@ -0,0 +1,57 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
"""
|
||||||
|
The OcrExtractor Module
|
||||||
|
======================
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Import External packages
|
||||||
|
##################################
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from modules.abstract_module import AbstractModule
|
||||||
|
from lib.objects import Ocrs
|
||||||
|
|
||||||
|
|
||||||
|
class OcrExtractor(AbstractModule):
    """
    OcrExtractor for AIL framework

    For every received image object, extract its text with OCR and, when
    text is found, create the Ocr object and push it with
    add_message_to_queue().
    """

    def __init__(self):
        super(OcrExtractor, self).__init__()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        # Send module state to logs
        self.logger.info(f'Module {self.module_name} initialized')

    def compute(self, message):
        """Process the current image object; `message` itself is not used."""
        image = self.get_obj()

        ocr = Ocrs.Ocr(image.id)
        if ocr.exists():
            # Already extracted: nothing to do
            return

        # TODO Get Language to extract -> add en by default
        languages = ['en', 'ru']

        texts = Ocrs.extract_text(image.get_filepath(), languages)
        self.logger.debug(f'{image.id}: {len(texts)} text region(s) extracted')
        if texts:
            ocr = Ocrs.create(image.id, texts)
            self.add_message_to_queue(ocr)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: instantiate the module and start it.
    OcrExtractor().run()
|
|
@ -162,6 +162,9 @@ publish = Tags
|
||||||
subscribe = Image
|
subscribe = Image
|
||||||
publish = Tags
|
publish = Tags
|
||||||
|
|
||||||
|
[OcrExtractor]
|
||||||
|
subscribe = Image
|
||||||
|
publish = Item
|
||||||
|
|
||||||
######## CORE ########
|
######## CORE ########
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,9 @@ bcrypt>3.1.6
|
||||||
# Ail typo squatting
|
# Ail typo squatting
|
||||||
ail_typo_squatting
|
ail_typo_squatting
|
||||||
|
|
||||||
|
# OCR
|
||||||
|
easyocr
|
||||||
|
|
||||||
# Tests
|
# Tests
|
||||||
nose2>=0.12.0
|
nose2>=0.12.0
|
||||||
coverage>=5.5
|
coverage>=5.5
|
||||||
|
|
|
@ -52,6 +52,7 @@ from blueprints.objects_etag import objects_etag
|
||||||
from blueprints.objects_hhhash import objects_hhhash
|
from blueprints.objects_hhhash import objects_hhhash
|
||||||
from blueprints.chats_explorer import chats_explorer
|
from blueprints.chats_explorer import chats_explorer
|
||||||
from blueprints.objects_image import objects_image
|
from blueprints.objects_image import objects_image
|
||||||
|
from blueprints.objects_ocr import objects_ocr
|
||||||
from blueprints.objects_favicon import objects_favicon
|
from blueprints.objects_favicon import objects_favicon
|
||||||
from blueprints.api_rest import api_rest
|
from blueprints.api_rest import api_rest
|
||||||
|
|
||||||
|
@ -114,6 +115,7 @@ app.register_blueprint(objects_etag, url_prefix=baseUrl)
|
||||||
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
|
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
|
||||||
app.register_blueprint(chats_explorer, url_prefix=baseUrl)
|
app.register_blueprint(chats_explorer, url_prefix=baseUrl)
|
||||||
app.register_blueprint(objects_image, url_prefix=baseUrl)
|
app.register_blueprint(objects_image, url_prefix=baseUrl)
|
||||||
|
app.register_blueprint(objects_ocr, url_prefix=baseUrl)
|
||||||
app.register_blueprint(objects_favicon, url_prefix=baseUrl)
|
app.register_blueprint(objects_favicon, url_prefix=baseUrl)
|
||||||
app.register_blueprint(api_rest, url_prefix=baseUrl)
|
app.register_blueprint(api_rest, url_prefix=baseUrl)
|
||||||
|
|
||||||
|
|
|
@ -139,6 +139,9 @@ def show_correlation():
|
||||||
correl_option = request.form.get('imageCheck')
|
correl_option = request.form.get('imageCheck')
|
||||||
if correl_option:
|
if correl_option:
|
||||||
filter_types.append('image')
|
filter_types.append('image')
|
||||||
|
correl_option = request.form.get('ocrCheck')
|
||||||
|
if correl_option:
|
||||||
|
filter_types.append('ocr')
|
||||||
correl_option = request.form.get('user_accountCheck')
|
correl_option = request.form.get('user_accountCheck')
|
||||||
if correl_option:
|
if correl_option:
|
||||||
filter_types.append('user-account')
|
filter_types.append('user-account')
|
||||||
|
|
47
var/www/blueprints/objects_ocr.py
Normal file
47
var/www/blueprints/objects_ocr.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
'''
|
||||||
|
Blueprint Flask: OCR object endpoints: image with the extracted text bounding boxes
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file, send_from_directory
|
||||||
|
from flask_login import login_required, current_user
|
||||||
|
|
||||||
|
# Import Role_Manager
|
||||||
|
from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.objects import Ocrs
|
||||||
|
|
||||||
|
# ============ BLUEPRINT ============
|
||||||
|
objects_ocr = Blueprint('objects_ocr', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/ocr'))
|
||||||
|
|
||||||
|
# ============ VARIABLES ============
|
||||||
|
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
|
||||||
|
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
# ============ FUNCTIONS ============
|
||||||
|
@objects_ocr.route('/ocr/<path:filename>')
@login_required
@login_read_only
@no_cache
def ocr_image(filename):
    """
    Serve the image of an OCR object as PNG, with its text regions outlined.

    :param filename: OCR object id, optionally split with '/' separators
    Responds 404 when the id is malformed, the OCR object does not exist or
    the image cannot be read.
    """
    if not filename:
        abort(404)
    if not 64 <= len(filename) <= 70:
        abort(404)
    obj_id = filename.replace('/', '')
    # Ocr.get_image_path() slices the id into directory names: an id made of
    # dots would become '../' components. Only plain ASCII alphanumerics are
    # accepted so the resolved path cannot leave the images folder.
    if not (obj_id.isascii() and obj_id.isalnum()):
        abort(404)
    ocr = Ocrs.Ocr(obj_id)
    if not ocr.exists():
        abort(404)
    try:
        png = ocr.draw_bounding_boxs()
    except OSError:
        # Missing or unreadable image file
        abort(404)
    return send_file(BytesIO(png), mimetype='image/png')
|
||||||
|
|
||||||
|
|
||||||
|
# ============= ROUTES ==============
|
||||||
|
|
|
@ -130,6 +130,8 @@
|
||||||
{% include 'correlation/metadata_card_hhhash.html' %}
|
{% include 'correlation/metadata_card_hhhash.html' %}
|
||||||
{% elif dict_object["object_type"] == "image" %}
|
{% elif dict_object["object_type"] == "image" %}
|
||||||
{% include 'chats_explorer/card_image.html' %}
|
{% include 'chats_explorer/card_image.html' %}
|
||||||
|
{% elif dict_object["object_type"] == "ocr" %}
|
||||||
|
{% include 'objects/ocr/card_ocr.html' %}
|
||||||
{% elif dict_object["object_type"] == "item" %}
|
{% elif dict_object["object_type"] == "item" %}
|
||||||
{% include 'correlation/metadata_card_item.html' %}
|
{% include 'correlation/metadata_card_item.html' %}
|
||||||
{% elif dict_object["object_type"] == "favicon" %}
|
{% elif dict_object["object_type"] == "favicon" %}
|
||||||
|
@ -309,6 +311,10 @@
|
||||||
<input class="form-check-input" type="checkbox" value="True" id="imageCheck" name="imageCheck" {%if "image" in dict_object["filter"]%}checked{%endif%}>
|
<input class="form-check-input" type="checkbox" value="True" id="imageCheck" name="imageCheck" {%if "image" in dict_object["filter"]%}checked{%endif%}>
|
||||||
<label class="form-check-label" for="imageCheck">Image</label>
|
<label class="form-check-label" for="imageCheck">Image</label>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="form-check">
|
||||||
|
<input class="form-check-input" type="checkbox" value="True" id="ocrCheck" name="ocrCheck" {%if "ocr" in dict_object["filter"]%}checked{%endif%}>
|
||||||
|
<label class="form-check-label" for="ocrCheck">OCR</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
<hr>
|
<hr>
|
||||||
<div class="form-check">
|
<div class="form-check">
|
||||||
|
|
103
var/www/templates/objects/ocr/card_ocr.html
Normal file
103
var/www/templates/objects/ocr/card_ocr.html
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
|
||||||
|
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
|
||||||
|
|
||||||
|
{% with modal_add_tags=ail_tags %}
|
||||||
|
{% include 'modals/add_tags.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
|
||||||
|
{% include 'modals/edit_tag.html' %}
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.object_image {
|
||||||
|
filter: blur(5px);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
||||||
|
<div class="card my-1">
|
||||||
|
<div class="card-header">
|
||||||
|
<h4 class="text-secondary">{{ meta["id"] }} :</h4>
|
||||||
|
<ul class="list-group mb-2">
|
||||||
|
<li class="list-group-item py-0">
|
||||||
|
<table class="table">
|
||||||
|
<thead class="">
|
||||||
|
<tr>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody style="font-size: 15px;">
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<svg height="26" width="26">
|
||||||
|
<g class="nodes">
|
||||||
|
<circle cx="13" cy="13" r="13" fill="orange"></circle>
|
||||||
|
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="{{ meta["svg_icon"]["style"] }}" font-size="16px">{{ meta["svg_icon"]["icon"] }}</text>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
{{ meta['type'] }}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</li>
|
||||||
|
<li class="list-group-item py-0">
|
||||||
|
<div id="accordion_image" class="my-3">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header py-1" id="headingImage">
|
||||||
|
<button class="btn w-100 collapsed rotate" data-toggle="collapse" data-target="#collapseImage" aria-expanded="false" aria-controls="collapseImage">
|
||||||
|
<span class="row text-left">
|
||||||
|
<div class="col-11">
|
||||||
|
<span class="mt-2">
|
||||||
|
<i class="far fa-image"></i> Show Image
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div class="col-1 text-primary">
|
||||||
|
<i class="fas fa-chevron-circle-down"></i>
|
||||||
|
</div>
|
||||||
|
</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div id="collapseImage" class="collapse show" aria-labelledby="headingImage" data-parent="#accordion_image">
|
||||||
|
<div class="card-body text-center">
|
||||||
|
{% include 'objects/image/block_blur_img_slider.html' %}
|
||||||
|
|
||||||
|
<img class="object_image mb-1" usemap="#image-map" src="{{ url_for('objects_ocr.ocr_image', filename=meta['id'])}}">
|
||||||
|
<map name="image-map">
|
||||||
|
{% for c in meta['map'] %}
|
||||||
|
<area shape="poly" coords="{{ c[0] }}" title="{{ c[1] }}">
|
||||||
|
{% endfor %}
|
||||||
|
</map>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</li>
|
||||||
|
<li class="list-group-item py-0">
|
||||||
|
<pre class="my-0" style="white-space: pre-wrap;">{{ meta['content'] }}</pre>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="list-group-item py-0">
|
||||||
|
<div class="my-2">
|
||||||
|
Tags:
|
||||||
|
{% for tag in meta['tags'] %}
|
||||||
|
<button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}"
|
||||||
|
data-toggle="modal" data-target="#edit_tags_modal"
|
||||||
|
data-tagid="{{ tag }}" data-objtype="{{ meta['type'] }}" data-objsubtype="" data-objid="{{ meta["id"] }}">
|
||||||
|
{{ tag }}
|
||||||
|
</button>
|
||||||
|
{% endfor %}
|
||||||
|
<button type="button" class="btn btn-light" data-toggle="modal" data-target="#add_tags_modal">
|
||||||
|
<i class="far fa-plus-square"></i>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
{% with obj_type='image', obj_id=meta['id'], obj_subtype='' %}
|
||||||
|
{% include 'modals/investigations_register_obj.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
<button type="button" class="btn btn-primary" data-toggle="modal" data-target="#investigations_register_obj_modal">
|
||||||
|
<i class="fas fa-microscope"></i> Investigations
|
||||||
|
</button>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
Loading…
Reference in a new issue