From 31b519cc170b13c417ac55bf14a4b51d830d77aa Mon Sep 17 00:00:00 2001 From: terrtia Date: Fri, 26 Apr 2024 15:50:58 +0200 Subject: [PATCH] chg: [tags] search ocrs and images by tags + fix ocr, filter invalid image --- bin/modules/OcrExtractor.py | 7 +++++- var/www/blueprints/tags_ui.py | 18 +++++++++++++++ var/www/templates/tags/menu_sidebar.html | 12 ++++++++++ .../templates/tags/search_obj_by_tags.html | 22 ++++++++++++++++++- 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/bin/modules/OcrExtractor.py b/bin/modules/OcrExtractor.py index 34c72dbe..03b03724 100755 --- a/bin/modules/OcrExtractor.py +++ b/bin/modules/OcrExtractor.py @@ -101,7 +101,12 @@ class OcrExtractor(AbstractModule): languages = get_model_languages(image) languages = Ocrs.sanityze_ocr_languages(languages, ocr_languages=self.ocr_languages) print(image.id, languages) - texts = Ocrs.extract_text(path, languages) + try: + texts = Ocrs.extract_text(path, languages) + except ValueError as e: + self.logger.warning(e) + self.obj.add_tag('infoleak:confirmed="false-positive"') + texts = None if texts: print('create') ocr = Ocrs.create(image.id, texts) diff --git a/var/www/blueprints/tags_ui.py b/var/www/blueprints/tags_ui.py index 2bf7faef..cc4e0c25 100644 --- a/var/www/blueprints/tags_ui.py +++ b/var/www/blueprints/tags_ui.py @@ -293,6 +293,24 @@ def tags_search_messages(): dict_tagged['date'] = Date.sanitise_date_range('', '', separator='-') return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged) +@tags_ui.route('/tag/search/image') +@login_required +@login_read_only +def tags_search_images(): + object_type = 'image' + dict_tagged = {"object_type": object_type, "object_name": object_type.title() + "s"} + dict_tagged['date'] = Date.sanitise_date_range('', '', separator='-') + return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged) + +@tags_ui.route('/tag/search/ocr') +@login_required +@login_read_only +def tags_search_ocrs(): + object_type = 'ocr' + dict_tagged = {"object_type": object_type, "object_name": object_type.title() + "s"} + dict_tagged['date'] = Date.sanitise_date_range('', '', separator='-') + return render_template("tags/search_obj_by_tags.html", bootstrap_label=bootstrap_label, dict_tagged=dict_tagged) + @tags_ui.route('/tag/search/domain') @login_required @login_read_only diff --git a/var/www/templates/tags/menu_sidebar.html b/var/www/templates/tags/menu_sidebar.html index b4da0792..198cb53b 100644 --- a/var/www/templates/tags/menu_sidebar.html +++ b/var/www/templates/tags/menu_sidebar.html @@ -22,6 +22,18 @@ Search Messages by Tags + +