From 3f78457dffdb7156a1194c1d4e126227f0bfbdfe Mon Sep 17 00:00:00 2001 From: terrtia Date: Wed, 16 Oct 2024 10:56:35 +0200 Subject: [PATCH] chg: [tools] add reprocess title + CEDetector --- bin/lib/objects/ail_objects.py | 2 ++ bin/modules/CEDetector.py | 27 +++++++++++++++++++-------- tools/reprocess_objects.py | 4 +++- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index 82eb2bb6..d590b0c4 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -341,6 +341,8 @@ def obj_iterator(obj_type, filters): return Pgps.get_all_pgps_objects(filters=filters) elif obj_type == 'message': return chats_viewer.get_messages_iterator(filters=filters) + elif obj_type == 'title': + return Titles.Titles().get_iterator() def card_objs_iterators(filters): diff --git a/bin/modules/CEDetector.py b/bin/modules/CEDetector.py index 06e58e68..68f0adf4 100755 --- a/bin/modules/CEDetector.py +++ b/bin/modules/CEDetector.py @@ -111,17 +111,28 @@ def test_detection(): if not is_detected: not_detected.add(domain) - print() - print() - print() - print() for domain in not_detected: dom = Domain(domain) - print('-----------', domain) + # print('-----------', domain) for h in dom.get_correlation('title').get('title', []): - print(Title(h[1:]).get_content().lower()) - print() - print() + c = Title(h[1:]).get_content().lower() + if c == '404 not found': + lt = [] + dom = Domain(domain) + print('-----------', domain) + for hi in dom.get_correlation('title').get('title', []): + print(Title(hi[1:]).get_content().lower()) + ci = Title(hi[1:]).get_content().lower() + if ci != '404 not found' and ci not in []: + lt.append(ci) + if lt: + print('-----------', domain) + for ti in lt: + print(ti) + print() + print() + + # Tag.delete_object_tag(tag, 'domain', domain) if __name__ == "__main__": diff --git a/tools/reprocess_objects.py b/tools/reprocess_objects.py index f6c33236..a2505a19 100755 --- a/tools/reprocess_objects.py +++ b/tools/reprocess_objects.py @@ -29,11 +29,13 @@ from lib.objects import ail_objects # from modules.Onion import Onion # from modules.Telegram import Telegram +from modules.CEDetector import CEDetector from modules.Languages import Languages from modules.OcrExtractor import OcrExtractor from modules.QrCodeReader import QrCodeReader MODULES = { + 'CEDetector': CEDetector, 'Languages': Languages, 'OcrExtractor': OcrExtractor, 'QrCodeReader': QrCodeReader @@ -70,7 +72,7 @@ if __name__ == "__main__": obj_type = args.type if not is_object_type(obj_type): raise Exception(f'Invalid Object Type: {obj_type}') - if obj_type not in ['image', 'item', 'message', 'screenshot']: + if obj_type not in ['image', 'item', 'message', 'screenshot', 'title']: raise Exception(f'Currently not supported Object Type: {obj_type}') modulename = args.module