From d4152462f5ac97e87435db09f811ae90cb70ce29 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Jun 2023 15:29:11 +0200 Subject: [PATCH] chg: [titles] add title IDs and contents search --- bin/lib/objects/Favicons.py | 9 +- bin/lib/objects/Titles.py | 31 +++-- bin/lib/objects/abstract_daterange_object.py | 68 ++++++++--- var/www/blueprints/objects_title.py | 37 ++++-- .../objects/title/TitleDaterange.html | 13 +- .../objects/title/block_titles_search.html | 19 +++ .../objects/title/search_title_result.html | 113 ++++++++++++++++++ 7 files changed, 231 insertions(+), 59 deletions(-) create mode 100644 var/www/templates/objects/title/block_titles_search.html create mode 100644 var/www/templates/objects/title/search_title_result.html diff --git a/bin/lib/objects/Favicons.py b/bin/lib/objects/Favicons.py index 8080effd..469e84ff 100755 --- a/bin/lib/objects/Favicons.py +++ b/bin/lib/objects/Favicons.py @@ -105,19 +105,14 @@ def create_favicon(content, url=None): # TODO URL ???? favicon.create(content) -# TODO ADD SEARCH FUNCTION - class Favicons(AbstractDaterangeObjects): """ Favicons Objects """ def __init__(self): - super().__init__('favicon') + super().__init__('favicon', Favicon) - def get_metas(self, obj_ids, options=set()): - return self._get_metas(Favicon, obj_ids, options=options) - - def sanitize_name_to_search(self, name_to_search): + def sanitize_id_to_search(self, name_to_search): return name_to_search # TODO diff --git a/bin/lib/objects/Titles.py b/bin/lib/objects/Titles.py index 59db2abe..f633a0cf 100755 --- a/bin/lib/objects/Titles.py +++ b/bin/lib/objects/Titles.py @@ -7,6 +7,8 @@ import sys from hashlib import sha256 from flask import url_for +# import warnings +# warnings.filterwarnings("ignore", category=DeprecationWarning) from pymisp import MISPObject sys.path.append(os.environ['AIL_BIN']) @@ -100,21 +102,24 @@ class Titles(AbstractDaterangeObjects): Titles Objects """ def __init__(self): - super().__init__('title') + super().__init__('title', 
Title) - def get_metas(self, obj_ids, options=set()): - return self._get_metas(Title, obj_ids, options=options) - - def sanitize_name_to_search(self, name_to_search): + def sanitize_id_to_search(self, name_to_search): return name_to_search # if __name__ == '__main__': -# from lib import crawlers -# from lib.objects import Items -# for item in Items.get_all_items_objects(filters={'sources': ['crawled']}): -# title_content = crawlers.extract_title_from_html(item.get_content()) -# if title_content: -# print(item.id, title_content) -# title = create_title(title_content) -# title.add(item.get_date(), item.id) +# # from lib import crawlers +# # from lib.objects import Items +# # for item in Items.get_all_items_objects(filters={'sources': ['crawled']}): +# # title_content = crawlers.extract_title_from_html(item.get_content()) +# # if title_content: +# # print(item.id, title_content) +# # title = create_title(title_content) +# # title.add(item.get_date(), item.id) +# titles = Titles() +# # for r in titles.get_ids_iterator(): +# # print(r) +# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False) +# print(r) + diff --git a/bin/lib/objects/abstract_daterange_object.py b/bin/lib/objects/abstract_daterange_object.py index 59a579f4..674c6219 100755 --- a/bin/lib/objects/abstract_daterange_object.py +++ b/bin/lib/objects/abstract_daterange_object.py @@ -163,16 +163,21 @@ class AbstractDaterangeObjects(ABC): Abstract Daterange Objects """ - def __init__(self, obj_type): + def __init__(self, obj_type, obj_class): """ Abstract for Daterange Objects :param obj_type: object type (item, ...) + :param obj_class: object python class (Item, ...) 
""" self.type = obj_type + self.obj_class = obj_class - def get_all(self): + def get_ids(self): return r_object.smembers(f'{self.type}:all') + # def get_ids_iterator(self): + # return r_object.sscan_iter(r_object, f'{self.type}:all') + def get_by_date(self, date): return r_object.zrange(f'{self.type}:date:{date}', 0, -1) @@ -185,35 +190,61 @@ class AbstractDaterangeObjects(ABC): obj_ids = obj_ids | set(self.get_by_date(date)) return obj_ids - @abstractmethod def get_metas(self, obj_ids, options=set()): - pass - - def _get_metas(self, obj_class_ref, obj_ids, options=set()): dict_obj = {} for obj_id in obj_ids: - obj = obj_class_ref(obj_id) + obj = self.obj_class(obj_id) dict_obj[obj_id] = obj.get_meta(options=options) return dict_obj @abstractmethod - def sanitize_name_to_search(self, name_to_search): - return name_to_search + def sanitize_id_to_search(self, id_to_search): + return id_to_search - def search_by_name(self, name_to_search, r_pos=False): + def search_by_id(self, name_to_search, r_pos=False, case_sensitive=True): objs = {} + if case_sensitive: + flags = 0 + else: + flags = re.IGNORECASE # for subtype in subtypes: - r_name = self.sanitize_name_to_search(name_to_search) + r_name = self.sanitize_id_to_search(name_to_search) if not name_to_search or isinstance(r_name, dict): return objs - r_name = re.compile(r_name) - for title_name in self.get_all(): - res = re.search(r_name, title_name) + r_name = re.compile(r_name, flags=flags) + for obj_id in self.get_ids(): # TODO REPLACE ME WITH AN ITERATOR + res = re.search(r_name, obj_id) if res: - objs[title_name] = {} + objs[obj_id] = {} if r_pos: - objs[title_name]['hl-start'] = res.start() - objs[title_name]['hl-end'] = res.end() + objs[obj_id]['hl-start'] = res.start() + objs[obj_id]['hl-end'] = res.end() + return objs + + def sanitize_content_to_search(self, content_to_search): + return content_to_search + + def search_by_content(self, content_to_search, r_pos=False, case_sensitive=True): + objs = {} + if 
case_sensitive: + flags = 0 + else: + flags = re.IGNORECASE + # for subtype in subtypes: + r_search = self.sanitize_content_to_search(content_to_search) + if not r_search or isinstance(r_search, dict): + return objs + r_search = re.compile(r_search, flags=flags) + for obj_id in self.get_ids(): # TODO REPLACE ME WITH AN ITERATOR + obj = self.obj_class(obj_id) + content = obj.get_content() + res = re.search(r_search, content) + if res: + objs[obj_id] = {} + if r_pos: # TODO ADD CONTENT ???? + objs[obj_id]['hl-start'] = res.start() + objs[obj_id]['hl-end'] = res.end() + objs[obj_id]['content'] = content return objs def api_get_chart_nb_by_daterange(self, date_from, date_to): @@ -226,5 +257,4 @@ class AbstractDaterangeObjects(ABC): def api_get_meta_by_daterange(self, date_from, date_to): date = Date.sanitise_date_range(date_from, date_to) - return self.get_metas(self.get_by_daterange(date['date_from'], date['date_to']), options={'sparkline'}) - + return self.get_metas(self.get_by_daterange(date['date_from'], date['date_to']), options={'sparkline'}) \ No newline at end of file diff --git a/var/www/blueprints/objects_title.py b/var/www/blueprints/objects_title.py index eef7f69c..c20f8626 100644 --- a/var/www/blueprints/objects_title.py +++ b/var/www/blueprints/objects_title.py @@ -5,6 +5,7 @@ Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... 
''' +import json import os import sys @@ -27,8 +28,11 @@ objects_title = Blueprint('objects_title', __name__, template_folder=os.path.joi # ============ VARIABLES ============ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] - # ============ FUNCTIONS ============ +def create_json_response(data, status_code): + return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code + +# ============= ROUTES ============== @objects_title.route("/objects/title", methods=['GET']) @login_required @login_read_only @@ -72,15 +76,30 @@ def objects_title_range_json(): @login_required @login_read_only def objects_title_search(): - to_search = request.form.get('object_id') + to_search = request.form.get('to_search') + type_to_search = request.form.get('search_type', 'id') + case_sensitive = request.form.get('case_sensitive') + case_sensitive = bool(case_sensitive) + titles = Titles.Titles() - # TODO SANITIZE ID - # TODO Search all - title = Titles.Title(to_search) - if not title.exists(): - abort(404) + if type_to_search == 'id': + if to_search and len(to_search) == 64: # full-length ID (presumably a sha256 hex digest - confirm): direct lookup + title = Titles.Title(to_search) + if not title.exists(): + abort(404) + else: + return redirect(title.get_link(flask_context=True)) + else: + search_result = titles.search_by_id(to_search, r_pos=True, case_sensitive=case_sensitive) + elif type_to_search == 'content': + search_result = titles.search_by_content(to_search, r_pos=True, case_sensitive=case_sensitive) else: - return redirect(title.get_link(flask_context=True)) + return create_json_response({'error': 'Unknown search type'}, 400) -# ============= ROUTES ============== + if search_result: + dict_objects = titles.get_metas(search_result.keys(), options={'sparkline'}) + else: + dict_objects = {} + return render_template("search_title_result.html", dict_objects=dict_objects, search_result=search_result, + to_search=to_search, case_sensitive=case_sensitive, type_to_search=type_to_search) diff --git 
a/var/www/templates/objects/title/TitleDaterange.html b/var/www/templates/objects/title/TitleDaterange.html index 17efda7c..f1c63420 100644 --- a/var/www/templates/objects/title/TitleDaterange.html +++ b/var/www/templates/objects/title/TitleDaterange.html @@ -75,17 +75,8 @@
-
-
-
Search Title by name:
-
-
- - -
-
-
-
+ {% include 'title/block_titles_search.html' %} +
diff --git a/var/www/templates/objects/title/block_titles_search.html b/var/www/templates/objects/title/block_titles_search.html new file mode 100644 index 00000000..4f49c287 --- /dev/null +++ b/var/www/templates/objects/title/block_titles_search.html @@ -0,0 +1,19 @@ +
+
+
Titles Search:
+
+
+ + + +
+
+ + +
+
+
+
\ No newline at end of file diff --git a/var/www/templates/objects/title/search_title_result.html b/var/www/templates/objects/title/search_title_result.html new file mode 100644 index 00000000..4885b449 --- /dev/null +++ b/var/www/templates/objects/title/search_title_result.html @@ -0,0 +1,113 @@ + + + + + Titles - AIL + + + + + + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'sidebars/sidebar_objects.html' %} + +
+ + {% include 'title/block_titles_search.html' %} + + + + + + + + + + + + + + + {% for obj_id in dict_objects %} + + + + + + + + {% endfor %} + +
First SeenLast SeenTotalLast days
+ + {% if type_to_search == 'content' %} + {{ dict_objects[obj_id]['content'][:search_result[obj_id]['hl-start']] }}{{dict_objects[obj_id]['content'][search_result[obj_id]['hl-start']:search_result[obj_id]['hl-end']]}}{{ dict_objects[obj_id]['content'][search_result[obj_id]['hl-end']:] }} + {% else %} + {{ dict_objects[obj_id]['content'] }} + {% endif %} + + {{ dict_objects[obj_id]['first_seen'] }}{{ dict_objects[obj_id]['last_seen'] }}{{ dict_objects[obj_id]['nb_seen'] }}
+ +
+
+
+ + + + + + + +