chg: [titles] add title IDs and contents search

This commit is contained in:
Terrtia 2023-06-13 15:29:11 +02:00
parent 94961f2eba
commit d4152462f5
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
7 changed files with 231 additions and 59 deletions

View file

@ -105,19 +105,14 @@ def create_favicon(content, url=None): # TODO URL ????
favicon.create(content)
# TODO ADD SEARCH FUNCTION
class Favicons(AbstractDaterangeObjects):
"""
Favicons Objects
"""
def __init__(self):
super().__init__('favicon')
super().__init__('favicon', Favicon)
def get_metas(self, obj_ids, options=set()):
return self._get_metas(Favicon, obj_ids, options=options)
def sanitize_name_to_search(self, name_to_search):
def sanitize_id_to_search(self, name_to_search):
return name_to_search # TODO

View file

@ -7,6 +7,8 @@ import sys
from hashlib import sha256
from flask import url_for
# import warnings
# warnings.filterwarnings("ignore", category=DeprecationWarning)
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
@ -100,21 +102,24 @@ class Titles(AbstractDaterangeObjects):
Titles Objects
"""
def __init__(self):
super().__init__('title')
super().__init__('title', Title)
def get_metas(self, obj_ids, options=set()):
return self._get_metas(Title, obj_ids, options=options)
def sanitize_name_to_search(self, name_to_search):
def sanitize_id_to_search(self, name_to_search):
return name_to_search
# if __name__ == '__main__':
# from lib import crawlers
# from lib.objects import Items
# for item in Items.get_all_items_objects(filters={'sources': ['crawled']}):
# title_content = crawlers.extract_title_from_html(item.get_content())
# if title_content:
# print(item.id, title_content)
# title = create_title(title_content)
# title.add(item.get_date(), item.id)
# # from lib import crawlers
# # from lib.objects import Items
# # for item in Items.get_all_items_objects(filters={'sources': ['crawled']}):
# # title_content = crawlers.extract_title_from_html(item.get_content())
# # if title_content:
# # print(item.id, title_content)
# # title = create_title(title_content)
# # title.add(item.get_date(), item.id)
# titles = Titles()
# # for r in titles.get_ids_iterator():
# # print(r)
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
# print(r)

View file

@ -163,16 +163,21 @@ class AbstractDaterangeObjects(ABC):
Abstract Daterange Objects
"""
def __init__(self, obj_type):
def __init__(self, obj_type, obj_class):
""" Abstract for Daterange Objects
:param obj_type: object type (item, ...)
:param obj_class: object python class (Item, ...)
"""
self.type = obj_type
self.obj_class = obj_class
def get_all(self):
def get_ids(self):
return r_object.smembers(f'{self.type}:all')
# def get_ids_iterator(self):
# return r_object.sscan_iter(r_object, f'{self.type}:all')
def get_by_date(self, date):
return r_object.zrange(f'{self.type}:date:{date}', 0, -1)
@ -185,35 +190,61 @@ class AbstractDaterangeObjects(ABC):
obj_ids = obj_ids | set(self.get_by_date(date))
return obj_ids
def get_metas(self, obj_ids, options=None):
    """Build the metadata dictionary of several objects.

    :param obj_ids: iterable of object IDs
    :param options: optional set of extra meta fields, forwarded unchanged to
                    each object's ``get_meta``; defaults to an empty set
    :return: dict mapping each object ID to the result of its ``get_meta``
    """
    # Use a fresh set per call: a mutable default argument would be shared
    # between every call of this method.
    if options is None:
        options = set()
    dict_obj = {}
    for obj_id in obj_ids:
        # obj_class is the concrete object class given to __init__
        obj = self.obj_class(obj_id)
        dict_obj[obj_id] = obj.get_meta(options=options)
    return dict_obj
@abstractmethod
def sanitize_name_to_search(self, name_to_search):
return name_to_search
def sanitize_id_to_search(self, id_to_search):
return id_to_search
def search_by_id(self, name_to_search, r_pos=False, case_sensitive=True):
    """Search the object IDs matching a regular expression.

    :param name_to_search: regex (a plain substring also works) searched in each ID
    :param r_pos: if True, add the match offsets ('hl-start', 'hl-end') to each result
    :param case_sensitive: if False, the pattern is compiled with re.IGNORECASE
    :return: dict {obj_id: {}} or, with r_pos, {obj_id: {'hl-start': int, 'hl-end': int}}.
             Empty dict when there is nothing to search, when the sanitizer
             returned a dict, or when the pattern is not a valid regex.
    """
    objs = {}
    r_name = self.sanitize_id_to_search(name_to_search)
    # A dict returned by the sanitizer is treated as "nothing to search"
    # (presumably an error payload -- confirm against the sanitizers).
    # The sanitized value is checked too, as search_by_content() does.
    if not name_to_search or not r_name or isinstance(r_name, dict):
        return objs
    flags = 0 if case_sensitive else re.IGNORECASE
    try:
        pattern = re.compile(r_name, flags=flags)
    except re.error:
        # The pattern comes from user input: an invalid regex matches nothing
        # instead of raising up to the caller.
        return objs
    for obj_id in self.get_ids():  # TODO REPLACE ME WITH AN ITERATOR
        res = pattern.search(obj_id)
        if res:
            objs[obj_id] = {}
            if r_pos:
                objs[obj_id]['hl-start'] = res.start()
                objs[obj_id]['hl-end'] = res.end()
    return objs
# Sanitization hook applied by search_by_content() before compiling the pattern;
# currently a pass-through that returns its input unchanged.
def sanitize_content_to_search(self, content_to_search):
return content_to_search
def search_by_content(self, content_to_search, r_pos=False, case_sensitive=True):
    """Search the objects whose content matches a regular expression.

    :param content_to_search: regex (a plain substring also works) searched in each content
    :param r_pos: if True, add the match offsets ('hl-start', 'hl-end') and the
                  full 'content' to each result
    :param case_sensitive: if False, the pattern is compiled with re.IGNORECASE
    :return: dict {obj_id: {}} or, with r_pos,
             {obj_id: {'hl-start': int, 'hl-end': int, 'content': str}}.
             Empty dict when there is nothing to search, when the sanitizer
             returned a dict, or when the pattern is not a valid regex.
    """
    objs = {}
    r_search = self.sanitize_content_to_search(content_to_search)
    if not r_search or isinstance(r_search, dict):
        return objs
    flags = 0 if case_sensitive else re.IGNORECASE
    try:
        pattern = re.compile(r_search, flags=flags)
    except re.error:
        # The pattern comes from user input: an invalid regex matches nothing
        # instead of raising up to the caller.
        return objs
    for obj_id in self.get_ids():  # TODO REPLACE ME WITH AN ITERATOR
        content = self.obj_class(obj_id).get_content()
        if content is None:
            # No stored content: searching None would raise TypeError
            continue
        res = pattern.search(content)
        if res:
            objs[obj_id] = {}
            if r_pos:  # TODO ADD CONTENT ????
                objs[obj_id]['hl-start'] = res.start()
                objs[obj_id]['hl-end'] = res.end()
                objs[obj_id]['content'] = content
    return objs
def api_get_chart_nb_by_daterange(self, date_from, date_to):
@ -226,5 +257,4 @@ class AbstractDaterangeObjects(ABC):
def api_get_meta_by_daterange(self, date_from, date_to):
date = Date.sanitise_date_range(date_from, date_to)
return self.get_metas(self.get_by_daterange(date['date_from'], date['date_to']), options={'sparkline'})
return self.get_metas(self.get_by_daterange(date['date_from'], date['date_to']), options={'sparkline'})

View file

@ -5,6 +5,7 @@
Blueprint Flask: title objects endpoints: dashboard, daterange, search ...
'''
import json
import os
import sys
@ -27,8 +28,11 @@ objects_title = Blueprint('objects_title', __name__, template_folder=os.path.joi
# ============ VARIABLES ============
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
# ============ FUNCTIONS ============
def create_json_response(data, status_code):
    """Serialize *data* as pretty-printed JSON and pair it with an HTTP status.

    :param data: JSON-serializable payload
    :param status_code: HTTP status code returned alongside the response
    :return: tuple (Response with mimetype application/json, status_code)
    """
    payload = json.dumps(data, indent=2, sort_keys=True)
    response = Response(payload, mimetype='application/json')
    return response, status_code
# ============= ROUTES ==============
@objects_title.route("/objects/title", methods=['GET'])
@login_required
@login_read_only
@ -72,15 +76,30 @@ def objects_title_range_json():
@login_required
@login_read_only
def objects_title_search():
    """Search titles by ID or by content and render the result page.

    Form fields read from the POST request:
      - ``to_search``: the ID, or the regex to search
      - ``search_type``: ``'id'`` (default) or ``'content'``
      - ``case_sensitive``: any non-empty value enables case-sensitive search

    A 64-character ID is looked up directly: the request is redirected to the
    title page, or aborted with a 404 if the title does not exist. Anything
    else is run as a regex search and rendered with search_title_result.html.
    An unknown search type yields a JSON error with status 400.
    """
    to_search = request.form.get('to_search')
    type_to_search = request.form.get('search_type', 'id')
    # An unchecked checkbox is not submitted at all, so any submitted value means "on"
    case_sensitive = bool(request.form.get('case_sensitive'))
    titles = Titles.Titles()

    if type_to_search == 'id':
        # TODO SANITIZE ID
        # The length test applies to the searched value, not to the search
        # type: len(type_to_search) is always 2 for 'id', which made the
        # direct lookup unreachable. 64 is presumably the length of a sha256
        # hex title ID -- confirm against Titles.Title.
        if to_search and len(to_search) == 64:
            title = Titles.Title(to_search)
            if not title.exists():
                abort(404)
            return redirect(title.get_link(flask_context=True))
        search_result = titles.search_by_id(to_search, r_pos=True, case_sensitive=case_sensitive)
    elif type_to_search == 'content':
        search_result = titles.search_by_content(to_search, r_pos=True, case_sensitive=case_sensitive)
    else:
        return create_json_response({'error': 'Unknown search type'}, 400)

    if search_result:
        dict_objects = titles.get_metas(search_result.keys(), options={'sparkline'})
    else:
        dict_objects = {}
    return render_template("search_title_result.html", dict_objects=dict_objects, search_result=search_result,
                           to_search=to_search, case_sensitive=case_sensitive, type_to_search=type_to_search)

View file

@ -75,17 +75,8 @@
<div class="col-xl-10">
<div class="mt-1" id="barchart_type"></div>
<div class="card border-secondary my-2">
<div class="card-body text-dark">
<h5 class="card-title">Search Title by name:</h5>
<form action="{{ url_for('objects_title.objects_title_search') }}" id="search_subtype_onj" method='post'>
<div class="input-group mb-1">
<input type="text" class="form-control col-8" name="object_id" value="" placeholder="Title ID" required>
<button class="btn btn-primary input-group-addon search-obj col-2"><i class="fas fa-search"></i></button>
</div>
</form>
</div>
</div>
{% include 'title/block_titles_search.html' %}
</div>

View file

@ -0,0 +1,19 @@
{# Titles search form: posts to_search, search_type ('content' or 'id') and the
   optional case_sensitive checkbox to objects_title.objects_title_search.
   When included from the result page, the previous search values are restored. #}
<div class="card border-secondary my-2">
  <div class="card-body text-dark">
    <h5 class="card-title">Titles Search:</h5>
    <form action="{{ url_for('objects_title.objects_title_search') }}" id="search_subtype_onj" method='post'>
      <div class="input-group mb-1">
        {# <select> has no "value" attribute: the current choice is set with "selected" on the option #}
        <select class="custom-select col-2" name="search_type" required>
          <option value="content" {% if type_to_search != "id" %}selected{% endif %}>Content Search</option>
          <option value="id" {% if type_to_search == "id" %}selected{% endif %}>ID Search</option>
        </select>
        <input type="text" class="form-control col-8" name="to_search" value="{% if to_search %}{{ to_search }}{% endif %}" placeholder="ID or content to Search" required>
        <button class="btn btn-primary input-group-addon search-obj col-2"><i class="fas fa-search"></i></button>
      </div>
      <div class="custom-control custom-switch mt-1">
        {# An unchecked checkbox is not submitted, so only the "checked" state matters;
           checked by default on a fresh form, otherwise restored from the last search #}
        <input class="custom-control-input" type="checkbox" name="case_sensitive" id="case_sensitive" value="True" {% if not type_to_search or case_sensitive %}checked{% endif %}>
        <label class="custom-control-label" for="case_sensitive">Case Sensitive</label>
      </div>
    </form>
  </div>
</div>

View file

@ -0,0 +1,113 @@
<!DOCTYPE html>
<html>
<head>
<title>Titles - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/ail-project.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/d3/sparklines.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'sidebars/sidebar_objects.html' %}
<div class="col-12 col-lg-10" id="core_content">
{% include 'title/block_titles_search.html' %}
<table id="table_objects" class="table table-striped table-bordered">
<thead class="bg-dark text-white">
<tr>
<th></th>
<th>First Seen</th>
<th>Last Seen</th>
<th>Total</th>
<th>Last days</th>
</tr>
</thead>
<tbody style="font-size: 15px;">
{% for obj_id in dict_objects %}
<tr>
<td>
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=title&id={{ obj_id }}">
{% if type_to_search == 'content' %}
<span>{{ dict_objects[obj_id]['content'][:search_result[obj_id]['hl-start']] }}</span><span class="hg-text">{{dict_objects[obj_id]['content'][search_result[obj_id]['hl-start']:search_result[obj_id]['hl-end']]}}</span>{{ dict_objects[obj_id]['content'][search_result[obj_id]['hl-end']:] }}
{% else %}
<span>{{ dict_objects[obj_id]['content'] }}</span>
{% endif %}
</a>
</td>
<td>{{ dict_objects[obj_id]['first_seen'] }}</td>
<td>{{ dict_objects[obj_id]['last_seen'] }}</td>
<td>{{ dict_objects[obj_id]['nb_seen'] }}</td>
<td id="sparklines_{{ obj_id }}" style="text-align:center;"></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
<script>
var chart = {};
$(document).ready(function(){
$("#page-Decoded").addClass("active");
$("#nav_title").addClass("active");
$('#table_objects').DataTable({
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
"iDisplayLength": 10,
"order": [[ 3, "desc" ]]
});
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
<script>
{% for obj_id in dict_objects %}
sparkline("sparklines_{{ obj_id }}", {{ dict_objects[obj_id]['sparkline'] }}, {});
{% endfor %}
</script>
</body>
</html>