diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py
index 61b94a71..b1eda5a1 100755
--- a/bin/crawlers/Crawler.py
+++ b/bin/crawlers/Crawler.py
@@ -21,6 +21,7 @@ from lib.Tag import get_domain_vanity_tags
from lib.objects import CookiesNames
from lib.objects import Etags
from lib.objects.Domains import Domain
+from lib.objects import DomHashs
from lib.objects import Favicons
from lib.objects.Items import Item
from lib.objects import Screenshots
@@ -348,6 +349,11 @@ class Crawler(AbstractModule):
self.root_item = item_id
parent_id = item_id
+ # DOM-HASH
+ dom_hash = DomHashs.create(entries['html'])
+ dom_hash.add(self.date.replace('/', ''), item)
+ dom_hash.add_correlation('domain', '', self.domain.id)
+
title_content = crawlers.extract_title_from_html(entries['html'])
if title_content:
title = Titles.create_title(title_content)
diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py
index 532a89f1..6e22f795 100755
--- a/bin/lib/ail_core.py
+++ b/bin/lib/ail_core.py
@@ -17,15 +17,15 @@ r_object = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
AIL_OBJECTS = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cookie-name', 'cve', 'cryptocurrency', 'decoded',
- 'domain', 'etag', 'favicon', 'file-name', 'hhhash','item', 'image', 'message', 'ocr', 'pgp',
- 'qrcode', 'screenshot', 'title', 'user-account', 'username'})
+ 'domain', 'dom-hash', 'etag', 'favicon', 'file-name', 'hhhash','item', 'image', 'message', 'ocr',
+ 'pgp', 'qrcode', 'screenshot', 'title', 'user-account', 'username'})
AIL_OBJECTS_WITH_SUBTYPES = {'chat', 'chat-subchannel', 'cryptocurrency', 'pgp', 'username', 'user-account'}
# TODO by object TYPE ????
AIL_OBJECTS_CORRELATIONS_DEFAULT = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cve', 'cryptocurrency', 'decoded',
- 'domain', 'favicon', 'file-name', 'item', 'image', 'message', 'ocr', 'pgp',
- 'qrcode', 'screenshot', 'title', 'user-account', 'username'})
+ 'domain', 'dom-hash', 'favicon', 'file-name', 'item', 'image', 'message',
+ 'ocr', 'pgp', 'qrcode', 'screenshot', 'title', 'user-account', 'username'})
def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')
diff --git a/bin/lib/ail_updates.py b/bin/lib/ail_updates.py
index 07fd791a..968c7004 100755
--- a/bin/lib/ail_updates.py
+++ b/bin/lib/ail_updates.py
@@ -46,6 +46,10 @@ BACKGROUND_UPDATES = {
'message': 'Compress HAR',
'scripts': ['compress_har.py']
},
+ 'v5.9': {
+ 'message': 'Compute Domain/Items Dom-Hash',
+ 'scripts': ['reprocess_dom_hash.py']
+ }
}
class AILBackgroundUpdate:
diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py
index 662ed3a0..9cf8b5d2 100755
--- a/bin/lib/correlations_engine.py
+++ b/bin/lib/correlations_engine.py
@@ -48,13 +48,14 @@ CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency": ["domain", "item", "message", "ocr", "qrcode"],
"cve": ["domain", "item", "message", "ocr", "qrcode"],
"decoded": ["domain", "item", "message", "ocr", "qrcode"],
- "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
+ "domain": ["cve", "cookie-name", "cryptocurrency", "dom-hash", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
+ "dom-hash": ["domain", "item"],
"etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
"file-name": ["chat", "message"],
"hhhash": ["domain"],
"image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "qrcode", "user-account"], # TODO subchannel + threads ????
- "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
+ "item": ["cve", "cryptocurrency", "decoded", "domain", "dom-hash", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
"ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
"pgp": ["domain", "item", "message", "ocr"],
diff --git a/bin/lib/objects/DomHashs.py b/bin/lib/objects/DomHashs.py
new file mode 100755
index 00000000..996a30dd
--- /dev/null
+++ b/bin/lib/objects/DomHashs.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+
+from bs4 import BeautifulSoup
+from hashlib import sha256
+from flask import url_for
+
+# import warnings
+# warnings.filterwarnings("ignore", category=DeprecationWarning)
+from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
+
+config_loader = ConfigLoader()
+r_objects = config_loader.get_db_conn("Kvrocks_Objects")
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+config_loader = None
+
+
+class DomHash(AbstractDaterangeObject):
+    """
+    AIL DomHash Object.
+
+    Daterange object of type 'dom-hash'. The module-level ``create(content)``
+    derives the object id from the HTML content it is given.
+    """
+
+    def __init__(self, id):
+        super(DomHash, self).__init__('dom-hash', id)
+
+    # def get_ail_2_ail_payload(self):
+    #     payload = {'raw': self.get_gzip_content(b64=True),
+    #                'compress': 'gzip'}
+    #     return payload
+
+    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+    def delete(self):
+        """Not implemented: currently a no-op."""
+        # # TODO:
+        pass
+
+    # def get_content(self, r_type='str'): # TODO Get random item -> compute hash
+    #     if r_type == 'str':
+    #         return self._get_field('content')
+    #     elif r_type == 'bytes':
+    #         return self._get_field('content').encode()
+
+    def get_link(self, flask_context=False):
+        """
+        Return the URL of the correlation view for this object.
+
+        :param flask_context: when true, resolve the URL with Flask ``url_for``;
+                              otherwise build it from the configured ``ail_domain``.
+        """
+        if flask_context:
+            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
+        else:
+            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
+        return url
+
+    def get_svg_icon(self):
+        """Return the icon style/glyph/color/radius dict for this object type."""
+        return {'style': 'fas', 'icon': '\uf714', 'color': 'grey', 'radius': 5}
+
+    def get_misp_object(self):
+        """
+        Build the MISP 'dom-hash' object for this hash.
+
+        first_seen/last_seen are set when available (a warning is logged if
+        either is missing) and every tag of this object is copied onto the
+        exported attribute.
+
+        :return: the populated ``MISPObject``
+        """
+        obj_attrs = []
+        obj = MISPObject('dom-hash')
+        first_seen = self.get_first_seen()
+        last_seen = self.get_last_seen()
+        if first_seen:
+            obj.first_seen = first_seen
+        if last_seen:
+            obj.last_seen = last_seen
+        if not first_seen or not last_seen:
+            self.logger.warning(
+                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
+
+        obj_attrs.append(obj.add_attribute('dom-hash', value=self.get_id()))
+        # TODO ############################# URLS
+        for obj_attr in obj_attrs:
+            for tag in self.get_tags():
+                obj_attr.add_tag(tag)
+        # Always return the object once every attribute is tagged; the former
+        # trailing `return None` was dead (or cut the tagging loop short).
+        return obj
+
+    def get_nb_seen(self):
+        """Return the number of 'domain' correlations of this dom-hash."""
+        return self.get_nb_correlation('domain')
+
+    def get_meta(self, options=None):
+        """
+        Return the base metadata extended with the 'id' and 'tags' fields.
+
+        :param options: optional set of metadata options forwarded to
+                        ``_get_meta``; defaults to an empty set.
+        """
+        # Avoid a shared mutable default argument.
+        if options is None:
+            options = set()
+        meta = self._get_meta(options=options)
+        meta['id'] = self.id
+        meta['tags'] = self.get_tags(r_list=True)
+        return meta
+
+    def create(self, _first_seen=None, _last_seen=None):
+        """Create the object via ``_create()``; both parameters are accepted but unused."""
+        self._create()
+
+
+def _compute_dom_hash(html_content):
+    """
+    Compute the DOM hash of an HTML document.
+
+    The document is parsed with the lxml parser, the names of all tags are
+    joined with '|' in the order BeautifulSoup returns them, and the first
+    32 hex characters of the SHA-256 of that string are returned.
+
+    :param html_content: HTML markup to fingerprint
+    :return: 32-character hexadecimal string
+    """
+    soup = BeautifulSoup(html_content, "lxml")
+    # find_all() is the bs4 name; findAll() is only a legacy BS3 alias.
+    to_hash = "|".join(t.name for t in soup.find_all()).encode()
+    return sha256(to_hash).hexdigest()[:32]
+
+
+def create(content):
+    """
+    Return the DomHash object matching *content*, creating it if needed.
+
+    :param content: HTML content used to compute the object id
+    :return: the existing or newly created ``DomHash``
+    """
+    dom_hash = DomHash(_compute_dom_hash(content))
+    if dom_hash.exists():
+        return dom_hash
+    dom_hash.create()
+    return dom_hash
+
+
+class DomHashs(AbstractDaterangeObjects):
+ """
+ DomHash Objects: daterange collection bound to the 'dom-hash' type and the DomHash class.
+ """
+ def __init__(self):
+ super().__init__('dom-hash', DomHash)
+
+ def sanitize_id_to_search(self, name_to_search):
+ # No sanitisation is applied: the search string is returned unchanged.
+ return name_to_search
+
+
+# if __name__ == '__main__':
+# # from lib import crawlers
+# # from lib.objects import Items
+# # for item in Items.get_all_items_objects(filters={'sources': ['crawled']}):
+# # title_content = crawlers.extract_title_from_html(item.get_content())
+# # if title_content:
+# # print(item.id, title_content)
+# # title = create_title(title_content)
+# # title.add(item.get_date(), item.id)
+# titles = Titles()
+# # for r in titles.get_ids_iterator():
+# # print(r)
+# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
+# print(r)
diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py
index d590b0c4..9823e928 100755
--- a/bin/lib/objects/ail_objects.py
+++ b/bin/lib/objects/ail_objects.py
@@ -30,6 +30,7 @@ from lib.objects.Domains import Domain
from lib.objects import Etags
from lib.objects import Favicons
from lib.objects import FilesNames
+from lib.objects import DomHashs
from lib.objects import HHHashs
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
from lib.objects import Images
@@ -91,6 +92,8 @@ def get_object(obj_type, subtype, obj_id):
return Favicons.Favicon(obj_id)
elif obj_type == 'file-name':
return FilesNames.FileName(obj_id)
+ elif obj_type == 'dom-hash':
+ return DomHashs.DomHash(obj_id)
elif obj_type == 'hhhash':
return HHHashs.HHHash(obj_id)
elif obj_type == 'image':
diff --git a/update/v5.9/Update.py b/update/v5.9/Update.py
new file mode 100755
index 00000000..a60db2a1
--- /dev/null
+++ b/update/v5.9/Update.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+
+sys.path.append(os.environ['AIL_HOME'])
+##################################
+# Import Project packages
+##################################
+from update.bin.ail_updater import AIL_Updater
+from lib import ail_updates
+
+class Updater(AIL_Updater):
+    """Updater for this AIL version; adds nothing on top of ``AIL_Updater``."""
+
+    def __init__(self, version):
+        # Only forwards the version string to the base updater.
+        super().__init__(version)
+
+
+if __name__ == '__main__':
+ # Run the v5.9 update, then call add_background_update('v5.9').
+ # NOTE(review): presumably this schedules the 'v5.9' BACKGROUND_UPDATES entry - confirm.
+ updater = Updater('v5.9')
+ updater.run_update()
+ ail_updates.add_background_update('v5.9')
diff --git a/update/v5.9/Update.sh b/update/v5.9/Update.sh
new file mode 100755
index 00000000..03353f35
--- /dev/null
+++ b/update/v5.9/Update.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# AIL v5.9 update: stop AIL, refresh git submodules, then run update/v5.9/Update.py.
+
+# Abort early when a required environment variable is missing.
+[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
+
+export PATH=$AIL_HOME:$PATH
+export PATH=$AIL_REDIS:$PATH
+export PATH=$AIL_BIN:$PATH
+export PATH=$AIL_FLASK:$PATH
+
+GREEN="\\033[1;32m"
+DEFAULT="\\033[0;39m"
+
+echo -e $GREEN"Shutting down AIL ..."$DEFAULT
+# Quoted so that an install path containing spaces does not split the argument.
+bash "${AIL_BIN}/LAUNCH.sh" -ks
+wait
+
+# SUBMODULES #
+git submodule update
+
+echo ""
+echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
+echo ""
+python "${AIL_HOME}/update/v5.9/Update.py"
+wait
+echo ""
+echo ""
+
+exit 0
diff --git a/update/v5.9/reprocess_dom_hash.py b/update/v5.9/reprocess_dom_hash.py
new file mode 100755
index 00000000..cf192ea8
--- /dev/null
+++ b/update/v5.9/reprocess_dom_hash.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import gzip
+import os
+import sys
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ail_updates
+from lib.objects import ail_objects
+from lib.objects import DomHashs
+from lib.objects.Domains import Domain
+
+if __name__ == '__main__':
+ # Background update v5.9: walk every item whose source is 'crawled' and
+ # compute its DOM hash, linking the hash to the item and to its domain.
+ update = ail_updates.AILBackgroundUpdate('v5.9')
+ n = 0
+ # The total number of crawled items is used as the progress target.
+ nb_items = ail_objects.card_obj_iterator('item', filters={'sources': ['crawled']})
+ update.set_nb_to_update(nb_items)
+
+ for item in ail_objects.obj_iterator('item', filters={'sources': ['crawled']}):
+ dom = item.get_domain()
+ domain = Domain(dom)
+ i_content = item.get_content()
+ # Only hash items that have content and whose domain exists.
+ if domain.exists() and i_content:
+ date = item.get_date()
+ # DOM-HASH
+ dom_hash = DomHashs.create(i_content)
+ dom_hash.add(date, item)
+ dom_hash.add_correlation('domain', '', domain.id)
+
+ print(domain.id, item.id, dom_hash.id)
+
+ update.inc_nb_updated()
+ n += 1
+ # update_progress() is only called on every 100th increment of n.
+ if n % 100 == 0:
+ update.update_progress()
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index c2fe5bdf..6890874f 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -54,6 +54,7 @@ from blueprints.objects_title import objects_title
from blueprints.objects_cookie_name import objects_cookie_name
from blueprints.objects_etag import objects_etag
from blueprints.objects_hhhash import objects_hhhash
+from blueprints.objects_dom_hash import objects_dom_hash
from blueprints.chats_explorer import chats_explorer
from blueprints.objects_image import objects_image
from blueprints.objects_ocr import objects_ocr
@@ -138,6 +139,7 @@ app.register_blueprint(objects_title, url_prefix=baseUrl)
app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
app.register_blueprint(objects_etag, url_prefix=baseUrl)
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
+app.register_blueprint(objects_dom_hash, url_prefix=baseUrl)
app.register_blueprint(chats_explorer, url_prefix=baseUrl)
app.register_blueprint(objects_image, url_prefix=baseUrl)
app.register_blueprint(objects_ocr, url_prefix=baseUrl)
diff --git a/var/www/blueprints/objects_dom_hash.py b/var/www/blueprints/objects_dom_hash.py
new file mode 100644
index 00000000..e493b3ab
--- /dev/null
+++ b/var/www/blueprints/objects_dom_hash.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+ Blueprint Flask: dom-hash objects endpoints: daterange listing, daterange chart JSON ...
+'''
+
+import os
+import sys
+
+from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file
+from flask_login import login_required
+
+# Import Role_Manager
+from Role_Manager import login_admin, login_read_only
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.objects import DomHashs
+from packages import Date
+
+# ============ BLUEPRINT ============
+objects_dom_hash = Blueprint('objects_dom_hash', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/dom-hash'))
+
+# ============ VARIABLES ============
+bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
+
+
+# ============ FUNCTIONS ============
+@objects_dom_hash.route("/objects/dom-hashs", methods=['GET'])
+@login_required
+@login_read_only
+def objects_dom_hashs():
+    """
+    Render the dom-hash daterange page.
+
+    Reads 'date_from', 'date_to' and 'show_objects' from the query string;
+    object metadata is only loaded when 'show_objects' is set.
+    """
+    show_objects = request.args.get('show_objects')
+    date_range = Date.sanitise_date_range(request.args.get('date_from'), request.args.get('date_to'))
+    date_from, date_to = date_range['date_from'], date_range['date_to']
+
+    dict_objects = {}
+    if show_objects:
+        dict_objects = DomHashs.DomHashs().api_get_meta_by_daterange(date_from, date_to)
+
+    return render_template("DomHashDaterange.html", date_from=date_from, date_to=date_to,
+                           dict_objects=dict_objects, show_objects=show_objects)
+
+@objects_dom_hash.route("/objects/dom-hash/post", methods=['POST'])
+@login_required
+@login_read_only
+def objects_dom_hashs_post():
+    """Redirect the submitted form fields to the GET daterange view."""
+    form = request.form
+    query = {field: form.get(field) for field in ('date_from', 'date_to', 'show_objects')}
+    return redirect(url_for('objects_dom_hash.objects_dom_hashs', **query))
+
+@objects_dom_hash.route("/objects/dom-hash/range/json", methods=['GET'])
+@login_required
+@login_read_only
+def objects_dom_hash_range_json():
+    """Return, as JSON, the dom-hash chart data for the requested date range."""
+    date_range = Date.sanitise_date_range(request.args.get('date_from'), request.args.get('date_to'))
+    chart = DomHashs.DomHashs().api_get_chart_nb_by_daterange(date_range['date_from'], date_range['date_to'])
+    return jsonify(chart)
+
+# @objects_dom_hash.route("/objects/dom-hash/search", methods=['POST'])
+# @login_required
+# @login_read_only
+# def objects_dom_hash_search():
+# date_from = request.args.get('date_from')
+# date_to = request.args.get('date_to')
+# date = Date.sanitise_date_range(date_from, date_to)
+# date_from = date['date_from']
+# date_to = date['date_to']
+# return jsonify(HHHashs.HHHashs().api_get_chart_nb_by_daterange(date_from, date_to))
+#
+# search_by_id
+
+# @objects_dom_hash.route("/objects/dom-hash/graphline/json", methods=['GET'])
+# @login_required
+# @login_read_only
+# def objects_dom_hash_graphline_json():
+# dom_hash_id = request.args.get('id')
+# cve = Cves.Cve(cve_id)
+# if not cve.exists():
+# abort(404)
+# return jsonify(Cves.get_cve_graphline(cve_id))
+
+# ============= ROUTES ==============
+
diff --git a/var/www/templates/correlation/metadata_card_dom_hash.html b/var/www/templates/correlation/metadata_card_dom_hash.html
new file mode 100644
index 00000000..09d0e859
--- /dev/null
+++ b/var/www/templates/correlation/metadata_card_dom_hash.html
@@ -0,0 +1,173 @@
+
+
+
+{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%}
+ {% include 'modals/add_tags.html' %}
+{% endwith %}
+
+{% include 'modals/edit_tag.html' %}
+
+
+
+
+
+
+
+
+
+
diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html
index 8012f66c..30f66ab1 100644
--- a/var/www/templates/correlation/show_correlation.html
+++ b/var/www/templates/correlation/show_correlation.html
@@ -126,6 +126,8 @@
{% include 'correlation/metadata_card_cookie_name.html' %}
{% elif dict_object["object_type"] == "etag" %}
{% include 'correlation/metadata_card_etag.html' %}
+ {% elif dict_object["object_type"] == "dom-hash" %}
+ {% include 'correlation/metadata_card_dom_hash.html' %}
{% elif dict_object["object_type"] == "hhhash" %}
{% include 'correlation/metadata_card_hhhash.html' %}
{% elif dict_object["object_type"] == "image" %}
@@ -267,6 +269,10 @@
+
+
+
+