mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
chg: [favicon object] add favicon object
This commit is contained in:
parent
3380f5462b
commit
94961f2eba
5 changed files with 195 additions and 23 deletions
|
@ -15,7 +15,7 @@ config_loader = ConfigLoader()
|
||||||
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
|
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
|
||||||
config_loader = None
|
config_loader = None
|
||||||
|
|
||||||
AIL_OBJECTS = sorted({'cve', 'cryptocurrency', 'decoded', 'domain', 'item', 'pgp', 'screenshot', 'title', 'username'})
|
AIL_OBJECTS = sorted({'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp', 'screenshot', 'title', 'username'})
|
||||||
|
|
||||||
def get_ail_uuid():
|
def get_ail_uuid():
|
||||||
ail_uuid = r_serv_db.get('ail:uuid')
|
ail_uuid = r_serv_db.get('ail:uuid')
|
||||||
|
|
|
@ -44,8 +44,9 @@ CORRELATION_TYPES_BY_OBJ = {
|
||||||
"cryptocurrency": ["domain", "item"],
|
"cryptocurrency": ["domain", "item"],
|
||||||
"cve": ["domain", "item"],
|
"cve": ["domain", "item"],
|
||||||
"decoded": ["domain", "item"],
|
"decoded": ["domain", "item"],
|
||||||
"domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "title", "screenshot", "username"],
|
"domain": ["cve", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
|
||||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "screenshot", "title", "username"],
|
"favicon": ["domain", "item"], # TODO Decoded
|
||||||
|
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
|
||||||
"pgp": ["domain", "item"],
|
"pgp": ["domain", "item"],
|
||||||
"screenshot": ["domain", "item"],
|
"screenshot": ["domain", "item"],
|
||||||
"title": ["domain", "item"],
|
"title": ["domain", "item"],
|
||||||
|
|
|
@ -141,9 +141,11 @@ def get_favicon_from_html(html, domain, url):
|
||||||
return favicon_urls
|
return favicon_urls
|
||||||
|
|
||||||
def extract_favicon_from_html(html, url):
|
def extract_favicon_from_html(html, url):
|
||||||
favicon_urls = set()
|
favicons = set()
|
||||||
|
favicons_urls = set()
|
||||||
|
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
set_icons = set()
|
all_icons = set()
|
||||||
# If there are multiple <link rel="icon">s, the browser uses their media,
|
# If there are multiple <link rel="icon">s, the browser uses their media,
|
||||||
# type, and sizes attributes to select the most appropriate icon.
|
# type, and sizes attributes to select the most appropriate icon.
|
||||||
# If several icons are equally appropriate, the last one is used.
|
# If several icons are equally appropriate, the last one is used.
|
||||||
|
@ -159,27 +161,65 @@ def extract_favicon_from_html(html, url):
|
||||||
# - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
|
# - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
|
||||||
# - <meta name="msapplication-config" content="/icons/browserconfig.xml">
|
# - <meta name="msapplication-config" content="/icons/browserconfig.xml">
|
||||||
|
|
||||||
# desktop browser 'shortcut icon' (older browser), 'icon'
|
|
||||||
for favicon_tag in ['icon', 'shortcut icon']:
|
|
||||||
if soup.head:
|
|
||||||
for icon in soup.head.find_all('link', attrs={'rel': lambda x : x and x.lower() == favicon_tag, 'href': True}):
|
|
||||||
set_icons.add(icon)
|
|
||||||
|
|
||||||
# # TODO: handle base64 favicon
|
# Root Favicon
|
||||||
for tag in set_icons:
|
f = get_faup()
|
||||||
|
f.decode(url)
|
||||||
|
url_decoded = f.get()
|
||||||
|
root_domain = f"{url_decoded['scheme']}://{url_decoded['domain']}"
|
||||||
|
default_icon = f'{root_domain}/favicon.ico'
|
||||||
|
favicons_urls.add(default_icon)
|
||||||
|
# print(default_icon)
|
||||||
|
|
||||||
|
# shortcut
|
||||||
|
for shortcut in soup.find_all('link', rel='shortcut icon'):
|
||||||
|
all_icons.add(shortcut)
|
||||||
|
# icons
|
||||||
|
for icon in soup.find_all('link', rel='icon'):
|
||||||
|
all_icons.add(icon)
|
||||||
|
|
||||||
|
for mask_icon in soup.find_all('link', rel='mask-icon'):
|
||||||
|
all_icons.add(mask_icon)
|
||||||
|
for apple_touche_icon in soup.find_all('link', rel='apple-touch-icon'):
|
||||||
|
all_icons.add(apple_touche_icon)
|
||||||
|
for msapplication in soup.find_all('meta', attrs={'name': 'msapplication-TileImage'}): # msapplication-TileColor
|
||||||
|
all_icons.add(msapplication)
|
||||||
|
|
||||||
|
# msapplication-TileImage
|
||||||
|
|
||||||
|
# print(all_icons)
|
||||||
|
for tag in all_icons:
|
||||||
icon_url = tag.get('href')
|
icon_url = tag.get('href')
|
||||||
if icon_url:
|
if icon_url:
|
||||||
if icon_url.startswith('//'):
|
|
||||||
icon_url = icon_url.replace('//', '/')
|
|
||||||
if icon_url.startswith('data:'):
|
if icon_url.startswith('data:'):
|
||||||
# # TODO: handle base64 favicon
|
data = icon_url.split(',', 1)
|
||||||
pass
|
if len(data) > 1:
|
||||||
|
data = ''.join(data[1].split())
|
||||||
|
favicon = base64.b64decode(data)
|
||||||
|
if favicon:
|
||||||
|
favicons.add(favicon)
|
||||||
else:
|
else:
|
||||||
icon_url = urljoin(url, icon_url)
|
favicon_url = urljoin(url, icon_url)
|
||||||
icon_url = urlparse(icon_url, scheme=urlparse(url).scheme).geturl()
|
favicons_urls.add(favicon_url)
|
||||||
favicon_urls.add(icon_url)
|
elif tag.get('name') == 'msapplication-TileImage':
|
||||||
return favicon_urls
|
icon_url = tag.get('content')
|
||||||
|
if icon_url:
|
||||||
|
if icon_url.startswith('data:'):
|
||||||
|
data = icon_url.split(',', 1)
|
||||||
|
if len(data) > 1:
|
||||||
|
data = ''.join(data[1].split())
|
||||||
|
favicon = base64.b64decode(data)
|
||||||
|
if favicon:
|
||||||
|
favicons.add(favicon)
|
||||||
|
else:
|
||||||
|
favicon_url = urljoin(url, icon_url)
|
||||||
|
favicons_urls.add(favicon_url)
|
||||||
|
print(favicon_url)
|
||||||
|
|
||||||
|
# print(favicons_urls)
|
||||||
|
return favicons_urls, favicons
|
||||||
|
|
||||||
|
# mmh3.hash(favicon)
|
||||||
|
|
||||||
# # # - - # # #
|
# # # - - # # #
|
||||||
|
|
||||||
|
@ -1755,7 +1795,9 @@ def test_ail_crawlers():
|
||||||
load_blacklist()
|
load_blacklist()
|
||||||
|
|
||||||
# if __name__ == '__main__':
|
# if __name__ == '__main__':
|
||||||
# item = Item('crawled/2023/03/06/foo.bec50a87b5-0c21-4ed4-9cb2-2d717a7a6507')
|
# item_id = 'crawled/2023/02/20/data.gz'
|
||||||
|
# item = Item(item_id)
|
||||||
# content = item.get_content()
|
# content = item.get_content()
|
||||||
# r = extract_author_from_html(content)
|
# temp_url = ''
|
||||||
|
# r = extract_favicon_from_html(content, temp_url)
|
||||||
# print(r)
|
# print(r)
|
||||||
|
|
126
bin/lib/objects/Favicons.py
Executable file
126
bin/lib/objects/Favicons.py
Executable file
|
@ -0,0 +1,126 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import mmh3
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from flask import url_for
|
||||||
|
|
||||||
|
from pymisp import MISPObject
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
|
||||||
|
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||||
|
config_loader = None
|
||||||
|
|
||||||
|
|
||||||
|
class Favicon(AbstractDaterangeObject):
    """
    AIL Favicon Object.

    The object id is exported as the ``favicon-mmh3`` MISP attribute and the
    favicon payload is kept in the object's ``content`` field.
    """

    def __init__(self, id):
        super(Favicon, self).__init__('favicon', id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        """
        Return the stored ``content`` field.

        :param r_type: ``'str'`` returns the field as stored, ``'bytes'``
                       returns it encoded to bytes. Any other value yields None.
        :return: the content in the requested representation, or None if the
                 field is missing or ``r_type`` is not supported.
        """
        if r_type not in ('str', 'bytes'):
            return None
        content = self._get_field('content')
        # get_misp_object() asks for bytes: encode only when the backend
        # handed back text, so an already-bytes value is passed through.
        if r_type == 'bytes' and isinstance(content, str):
            return content.encode()
        return content

    def get_link(self, flask_context=False):
        """
        Return the URL of this object's correlation view.

        :param flask_context: if True, build the URL with flask ``url_for``;
                              otherwise prefix it with the configured ``baseurl``.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Return the icon description (style, glyph, color, radius) for this object type."""
        return {'style': 'fas', 'icon': '\uf20a', 'color': '#1E88E5', 'radius': 5}  # f0c8 f45c

    def get_misp_object(self):
        """
        Build the MISP ``favicon`` object for this favicon.

        The first/last seen dates are set when available (a warning is logged
        when either is missing), and every tag of this object is copied onto
        each exported attribute.
        """
        obj_attrs = []
        obj = MISPObject('favicon')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('favicon-mmh3', value=self.id))
        obj_attrs.append(obj.add_attribute('favicon', value=self.get_content(r_type='bytes')))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    def get_meta(self, options=None):
        """
        Return the metadata dict of this favicon.

        :param options: optional set of extra fields to include; ``'content'``
                        adds the stored content. Defaults to no extra field.
        """
        # Avoid a shared mutable default: build a fresh set per call.
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        if 'content' in options:
            meta['content'] = self.get_content()
        return meta

    # def get_links(self):
    #     # TODO GET ALL URLS FROM CORRELATED ITEMS

    def add(self, date, item_id):  # TODO correlation base 64 -> calc md5
        """Register a sighting of this favicon in ``item_id`` at ``date``."""
        self._add(date, item_id)

    def create(self, content, _first_seen=None, _last_seen=None):
        """
        Store ``content`` and create the object.

        :param content: favicon content as str, or bytes that are decoded
                        with the default codec before being stored.
        :param _first_seen: currently unused.
        :param _last_seen: currently unused.

        NOTE(review): raw image bytes are usually not valid UTF-8, so callers
        are presumably expected to pass text-safe (e.g. base64) content -
        confirm against the callers.
        """
        if not isinstance(content, str):
            content = content.decode()
        self._set_field('content', content)
        self._create()
|
||||||
|
|
||||||
|
|
||||||
|
def create_favicon(content, url=None):  # TODO URL ????
    """
    Create (if needed) and return the Favicon object for ``content``.

    The content is base64-encoded before being hashed and stored: the
    resulting text is safe to decode and store, and hashing the base64 form
    with 32-bit MurmurHash3 gives the commonly used favicon hash, which is
    what the ``favicon-mmh3`` attribute expects.

    :param content: raw favicon content (bytes, or str which is encoded first).
    :param url: currently unused.
    :return: the Favicon object, or None when ``content`` is empty.
    """
    # Local import: base64 is not part of this module's top-level imports.
    import base64

    if not content:
        return None
    if isinstance(content, str):
        content = content.encode()
    # base64 makes the payload text-safe for Favicon.create(), which decodes
    # bytes to str; raw image bytes would raise UnicodeDecodeError there.
    content = base64.encodebytes(content)
    # mmh3.hash() yields a signed 32-bit int; keep the id as a string so it
    # can be used in keys and URLs (hash_bytes() returned a binary digest).
    favicon_id = str(mmh3.hash(content))
    favicon = Favicon(favicon_id)
    if not favicon.exists():
        favicon.create(content)
    return favicon
|
||||||
|
|
||||||
|
|
||||||
|
# TODO ADD SEARCH FUNCTION
|
||||||
|
|
||||||
|
class Favicons(AbstractDaterangeObjects):
    """
    Favicons Objects

    Collection-level helper for objects of type ``favicon``.
    """

    def __init__(self):
        super().__init__('favicon')

    def get_metas(self, obj_ids, options=None):
        """
        Return the metadata of several favicons.

        :param obj_ids: ids of the favicon objects to describe.
        :param options: optional set of extra fields forwarded to each
                        object's metadata lookup. Defaults to no extra field.
        """
        # Avoid a shared mutable default: build a fresh set per call.
        if options is None:
            options = set()
        return self._get_metas(Favicon, obj_ids, options=options)

    def sanitize_name_to_search(self, name_to_search):
        """Return ``name_to_search`` unchanged (no sanitization is implemented yet)."""
        return name_to_search  # TODO
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == '__main__':
|
||||||
|
# name_to_search = '98'
|
||||||
|
# print(search_cves_by_name(name_to_search))
|
|
@ -18,6 +18,7 @@ from lib.objects import CryptoCurrencies
|
||||||
from lib.objects.Cves import Cve
|
from lib.objects.Cves import Cve
|
||||||
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
|
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
|
||||||
from lib.objects.Domains import Domain
|
from lib.objects.Domains import Domain
|
||||||
|
from lib.objects.Favicons import Favicon
|
||||||
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
||||||
from lib.objects import Pgps
|
from lib.objects import Pgps
|
||||||
from lib.objects.Screenshots import Screenshot
|
from lib.objects.Screenshots import Screenshot
|
||||||
|
@ -54,6 +55,8 @@ def get_object(obj_type, subtype, id):
|
||||||
return Decoded(id)
|
return Decoded(id)
|
||||||
elif obj_type == 'cve':
|
elif obj_type == 'cve':
|
||||||
return Cve(id)
|
return Cve(id)
|
||||||
|
elif obj_type == 'favicon':
|
||||||
|
return Favicon(id)
|
||||||
elif obj_type == 'screenshot':
|
elif obj_type == 'screenshot':
|
||||||
return Screenshot(id)
|
return Screenshot(id)
|
||||||
elif obj_type == 'cryptocurrency':
|
elif obj_type == 'cryptocurrency':
|
||||||
|
@ -163,7 +166,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
||||||
obj = get_object(obj_type, subtype, id)
|
obj = get_object(obj_type, subtype, id)
|
||||||
meta = obj.get_meta()
|
meta = obj.get_meta()
|
||||||
meta['icon'] = obj.get_svg_icon()
|
meta['icon'] = obj.get_svg_icon()
|
||||||
if subtype or obj_type == 'cve' or obj_type == 'title':
|
if subtype or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
|
||||||
meta['sparkline'] = obj.get_sparkline()
|
meta['sparkline'] = obj.get_sparkline()
|
||||||
if obj_type == 'cve':
|
if obj_type == 'cve':
|
||||||
meta['cve_search'] = obj.get_cve_search()
|
meta['cve_search'] = obj.get_cve_search()
|
||||||
|
|
Loading…
Reference in a new issue