diff --git a/bin/importer/FeederImporter.py b/bin/importer/FeederImporter.py index e7a06132..021a52e2 100755 --- a/bin/importer/FeederImporter.py +++ b/bin/importer/FeederImporter.py @@ -87,13 +87,16 @@ class FeederImporter(AbstractImporter): feeder_name = feeder.get_name() print(f'importing: {feeder_name} feeder') - item_id = feeder.get_item_id() + item_id = feeder.get_item_id() # TODO replace me with object global id # process meta if feeder.get_json_meta(): feeder.process_meta() - gzip64_content = feeder.get_gzip64_content() - return f'{feeder_name} {item_id} {gzip64_content}' + if feeder_name == 'telegram': + return item_id # TODO support UI dashboard + else: + gzip64_content = feeder.get_gzip64_content() + return f'{feeder_name} {item_id} {gzip64_content}' class FeederModuleImporter(AbstractModule): diff --git a/bin/importer/feeders/Telegram.py b/bin/importer/feeders/Telegram.py index c9c448ef..52eb0a75 100755 --- a/bin/importer/feeders/Telegram.py +++ b/bin/importer/feeders/Telegram.py @@ -16,9 +16,30 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from importer.feeders.Default import DefaultFeeder +from lib.ConfigLoader import ConfigLoader +from lib.objects.Chats import Chat +from lib.objects import Messages +from lib.objects import UsersAccount from lib.objects.Usernames import Username from lib import item_basic +import base64 +import io +import gzip +def gunzip_bytes_obj(bytes_obj): + gunzipped_bytes_obj = None + try: + in_ = io.BytesIO() + in_.write(bytes_obj) + in_.seek(0) + + with gzip.GzipFile(fileobj=in_, mode='rb') as fo: + gunzipped_bytes_obj = fo.read() + except Exception as e: + print(f'Global; Invalid Gzip file: {e}') + + return gunzipped_bytes_obj + class TelegramFeeder(DefaultFeeder): def __init__(self, json_data): @@ -26,14 +47,17 @@ class TelegramFeeder(DefaultFeeder): self.name = 'telegram' # define item id - def get_item_id(self): - # TODO use telegram message date - date = datetime.date.today().strftime("%Y/%m/%d") - channel_id = str(self.json_data['meta']['chat']['id']) + def get_item_id(self): # TODO rename self.item_id + # Get message date + timestamp = self.json_data['meta']['date']['timestamp'] # TODO CREATE DEFAULT TIMESTAMP + # if self.json_data['meta'].get('date'): + # date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp']) + # date = date.strftime('%Y/%m/%d') + # else: + # date = datetime.date.today().strftime("%Y/%m/%d") + chat_id = str(self.json_data['meta']['chat']['id']) message_id = str(self.json_data['meta']['id']) - item_id = f'{channel_id}_{message_id}' - item_id = os.path.join('telegram', date, item_id) - self.item_id = f'{item_id}.gz' + self.item_id = Messages.create_obj_id('telegram', chat_id, message_id, timestamp) return self.item_id def process_meta(self): @@ -42,19 +66,67 @@ class TelegramFeeder(DefaultFeeder): """ # message chat meta = self.json_data['meta'] + mess_id = self.json_data['meta']['id'] + if meta.get('reply_to'): + reply_to_id = meta['reply_to'] + else: + reply_to_id = None + + timestamp = meta['date']['timestamp'] + date = datetime.datetime.fromtimestamp(timestamp) + date = date.strftime('%Y%m%d') + if meta.get('chat'): - if meta['chat'].get('username'): - user = meta['chat']['username'] - if user: - date = item_basic.get_item_date(self.item_id) - username = Username(user, 'telegram') - username.add(date, self.item_id) + chat = Chat(meta['chat']['id'], 'telegram') + + if meta['chat'].get('username'): # TODO USE ID AND SAVE USERNAME + chat_username = meta['chat']['username'] + + # Chat---Message + chat.add(date, self.item_id) # TODO modify to accept file objects + # message meta ????? who is the user if two user ???? + + if self.json_data.get('translation'): + translation = self.json_data['translation'] + else: + translation = None + decoded = base64.standard_b64decode(self.json_data['data']) + content = gunzip_bytes_obj(decoded) + Messages.create(self.item_id, content, translation=translation) + + chat.add_message(self.item_id, timestamp, mess_id, reply_id=reply_to_id) + else: + chat = None + # message sender - if meta.get('sender'): + if meta.get('sender'): # TODO handle message channel forward + user_id = meta['sender']['id'] + user_account = UsersAccount.UserAccount(user_id, 'telegram') + # UserAccount---Message + user_account.add(date, self.item_id) + # UserAccount---Chat + user_account.add_correlation(chat.type, chat.get_subtype(r_str=True), chat.id) + + if meta['sender'].get('firstname'): + user_account.set_first_name(meta['sender']['firstname']) + if meta['sender'].get('lastname'): + user_account.set_last_name(meta['sender']['lastname']) + if meta['sender'].get('phone'): + user_account.set_phone(meta['sender']['phone']) + if meta['sender'].get('username'): - user = meta['sender']['username'] - if user: - date = item_basic.get_item_date(self.item_id) - username = Username(user, 'telegram') - username.add(date, self.item_id) + username = Username(meta['sender']['username'], 'telegram') + user_account.add_correlation(username.type, username.get_subtype(r_str=True), username.id) + + # Username---Message + username.add(date, self.item_id) # TODO #################################################################### + if chat: + chat.add_correlation(username.type, username.get_subtype(r_str=True), username.id) + + # if meta.get('fwd_from'): + # if meta['fwd_from'].get('post_author') # user first name + + # TODO reply threads ???? + + return None diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py index 75520a2b..eeb83a98 100755 --- a/bin/lib/ail_core.py +++ b/bin/lib/ail_core.py @@ -15,8 +15,8 @@ config_loader = ConfigLoader() r_serv_db = config_loader.get_db_conn("Kvrocks_DB") config_loader = None -AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item', - 'pgp', 'screenshot', 'title', 'username'}) +AIL_OBJECTS = sorted({'chat', 'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item', + 'pgp', 'screenshot', 'title', 'user-account', 'username'}) def get_ail_uuid(): ail_uuid = r_serv_db.get('ail:uuid') @@ -38,9 +38,11 @@ def get_all_objects(): return AIL_OBJECTS def get_objects_with_subtypes(): - return ['cryptocurrency', 'pgp', 'username'] + return ['chat', 'cryptocurrency', 'pgp', 'username'] def get_object_all_subtypes(obj_type): + if obj_type == 'chat': + return ['discord', 'jabber', 'telegram'] if obj_type == 'cryptocurrency': return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash'] if obj_type == 'pgp': @@ -66,6 +68,14 @@ def get_all_objects_with_subtypes_tuple(): str_objs.append((obj_type, '')) return str_objs +def unpack_obj_global_id(global_id, r_type='tuple'): + if r_type == 'dict': + obj = global_id.split(':', 2) + return {'type': obj[0], 'subtype': obj[1], 'id': obj['2']} + else: # tuple(type, subtype, id) + return global_id.split(':', 2) + + ##-- AIL OBJECTS --## #### Redis #### diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py index 609aa8c6..94a06773 100755 --- a/bin/lib/correlations_engine.py +++ b/bin/lib/correlations_engine.py @@ -41,6 +41,7 @@ config_loader = None ################################## CORRELATION_TYPES_BY_OBJ = { + "chat": ["item", "username"], # item ??? "cookie-name": ["domain"], "cryptocurrency": ["domain", "item"], "cve": ["domain", "item"], @@ -49,11 +50,11 @@ CORRELATION_TYPES_BY_OBJ = { "etag": ["domain"], "favicon": ["domain", "item"], # TODO Decoded "hhhash": ["domain"], - "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], + "item": ["chat", "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], "pgp": ["domain", "item"], "screenshot": ["domain", "item"], "title": ["domain", "item"], - "username": ["domain", "item"], + "username": ["chat", "domain", "item"], } def get_obj_correl_types(obj_type): @@ -65,6 +66,8 @@ def sanityze_obj_correl_types(obj_type, correl_types): correl_types = set(correl_types).intersection(obj_correl_types) if not correl_types: correl_types = obj_correl_types + if not correl_types: + return [] return correl_types def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type): diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py new file mode 100755 index 00000000..438acf51 --- /dev/null +++ b/bin/lib/objects/Chats.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +from datetime import datetime + +from flask import url_for +# from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ail_core +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id +from lib.data_retention_engine import update_obj_date +from lib.objects import ail_objects +from lib import item_basic + +from lib.correlations_engine import get_correlation_by_correl_type + +config_loader = ConfigLoader() +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +r_object = config_loader.get_db_conn("Kvrocks_Objects") +r_cache = config_loader.get_redis_conn("Redis_Cache") +config_loader = None + + +################################################################################ +################################################################################ +################################################################################ + +class Chat(AbstractSubtypeObject): # TODO # ID == username ????? + """ + AIL Chat Object. (strings) + """ + + def __init__(self, id, subtype): + super(Chat, self).__init__('chat', id, subtype) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}' + return url + + def get_svg_icon(self): # TODO + # if self.subtype == 'telegram': + # style = 'fab' + # icon = '\uf2c6' + # elif self.subtype == 'discord': + # style = 'fab' + # icon = '\uf099' + # else: + # style = 'fas' + # icon = '\uf007' + style = 'fas' + icon = '\uf086' + return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5} + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['subtype'] = self.subtype + meta['tags'] = self.get_tags(r_list=True) + return meta + + def get_misp_object(self): + # obj_attrs = [] + # if self.subtype == 'telegram': + # obj = MISPObject('telegram-account', standalone=True) + # obj_attrs.append(obj.add_attribute('username', value=self.id)) + # + # elif self.subtype == 'twitter': + # obj = MISPObject('twitter-account', standalone=True) + # obj_attrs.append(obj.add_attribute('name', value=self.id)) + # + # else: + # obj = MISPObject('user-account', standalone=True) + # obj_attrs.append(obj.add_attribute('username', value=self.id)) + # + # first_seen = self.get_first_seen() + # last_seen = self.get_last_seen() + # if first_seen: + # obj.first_seen = first_seen + # if last_seen: + # obj.last_seen = last_seen + # if not first_seen or not last_seen: + # self.logger.warning( + # f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + # + # for obj_attr in obj_attrs: + # for tag in self.get_tags(): + # obj_attr.add_tag(tag) + # return obj + return + + ############################################################################ + ############################################################################ + + # others optional metas, ... -> # TODO ALL meta in hset + + def get_name(self): # get username ???? + pass + + # return username correlation + def get_users(self): # get participants ??? -> passive users ??? + pass + + # def get_last_message_id(self): + # + # return r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last:message:id') + + def get_obj_message_id(self, obj_id): + if obj_id.endswith('.gz'): + obj_id = obj_id[:-3] + return int(obj_id.split('_')[-1]) + + def _get_message_timestamp(self, obj_global_id): + return r_object.zscore(f'messages:{self.type}:{self.subtype}:{self.id}', obj_global_id) + + def _get_messages(self): + return r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True) + + def get_message_meta(self, obj_global_id, parent=True, mess_datetime=None): + obj = ail_objects.get_obj_from_global_id(obj_global_id) + mess_dict = obj.get_meta(options={'content', 'link', 'parent'}) + if mess_dict.get('parent') and parent: + mess_dict['reply_to'] = self.get_message_meta(mess_dict['parent'], parent=False) + mess_dict['username'] = {} + user = obj.get_correlation('username').get('username') + if user: + subtype, user = user.pop().split(':', 1) + mess_dict['username']['type'] = 'telegram' + mess_dict['username']['subtype'] = subtype + mess_dict['username']['id'] = user + else: + mess_dict['username']['id'] = 'UNKNOWN' + + if not mess_datetime: + obj_mess_id = self._get_message_timestamp(obj_global_id) + mess_datetime = datetime.fromtimestamp(obj_mess_id) + mess_dict['date'] = mess_datetime.isoformat(' ') + mess_dict['hour'] = mess_datetime.strftime('%H:%M:%S') + return mess_dict + + + def get_messages(self, start=0, page=1, nb=500): # TODO limit nb returned, # TODO add replies + start = 0 + stop = -1 + # r_object.delete(f'messages:{self.type}:{self.subtype}:{self.id}') + + # TODO chat without username ???? -> chat ID ???? + + messages = {} + curr_date = None + for message in self._get_messages(): + date = datetime.fromtimestamp(message[1]) + date_day = date.strftime('%Y/%m/%d') + if date_day != curr_date: + messages[date_day] = [] + curr_date = date_day + mess_dict = self.get_message_meta(message[0], parent=True, mess_datetime=date) + messages[date_day].append(mess_dict) + return messages + + # Zset with ID ??? id -> item id ??? multiple id == media + text + # id -> media id + # How do we handle reply/thread ??? -> separate with new chats name/id ZSET ??? + # Handle media ??? + + # list of message id -> obj_id + # list of obj_id -> + # abuse parent children ??? + + # def add(self, timestamp, obj_id, mess_id=0, username=None, user_id=None): + # date = # TODO get date from object + # self.update_daterange(date) + # update_obj_date(date, self.type, self.subtype) + # + # + # # daily + # r_object.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1) + # # all subtypes + # r_object.zincrby(f'{self.type}_all:{self.subtype}', 1, self.id) + # + # ####################################################################### + # ####################################################################### + # + # # Correlations + # self.add_correlation('item', '', item_id) + # # domain + # if is_crawled(item_id): + # domain = get_item_domain(item_id) + # self.add_correlation('domain', '', domain) + + # TODO kvrocks exception if key don't exists + def get_obj_by_message_id(self, mess_id): + return r_object.hget(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id) + + # importer -> use cache for previous reply SET to_add_id: previously_imported : expire SET key -> 30 mn + def add_message(self, obj_global_id, timestamp, mess_id, reply_id=None): + r_object.hset(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id, obj_global_id) + r_object.zadd(f'messages:{self.type}:{self.subtype}:{self.id}', {obj_global_id: timestamp}) + + if reply_id: + reply_obj = self.get_obj_by_message_id(reply_id) + if reply_obj: + self.add_obj_children(reply_obj, obj_global_id) + else: + self.add_message_cached_reply(reply_id, mess_id) + + # ADD cached replies + for reply_obj in self.get_cached_message_reply(mess_id): + self.add_obj_children(obj_global_id, reply_obj) + + def _get_message_cached_reply(self, message_id): + return r_cache.smembers(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{message_id}') + + def get_cached_message_reply(self, message_id): + objs_global_id = [] + for mess_id in self._get_message_cached_reply(message_id): + obj_global_id = self.get_obj_by_message_id(mess_id) + if obj_global_id: + objs_global_id.append(obj_global_id) + return objs_global_id + + def add_message_cached_reply(self, reply_to_id, message_id): + r_cache.sadd(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', message_id) + r_cache.expire(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', 600) + + # TODO nb replies = nb son ???? what if it create a onion item ??? -> need source filtering + + +# TODO factorize +def get_all_subtypes(): + return ail_core.get_object_all_subtypes('chat') + +def get_all(): + objs = {} + for subtype in get_all_subtypes(): + objs[subtype] = get_all_by_subtype(subtype) + return objs + +def get_all_by_subtype(subtype): + return get_all_id('chat', subtype) + +# # TODO FILTER NAME + Key + mail +# def sanitize_username_name_to_search(name_to_search, subtype): # TODO FILTER NAME +# +# return name_to_search +# +# def search_usernames_by_name(name_to_search, subtype, r_pos=False): +# usernames = {} +# # for subtype in subtypes: +# r_name = sanitize_username_name_to_search(name_to_search, subtype) +# if not name_to_search or isinstance(r_name, dict): +# # break +# return usernames +# r_name = re.compile(r_name) +# for user_name in get_all_usernames_by_subtype(subtype): +# res = re.search(r_name, user_name) +# if res: +# usernames[user_name] = {} +# if r_pos: +# usernames[user_name]['hl-start'] = res.start() +# usernames[user_name]['hl-end'] = res.end() +# return usernames + + +if __name__ == '__main__': + chat = Chat('test', 'telegram') + r = chat.get_messages() + print(r) diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py index 03c6f2cd..c2edbb40 100755 --- a/bin/lib/objects/Items.py +++ b/bin/lib/objects/Items.py @@ -288,6 +288,8 @@ class Item(AbstractObject): meta['mimetype'] = self.get_mimetype(content=content) if 'investigations' in options: meta['investigations'] = self.get_investigations() + if 'link' in options: + meta['link'] = self.get_link(flask_context=True) # meta['encoding'] = None return meta diff --git a/bin/lib/objects/Messages.py b/bin/lib/objects/Messages.py new file mode 100755 index 00000000..98cc838f --- /dev/null +++ b/bin/lib/objects/Messages.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import cld3 +import html2text + +from datetime import datetime + +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ail_core import get_ail_uuid +from lib.objects.abstract_object import AbstractObject +from lib.ConfigLoader import ConfigLoader +from lib.data_retention_engine import update_obj_date, get_obj_date_first +# TODO Set all messages ??? + + +from flask import url_for + +config_loader = ConfigLoader() +r_cache = config_loader.get_redis_conn("Redis_Cache") +r_object = config_loader.get_db_conn("Kvrocks_Objects") +r_content = config_loader.get_db_conn("Kvrocks_Content") +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + + +# TODO SAVE OR EXTRACT MESSAGE SOURCE FOR ICON ????????? +# TODO iterate on all objects +# TODO also add support for small objects ???? + +# CAN Message exists without CHAT -> no convert it to object + +# ID: source:chat_id:message_id ???? +# +# /!\ handle null chat and message id -> chat = uuid and message = timestamp ??? + + +class Message(AbstractObject): + """ + AIL Message Object. (strings) + """ + + def __init__(self, id): # TODO subtype or use source ???? + super(Message, self).__init__('message', id) # message::< telegram/1692189934.380827/ChatID_MessageID > + + def exists(self): + if self.subtype is None: + return r_object.exists(f'meta:{self.type}:{self.id}') + else: + return r_object.exists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}') + + def get_source(self): + """ + Returns source/feeder name + """ + l_source = self.id.split('/')[:-4] + return os.path.join(*l_source) + + def get_basename(self): + return os.path.basename(self.id) + + def get_content(self, r_type='str'): # TODO ADD cache # TODO Compress content ??????? + """ + Returns content + """ + content = self._get_field('content') + if r_type == 'str': + return content + elif r_type == 'bytes': + return content.encode() + + def get_date(self): + timestamp = self.get_timestamp() + return datetime.fromtimestamp(timestamp).strftime('%Y%m%d') + + def get_timestamp(self): + dirs = self.id.split('/') + return dirs[-2] + + def get_message_id(self): # TODO optimize + message_id = self.get_basename().rsplit('_', 1)[1] + # if message_id.endswith('.gz'): + # message_id = message_id[:-3] + return message_id + + def get_chat_id(self): # TODO optimize + chat_id = self.get_basename().rsplit('_', 1)[0] + # if chat_id.endswith('.gz'): + # chat_id = chat_id[:-3] + return chat_id + + # Update value on import + # reply to -> parent ? + # reply/comment - > children ? + # nb views + # reactions + # nb fowards + # room ??? + # message from channel ??? + # message media + + def get_translation(self): # TODO support multiple translated languages ????? + """ + Returns translated content + """ + return self._get_field('translated') # TODO multiples translation ... -> use set + + def _set_translation(self, translation): + """ + Set translated content + """ + return self._set_field('translated', translation) # translation by hash ??? -> avoid translating multiple time + + def get_html2text_content(self, content=None, ignore_links=False): + if not content: + content = self.get_content() + h = html2text.HTML2Text() + h.ignore_links = ignore_links + h.ignore_images = ignore_links + return h.handle(content) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True)} + # return payload + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}' + return url + + def get_svg_icon(self): + return {'style': 'fas', 'icon': 'fa-comment-dots', 'color': '#4dffff', 'radius': 5} + + def get_misp_object(self): # TODO + obj = MISPObject('instant-message', standalone=True) + obj_date = self.get_date() + if obj_date: + obj.first_seen = obj_date + else: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}') + + # obj_attrs = [obj.add_attribute('first-seen', value=obj_date), + # obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()), + # obj.add_attribute('sensor', value=get_ail_uuid())] + obj_attrs = [] + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + + # def get_url(self): + # return r_object.hget(f'meta:item::{self.id}', 'url') + + # options: set of optional meta fields + def get_meta(self, options=None): + """ + :type options: set + """ + if options is None: + options = set() + meta = self.get_default_meta(tags=True) + meta['date'] = self.get_date() # TODO replace me by timestamp ?????? + meta['source'] = self.get_source() + # optional meta fields + if 'content' in options: + meta['content'] = self.get_content() + if 'parent' in options: + meta['parent'] = self.get_parent() + if 'investigations' in options: + meta['investigations'] = self.get_investigations() + if 'link' in options: + meta['link'] = self.get_link(flask_context=True) + + # meta['encoding'] = None + return meta + + def _languages_cleaner(self, content=None): + if not content: + content = self.get_content() + # REMOVE URLS + regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b' + url_regex = re.compile(regex) + urls = url_regex.findall(content) + urls = sorted(urls, key=len, reverse=True) + for url in urls: + content = content.replace(url, '') + # REMOVE PGP Blocks + regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----' + regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----' + regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----' + re.compile(regex_pgp_public_blocs) + re.compile(regex_pgp_signature) + re.compile(regex_pgp_message) + res = re.findall(regex_pgp_public_blocs, content) + for it in res: + content = content.replace(it, '') + res = re.findall(regex_pgp_signature, content) + for it in res: + content = content.replace(it, '') + res = re.findall(regex_pgp_message, content) + for it in res: + content = content.replace(it, '') + return content + + def detect_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7): + languages = [] + ## CLEAN CONTENT ## + content = self.get_html2text_content(ignore_links=True) + content = self._languages_cleaner(content=content) + # REMOVE USELESS SPACE + content = ' '.join(content.split()) + # - CLEAN CONTENT - # + if len(content) >= min_len: + for lang in cld3.get_frequent_languages(content, num_langs=num_langs): + if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable: + languages.append(lang) + return languages + + # def translate(self, content=None): # TODO translation plugin + # # TODO get text language + # if not content: + # content = self.get_content() + # translated = argostranslate.translate.translate(content, 'ru', 'en') + # # Save translation + # self._set_translation(translated) + # return translated + + def create(self, content, translation, tags): + self._set_field('content', content) + r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content) + if translation: + self._set_translation(translation) + for tag in tags: + self.add_tag(tag) + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + pass + +def create_obj_id(source, chat_id, message_id, timestamp): + return f'{source}/{timestamp}/{chat_id}_{message_id}' + +# TODO Check if already exists +# def create(source, chat_id, message_id, timestamp, content, tags=[]): +def create(obj_id, content, translation=None, tags=[]): + message = Message(obj_id) + if not message.exists(): + message.create(content, translation, tags) + return message + + +# TODO Encode translation + + +if __name__ == '__main__': + r = 'test' + print(r) diff --git a/bin/lib/objects/UsersAccount.py b/bin/lib/objects/UsersAccount.py new file mode 100755 index 00000000..0355806e --- /dev/null +++ b/bin/lib/objects/UsersAccount.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import re + +from flask import url_for +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ail_core +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id + +config_loader = ConfigLoader() +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + + +################################################################################ +################################################################################ +################################################################################ + +class UserAccount(AbstractSubtypeObject): + """ + AIL User Object. (strings) + """ + + def __init__(self, id, subtype): + super(UserAccount, self).__init__('user-account', id, subtype) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}' + return url + + def get_svg_icon(self): # TODO change icon/color + if self.subtype == 'telegram': + style = 'fab' + icon = '\uf2c6' + elif self.subtype == 'twitter': + style = 'fab' + icon = '\uf099' + else: + style = 'fas' + icon = '\uf007' + return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5} + + def get_first_name(self): + return self._get_field('firstname') + + def get_last_name(self): + return self._get_field('lastname') + + def get_phone(self): + return self._get_field('phone') + + def set_first_name(self, firstname): + return self._set_field('firstname', firstname) + + def set_last_name(self, lastname): + return self._set_field('lastname', lastname) + + def set_phone(self, phone): + return self._set_field('phone', phone) + + # TODO REWRITE ADD FUNCTION + + def get_username(self): + return '' + + def get_usernames(self): + usernames = [] + correl = self.get_correlation('username') + for partial_id in correl.get('username', []): + usernames.append(f'username:{partial_id}') + return usernames + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['subtype'] = self.subtype + meta['tags'] = self.get_tags(r_list=True) + if 'username' in options: + meta['username'] = self.get_username() + if 'usernames' in options: + meta['usernames'] = self.get_usernames() + return meta + + def get_misp_object(self): + obj_attrs = [] + if self.subtype == 'telegram': + obj = MISPObject('telegram-account', standalone=True) + obj_attrs.append(obj.add_attribute('username', value=self.id)) + + elif self.subtype == 'twitter': + obj = MISPObject('twitter-account', standalone=True) + obj_attrs.append(obj.add_attribute('name', value=self.id)) + + else: + obj = MISPObject('user-account', standalone=True) + obj_attrs.append(obj.add_attribute('username', value=self.id)) + + first_seen = self.get_first_seen() + last_seen = self.get_last_seen() + if first_seen: + obj.first_seen = first_seen + if last_seen: + obj.last_seen = last_seen + if not first_seen or not last_seen: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + +def get_user_by_username(): + pass + +def get_all_subtypes(): + return ail_core.get_object_all_subtypes('user-account') + +def get_all(): + users = {} + for subtype in get_all_subtypes(): + users[subtype] = get_all_by_subtype(subtype) + return users + +def get_all_by_subtype(subtype): + return get_all_id('user-account', subtype) + + +# if __name__ == '__main__': +# name_to_search = 'co' +# subtype = 'telegram' +# print(search_usernames_by_name(name_to_search, subtype)) diff --git a/bin/lib/objects/abstract_daterange_object.py b/bin/lib/objects/abstract_daterange_object.py index 5ec103d0..98aa49c2 100755 --- a/bin/lib/objects/abstract_daterange_object.py +++ b/bin/lib/objects/abstract_daterange_object.py @@ -45,10 +45,10 @@ class AbstractDaterangeObject(AbstractObject, ABC): def exists(self): return r_object.exists(f'meta:{self.type}:{self.id}') - def _get_field(self, field): + def _get_field(self, field): # TODO remove me (NEW in abstract) return r_object.hget(f'meta:{self.type}:{self.id}', field) - def _set_field(self, field, value): + def _set_field(self, field, value): # TODO remove me (NEW in abstract) return r_object.hset(f'meta:{self.type}:{self.id}', field, value) def get_first_seen(self, r_int=False): diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index 2423a294..59a7e968 100755 --- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -20,6 +20,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from lib import ail_logger from lib import Tag +from lib.ConfigLoader import ConfigLoader from lib import Duplicate from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations @@ -27,6 +28,11 @@ from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers logging.config.dictConfig(ail_logger.get_config(name='ail')) +config_loader = ConfigLoader() +# r_cache = config_loader.get_redis_conn("Redis_Cache") +r_object = config_loader.get_db_conn("Kvrocks_Objects") +config_loader = None + class AbstractObject(ABC): """ Abstract Object @@ -67,6 +73,18 @@ class AbstractObject(ABC): dict_meta['tags'] = self.get_tags() return dict_meta + def _get_field(self, field): + if self.subtype is None: + return r_object.hget(f'meta:{self.type}:{self.id}', field) + else: + return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field) + + def _set_field(self, field, value): + if self.subtype is None: + return r_object.hset(f'meta:{self.type}:{self.id}', field, value) + else: + return r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field, value) + ## Tags ## def get_tags(self, r_list=False): tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True)) @@ -198,6 +216,8 @@ class AbstractObject(ABC): else: return [] + ## Correlation ## + def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type): """ Get object correlation @@ -253,3 +273,39 @@ class AbstractObject(ABC): Get object correlations """ delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2) + + ## -Correlation- ## + + ## Parent ## + + def is_parent(self): + return r_object.exists(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}') + + def is_children(self): + return r_object.hexists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent') + + def get_parent(self): + return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent') + + def get_children(self): + return r_object.smembers(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}') + + def set_parent(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ###################### + if not obj_global_id: + if obj_subtype is None: + obj_subtype = '' + obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' + r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent', obj_global_id) + + def add_children(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ###################### + if not obj_global_id: + if obj_subtype is None: + obj_subtype = '' + obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' + r_object.sadd(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', obj_global_id) + + def add_obj_children(self, parent_global_id, son_global_id): + r_object.sadd(f'child:{parent_global_id}', son_global_id) + r_object.hset(f'meta:{son_global_id}', 'parent', parent_global_id) + + ## Parent ## diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index f12708fb..cd2f7225 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -13,6 +13,7 @@ from lib import correlations_engine from lib import btc_ail from lib import Tag +from lib.objects import Chats from lib.objects import CryptoCurrencies from lib.objects import CookiesNames from lib.objects.Cves import Cve @@ -55,6 +56,8 @@ def get_object(obj_type, subtype, id): return Domain(id) elif obj_type == 'decoded': return Decoded(id) + elif obj_type == 'chat': + return Chats.Chat(id, subtype) elif obj_type == 'cookie-name': return CookiesNames.CookieName(id) elif obj_type == 'cve': diff --git a/bin/lib/timeline_engine.py b/bin/lib/timeline_engine.py new file mode 100755 index 00000000..405e7a50 --- /dev/null +++ b/bin/lib/timeline_engine.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +from uuid import uuid4 + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader + +config_loader = ConfigLoader() +r_meta = config_loader.get_db_conn("Kvrocks_Timeline") +config_loader = None + +# CORRELATION_TYPES_BY_OBJ = { +# "chat": ["item", "username"], # item ??? +# "cookie-name": ["domain"], +# "cryptocurrency": ["domain", "item"], +# "cve": ["domain", "item"], +# "decoded": ["domain", "item"], +# "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"], +# "etag": ["domain"], +# "favicon": ["domain", "item"], +# "hhhash": ["domain"], +# "item": ["chat", "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], +# "pgp": ["domain", "item"], +# "screenshot": ["domain", "item"], +# "title": ["domain", "item"], +# "username": ["chat", "domain", "item"], +# } +# +# def get_obj_correl_types(obj_type): +# return CORRELATION_TYPES_BY_OBJ.get(obj_type) + +# def sanityze_obj_correl_types(obj_type, correl_types): +# obj_correl_types = get_obj_correl_types(obj_type) +# if correl_types: +# correl_types = set(correl_types).intersection(obj_correl_types) +# if not correl_types: +# correl_types = obj_correl_types +# if not correl_types: +# return [] +# return correl_types + +# TODO rename all function + add missing parameters + +def get_bloc_obj_global_id(bloc): + return r_meta.hget('hset:key', bloc) + +def set_bloc_obj_global_id(bloc, global_id): + return r_meta.hset('hset:key', bloc, global_id) + +def get_bloc_timestamp(bloc, position): + return r_meta.zscore('key', f'{position}:{bloc}') + +def add_bloc(global_id, timestamp, end=None): + if end: + timestamp_end = end + else: + timestamp_end = timestamp + new_bloc = str(uuid4()) + r_meta.zadd('key', {f'start:{new_bloc}': timestamp, f'end:{new_bloc}': timestamp_end}) + set_bloc_obj_global_id(new_bloc, global_id) + return new_bloc + +def _update_bloc(bloc, position, timestamp): + r_meta.zadd('key', {f'{position}:{bloc}': timestamp}) + +# score = timestamp +def get_nearest_bloc_inf(timestamp): + return r_meta.zrevrangebyscore('key', timestamp, 0, num=1) + +def get_nearest_bloc_sup(timestamp): + return r_meta.zrangebyscore('key', timestamp, 0, num=1) + +####################################################################################### + +def add_timestamp(timestamp, obj_global_id): + inf = get_nearest_bloc_inf(timestamp) + sup = get_nearest_bloc_sup(timestamp) + if not inf and not sup: + # create new bloc + new_bloc = add_bloc(obj_global_id, timestamp) + return new_bloc + # timestamp < first_seen + elif not inf: + sup_pos, sup_id = inf.split(':') + sup_obj = get_bloc_obj_global_id(sup_pos) + if sup_obj == obj_global_id: + _update_bloc(sup_id, 'start', timestamp) + # create new bloc + else: + new_bloc = add_bloc(obj_global_id, timestamp) + return new_bloc + + # timestamp > first_seen + elif not sup: + inf_pos, inf_id = inf.split(':') + inf_obj = get_bloc_obj_global_id(inf_id) + if inf_obj == obj_global_id: + _update_bloc(inf_id, 'end', timestamp) + # create new bloc + else: + new_bloc = add_bloc(obj_global_id, timestamp) + return new_bloc + + else: + inf_pos, inf_id = inf.split(':') + sup_pos, sup_id = inf.split(':') + inf_obj = get_bloc_obj_global_id(inf_id) + + if inf_id == sup_id: + # reduce bloc + create two new bloc + if obj_global_id != inf_obj: + # get end timestamp + sup_timestamp = get_bloc_timestamp(sup_id, 'end') + # reduce original bloc + _update_bloc(inf_id, 'end', timestamp - 1) + # Insert new bloc + new_bloc = add_bloc(obj_global_id, timestamp) + # Recreate end of the first bloc by a new bloc + add_bloc(inf_obj, timestamp + 1, end=sup_timestamp) + return new_bloc + + # timestamp in existing bloc + else: + return inf_id + + # different blocs: expend sup/inf bloc or create a new bloc if + elif inf_pos == 'end' and sup_pos == 'start': + # Extend inf bloc + if obj_global_id == inf_obj: + _update_bloc(inf_id, 'end', timestamp) + return inf_id + + sup_obj = get_bloc_obj_global_id(sup_pos) + # Extend sup bloc + if obj_global_id == sup_obj: + _update_bloc(sup_id, 'start', timestamp) + return sup_id + + # create new bloc + new_bloc = add_bloc(obj_global_id, timestamp) + return new_bloc + + # inf_pos == 'start' and sup_pos == 'end' + # else raise error ??? + + + + + + diff --git a/configs/6383.conf b/configs/6383.conf index c730003c..a06d4e69 100644 --- a/configs/6383.conf +++ b/configs/6383.conf @@ -663,6 +663,7 @@ namespace.crawl ail_crawlers namespace.db ail_datas namespace.dup ail_dups namespace.obj ail_objs +namespace.tl ail_tls namespace.stat ail_stats namespace.tag ail_tags namespace.track ail_trackers diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index 3278033f..8185b8f7 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -190,6 +190,11 @@ host = localhost port = 6383 password = ail_objs +[Kvrocks_Timeline] +host = localhost +port = 6383 +password = ail_tls + [Kvrocks_Stats] host = localhost port = 6383 diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index e6a99350..c330443b 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -50,6 +50,7 @@ from blueprints.objects_title import objects_title from blueprints.objects_cookie_name import objects_cookie_name from blueprints.objects_etag import objects_etag from blueprints.objects_hhhash import objects_hhhash +from blueprints.objects_chat import objects_chat Flask_dir = os.environ['AIL_FLASK'] @@ -107,6 +108,7 @@ app.register_blueprint(objects_title, url_prefix=baseUrl) app.register_blueprint(objects_cookie_name, url_prefix=baseUrl) app.register_blueprint(objects_etag, url_prefix=baseUrl) app.register_blueprint(objects_hhhash, url_prefix=baseUrl) +app.register_blueprint(objects_chat, url_prefix=baseUrl) # ========= =========# diff --git a/var/www/blueprints/objects_chat.py b/var/www/blueprints/objects_chat.py new file mode 100644 index 00000000..8a1db11f --- /dev/null +++ b/var/www/blueprints/objects_chat.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... +''' + +import os +import sys +import json + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ail_core +from lib.objects import abstract_subtype_object +from lib.objects import ail_objects +from lib.objects import Chats +from packages import Date + +# ============ BLUEPRINT ============ +objects_chat = Blueprint('objects_chat', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/chat')) + +# ============ VARIABLES ============ +bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] + +def create_json_response(data, status_code): + return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code + +# ============ FUNCTIONS ============ + +# ============= ROUTES ============== + + +@objects_chat.route("/objects/chat/messages", methods=['GET']) +@login_required +@login_read_only +def objects_dashboard_chat(): + chat = request.args.get('id') + subtype = request.args.get('subtype') + chat = Chats.Chat(chat, subtype) + if chat.exists(): + messages = chat.get_messages() + meta = chat.get_meta({'icon'}) + print(meta) + return render_template('ChatMessages.html', meta=meta, messages=messages, bootstrap_label=bootstrap_label) + else: + return abort(404) + + + diff --git a/var/www/blueprints/objects_subtypes.py b/var/www/blueprints/objects_subtypes.py index dc97ffa8..a41066a4 100644 --- a/var/www/blueprints/objects_subtypes.py +++ b/var/www/blueprints/objects_subtypes.py @@ -91,6 +91,12 @@ def subtypes_objects_dashboard(obj_type, f_request): # ============= ROUTES ============== +@objects_subtypes.route("/objects/chats", methods=['GET']) +@login_required +@login_read_only +def objects_dashboard_chat(): + return subtypes_objects_dashboard('chat', request) + @objects_subtypes.route("/objects/cryptocurrencies", methods=['GET']) @login_required @login_read_only diff --git a/var/www/templates/objects/chat/ChatMessages.html b/var/www/templates/objects/chat/ChatMessages.html new file mode 100644 index 00000000..b89a447a --- /dev/null +++ b/var/www/templates/objects/chat/ChatMessages.html @@ -0,0 +1,190 @@ + + + + + Chat Messages - AIL + + + + + + +{# #} + + + + + + + +{# + #} + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'sidebars/sidebar_objects.html' %} + +
+ +
+
+

{{ meta["id"] }} :

+
    +
  • +
    +
    + + + + + + + + + + + + + + + + + +
    Object subtypeFirst seenLast seenNb seen
    + + + + {{ meta["icon"]["icon"] }} + + + {{ meta["subtype"] }} + {{ meta['first_seen'] }}{{ meta['last_seen'] }}{{ meta['nb_seen'] }}
    +
    +
    +
    +
    +
    +
  • +
  • +
    +
    + Tags: + {% for tag in meta['tags'] %} + + {% endfor %} + +
    +
  • +
+ + {% with obj_type='chat', obj_id=meta['id'], obj_subtype=meta['subtype'] %} + {% include 'modals/investigations_register_obj.html' %} + {% endwith %} + + +
+
+ +
+
+ + {% for date in messages %} +

{{ date }}

+ {% for mess in messages[date] %} + +
+
+ {{ mess['username']['id'] }} +
{{ mess['hour'] }}
+
+
+
{{ mess['username']['id'] }}
+ {% if mess['reply_to'] %} +
+
{{ mess['reply_to']['username']['id'] }}
+
{{ mess['reply_to']['content'] }}
+ {% for tag in mess['reply_to']['tags'] %} + {{ tag }} + {% endfor %} +
{{ mess['reply_to']['date'] }}
+{#
#} +{# #} +{# #} +{#
#} +
+ {% endif %} +
{{ mess['content'] }}
+ {% for tag in mess['tags'] %} + {{ tag }} + {% endfor %} +
+ + +
+
+
+ + {% endfor %} +
+ {% endfor %} + +
+
+ +
+ +
+
+ + + + + +