2023-02-03 15:13:57 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
|
|
The Telegram Feeder Importer Module
|
|
|
|
================
|
|
|
|
|
|
|
|
Process Telegram JSON
|
|
|
|
|
|
|
|
"""
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import datetime
|
|
|
|
|
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
|
|
|
from importer.feeders.Default import DefaultFeeder
|
2023-08-18 09:05:21 +00:00
|
|
|
from lib.ConfigLoader import ConfigLoader
|
2023-10-05 14:24:28 +00:00
|
|
|
from lib.objects import ail_objects
|
2023-08-18 09:05:21 +00:00
|
|
|
from lib.objects.Chats import Chat
|
|
|
|
from lib.objects import Messages
|
|
|
|
from lib.objects import UsersAccount
|
2023-02-03 15:13:57 +00:00
|
|
|
from lib.objects.Usernames import Username
|
|
|
|
|
2023-08-18 09:05:21 +00:00
|
|
|
import base64
|
|
|
|
import io
|
|
|
|
import gzip
|
2023-10-05 14:24:28 +00:00
|
|
|
|
2023-10-11 10:06:01 +00:00
|
|
|
# TODO remove compression ???
|
2023-08-18 09:05:21 +00:00
|
|
|
def gunzip_bytes_obj(bytes_obj):
    """
    Decompress a gzip-compressed bytes object held in memory.

    :param bytes_obj: gzip-compressed payload
    :return: the decompressed bytes, or None if decompression failed
             (the error is printed, not raised)
    """
    decompressed = None
    buffer = io.BytesIO()
    try:
        # Load the payload into an in-memory file and rewind it so that
        # GzipFile reads from the first byte.
        buffer.write(bytes_obj)
        buffer.seek(0)
        with gzip.GzipFile(fileobj=buffer, mode='rb') as gz_stream:
            decompressed = gz_stream.read()
    except Exception as err:
        # Best effort: report the invalid payload and fall through to
        # return whatever was decoded (None on failure).
        print(f'Global; Invalid Gzip file: {err}')
    return decompressed
|
|
|
|
|
2023-02-03 15:13:57 +00:00
|
|
|
class TelegramFeeder(DefaultFeeder):
    """Feeder that imports Telegram messages received as JSON."""

    def __init__(self, json_data):
        """
        :param json_data: JSON dict of the Telegram message; this class reads
                          its 'meta' and 'data' keys and the optional
                          'translation' key
        """
        super().__init__(json_data)
        self.name = 'telegram'

    def get_obj(self):  # TODO handle others objects -> images, pdf, ...
        """
        Resolve the message object described by the JSON meta field.

        The object id is built by Messages.create_obj_id() from the chat id,
        the message id and the message timestamp. The result is stored in
        self.obj and returned.

        :return: object returned by ail_objects.get_obj_from_global_id()
        :raises KeyError: if meta is missing the date timestamp, the chat id
                          or the message id
        """
        meta = self.json_data['meta']
        # Get message date
        timestamp = meta['date']['timestamp']  # TODO CREATE DEFAULT TIMESTAMP
        # if self.json_data['meta'].get('date'):
        #     date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp'])
        #     date = date.strftime('%Y/%m/%d')
        # else:
        #     date = datetime.date.today().strftime("%Y/%m/%d")
        chat_id = str(meta['chat']['id'])
        message_id = str(meta['id'])
        obj_id = Messages.create_obj_id('telegram', chat_id, message_id, timestamp)
        obj_id = f'message:telegram:{obj_id}'
        self.obj = ail_objects.get_obj_from_global_id(obj_id)
        return self.obj

    def process_meta(self):
        """
        Process JSON meta field.

        Creates the message from the base64 + gzip encoded 'data' field, then
        links it to its chat and to its sender (user account and username)
        when those entries are present in meta.

        Reads self.obj.id, so self.obj must already be set (get_obj() is the
        method in this class that sets it).

        :return: None
        """
        meta = self.json_data['meta']
        mess_id = meta['id']
        reply_to_id = int(meta['reply_to']) if meta.get('reply_to') else None

        timestamp = meta['date']['timestamp']
        # NOTE: fromtimestamp() without tz converts to the platform local time
        date = datetime.datetime.fromtimestamp(timestamp).strftime('%Y%m%d')

        # Missing or empty translation is normalised to None
        translation = self.json_data.get('translation') or None

        # Message content is transported base64-encoded and gzip-compressed.
        # gunzip_bytes_obj() returns None on an invalid gzip payload.
        decoded = base64.standard_b64decode(self.json_data['data'])
        content = gunzip_bytes_obj(decoded)
        message = Messages.create(self.obj.id, content, translation=translation)

        # message chat
        chat = None
        chat_meta = meta.get('chat')
        if chat_meta:
            chat = Chat(chat_meta['id'], 'telegram')

            if chat_meta.get('username'):
                chat_username = Username(chat_meta['username'], 'telegram')
                chat.update_username_timeline(chat_username.get_global_id(), timestamp)

            # Chat---Message
            chat.add(date)
            chat.add_message(message.get_global_id(), timestamp, mess_id, reply_id=reply_to_id)

        # message sender
        sender = meta.get('sender')
        if sender:  # TODO handle message channel forward - check if is user
            user_account = UsersAccount.UserAccount(sender['id'], 'telegram')
            # UserAccount---Message
            user_account.add(date, obj=message)
            # UserAccount---Chat
            # Only correlate when a chat was present in meta: chat stays None
            # otherwise and dereferencing it would raise AttributeError.
            if chat is not None:
                user_account.add_correlation(chat.type, chat.get_subtype(r_str=True), chat.id)

            if sender.get('firstname'):
                user_account.set_first_name(sender['firstname'])
            if sender.get('lastname'):
                user_account.set_last_name(sender['lastname'])
            if sender.get('phone'):
                user_account.set_phone(sender['phone'])

            if sender.get('username'):
                username = Username(sender['username'], 'telegram')
                # TODO timeline or/and correlation ????
                user_account.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
                user_account.update_username_timeline(username.get_global_id(), timestamp)

                # Username---Message
                username.add(date)  # TODO # correlation message ???

            # if chat: # TODO Chat---Username correlation ???
            #     # Chat---Username
            #     chat.add_correlation(username.type, username.get_subtype(r_str=True), username.id)

        # if meta.get('fwd_from'):
        #     if meta['fwd_from'].get('post_author') # user first name

        # TODO reply threads ????
        # message edit ????

        return None
|