chg: [Telegram module] refactor module + fix str format

This commit is contained in:
Terrtia 2022-01-19 16:20:18 +01:00
parent 9c561d4827
commit 3d8d18bbe1
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
6 changed files with 178 additions and 179 deletions

View file

@ -1,175 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Tools Module
============================
Search tools output
"""
from Helper import Process
from pubsublogger import publisher
import os
import re
import sys
import time
import redis
import signal
from urllib.parse import urlparse
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import telegram
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when an armed alarm fires."""


def timeout_handler(signum, frame):
    """Signal handler that aborts the interrupted call.

    :param signum: signal number passed in by the signal machinery (unused)
    :param frame: interrupted stack frame (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()


# Installed at import time so that signal.alarm() ends in a TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
# Supported link formats: https://github.com/LonamiWebs/Telethon/wiki/Special-links
# Group 1 captures the host, group 2 the path that follows it.
regex_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
regex_tg_link = re.compile(r'tg://.+')
# The upper-case range has to be `A-Z`: `A-z` also spans the ASCII punctuation
# located between 'Z' and 'a' ([ \ ] ^ _ `), which lets brackets and
# backslashes leak into the extracted value.
regex_username = re.compile(r'[0-9a-zA-Z_]+')
# '_' is listed explicitly so that hashes containing it are still matched in full.
regex_join_hash = re.compile(r'[0-9a-zA-Z_-]+')
# Seconds granted to a single regex search (value handed to signal.alarm).
max_execution_time = 60
def extract_data_from_telegram_url(item_id, item_date, base_url, url_path):
    """Save the username or invite hash carried by a telegram web link.

    :param item_id: id of the item the link was found in
    :param item_date: date of the item, forwarded when a username is saved
    :param base_url: host part of the link (not used by this function)
    :param url_path: path that follows the host, e.g. ``joinchat/<hash>``
    :return: True when an invite hash was saved, False otherwise
    """
    segments = url_path.split('/')

    # A path made of a single segment is a username candidate.
    if len(segments) == 1:
        match = regex_username.search(segments[0].lower())
        if match:
            username = match[0].replace('\\', '')
            # only usernames longer than 5 characters are kept
            if len(username) > 5:
                print('username: {}'.format(username))
                telegram.save_item_correlation(username, item_id, item_date)
        return False

    # Multi-segment paths are only handled when they are joinchat links.
    if segments[0] != 'joinchat':
        return False

    match = regex_join_hash.search(segments[1])
    if not match:
        return False

    invite_hash = match[0]
    telegram.save_telegram_invite_hash(invite_hash, item_id)
    print('invite code: {}'.format(invite_hash))
    return True
# # TODO:
# Add openmessage
# Add passport ?
# Add confirmphone
# Add user
def extract_data_from_tg_url(item_id, item_date, tg_link):
    """Save the username or invite hash carried by a ``tg://`` link.

    Handled forms are ``tg://resolve?domain=<username>``,
    ``tg://join?invite=<hash>`` and ``tg://login?...``; any other link is
    printed as-is.

    :param item_id: id of the item the link was found in
    :param item_date: date of the item, forwarded when a username is saved
    :param tg_link: the ``tg://`` link to inspect
    :return: True when an invite hash was saved, False otherwise
    """
    invite_code_found = False
    url = urlparse(tg_link)

    if url.netloc == 'resolve' and len(url.query) > 7:
        if url.query.startswith('domain='):
            # remove domain=
            username = regex_username.search(url.query[7:])
            if username:
                username = username[0].replace('\\', '')
                # only usernames longer than 5 characters are kept
                if len(username) > 5:
                    print('username: {}'.format(username))
                    telegram.save_item_correlation(username, item_id, item_date)
    elif url.netloc == 'join' and len(url.query) > 7:
        if url.query.startswith('invite='):
            invite_hash = regex_join_hash.search(url.query[7:])
            if invite_hash:
                invite_hash = invite_hash[0]
                telegram.save_telegram_invite_hash(invite_hash, item_id)
                print('invite code: {}'.format(invite_hash))
                invite_code_found = True
    elif url.netloc == 'login' and len(url.query) > 5:
        # The first 5 characters of the query are dropped without checking them.
        login_code = url.query[5:]
        # format() must be applied to the string, not to print()'s None result.
        print('login code: {}'.format(login_code))
    else:
        print(url)
    return invite_code_found
def search_telegram(item_id, item_date, item_content):
    """Extract telegram usernames and invite hashes from an item.

    Web links and ``tg://`` links are searched in turn. When at least one
    invite hash was saved, a tag message for the item is pushed to the
    'Tags' queue.

    :param item_id: id of the item being processed
    :param item_date: date of the item
    :param item_content: text content to search
    """

    def _findall_with_timeout(pattern):
        # Arm the alarm before searching; a timeout yields an empty result
        # and is counted in the module statistics.
        signal.alarm(max_execution_time)
        try:
            found = re.findall(pattern, item_content)
        except TimeoutException:
            found = []
            p.incr_module_timeout_statistic() # add encoder type
            print ("{0} processing timeout".format(item_id))
        else:
            signal.alarm(0)
        return found

    invite_code_found = False

    # telegram links
    for host, path in _findall_with_timeout(regex_telegram_link):
        if extract_data_from_telegram_url(item_id, item_date, host, path):
            invite_code_found = True

    # tg links
    for tg_link in _findall_with_timeout(regex_tg_link):
        if extract_data_from_tg_url(item_id, item_date, tg_link):
            invite_code_found = True

    if not invite_code_found:
        return

    # tags
    p.populate_set_out(
        'infoleak:automatic-detection="telegram-invite-hash";{}'.format(item_id),
        'Tags',
    )
if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Telegram'

    # # TODO: add duplicate

    # I/O queues of the module; `p` is also read by search_telegram()
    p = Process(config_section)

    # Describe the module in the logs
    publisher.info("Run Telegram module ")

    # Consume the input queue forever
    while True:
        item_id = p.get_from_set()

        if item_id is not None:
            # Load the item, then search it
            item_content = Item.get_item_content(item_id)
            item_date = Item.get_item_date(item_id)
            search_telegram(item_id, item_date, item_content)
        else:
            # Nothing queued: wait a little before polling again
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)

View file

@ -29,7 +29,8 @@ publisher.port = 6380
publisher.channel = "Script" publisher.channel = "Script"
def generate_redis_cache_key(module_name): def generate_redis_cache_key(module_name):
return '{}_extracted:{}'.format(module_name, str(uuid.uuid4())) new_uuid = str(uuid.uuid4())
return f'{module_name}_extracted:{new_uuid}'
def _regex_findall(redis_key, regex, item_content, r_set): def _regex_findall(redis_key, regex, item_content, r_set):
all_items = re.findall(regex, item_content) all_items = re.findall(regex, item_content)
@ -57,7 +58,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
if proc.is_alive(): if proc.is_alive():
proc.terminate() proc.terminate()
Statistics.incr_module_timeout_statistic(module_name) Statistics.incr_module_timeout_statistic(module_name)
err_mess = "{}: processing timeout: {}".format(module_name, item_id) err_mess = f"{module_name}: processing timeout: {item_id}"
print(err_mess) print(err_mess)
publisher.info(err_mess) publisher.info(err_mess)
return [] return []
@ -87,7 +88,7 @@ def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=
if proc.is_alive(): if proc.is_alive():
proc.terminate() proc.terminate()
Statistics.incr_module_timeout_statistic(module_name) Statistics.incr_module_timeout_statistic(module_name)
err_mess = "{}: processing timeout: {}".format(module_name, item_id) err_mess = f"{module_name}: processing timeout: {item_id}"
print(err_mess) print(err_mess)
publisher.info(err_mess) publisher.info(err_mess)
return None return None

View file

@ -2,8 +2,10 @@
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import os import os
import re
import sys import sys
import redis
from urllib.parse import urlparse
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader import ConfigLoader
@ -13,8 +15,70 @@ config_loader = ConfigLoader.ConfigLoader()
r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion") r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None config_loader = None
# The upper-case range has to be `A-Z`: `A-z` also spans the ASCII punctuation
# located between 'Z' and 'a' ([ \ ] ^ _ `), which lets brackets and
# backslashes leak into the extracted value.
REGEX_USERNAME = re.compile(r'[0-9a-zA-Z_]+')
# '_' is listed explicitly so that hashes containing it are still matched in full.
REGEX_JOIN_HASH = re.compile(r'[0-9a-zA-Z_-]+')
## ##
def save_item_correlation(username, item_id, item_date): def save_item_correlation(username, item_id, item_date):
Username.save_item_correlation('telegram', username, item_id, item_date) Username.save_item_correlation('telegram', username, item_id, item_date)
def save_telegram_invite_hash(invite_hash, item_id): def save_telegram_invite_hash(invite_hash, item_id):
r_serv_crawler.sadd('telegram:invite_code', '{};{}'.format(invite_hash, item_id)) r_serv_crawler.sadd('telegram:invite_code', '{};{}'.format(invite_hash, item_id))
def get_data_from_telegram_url(base_url, url_path):
    """Extract a username or an invite hash from a telegram web link.

    :param base_url: host part of the link (not used by this function)
    :param url_path: path that follows the host, e.g. ``joinchat/<hash>``
    :return: dict holding at most one of the keys ``username`` or
             ``invite_hash``; empty when nothing was extracted
    """
    segments = url_path.split('/')

    # A path made of a single segment is a username candidate.
    if len(segments) == 1:
        match = REGEX_USERNAME.search(segments[0].lower())
        if match:
            username = match[0].replace('\\', '')
            # only usernames longer than 5 characters are kept
            if len(username) > 5:
                return {'username': username}
        return {}

    if segments[0] == 'joinchat':
        match = REGEX_JOIN_HASH.search(segments[1])
        if match:
            return {'invite_hash': match[0]}

    return {}
# # TODO:
# Add openmessage
# Add passport ?
# Add confirmphone
# Add user
def get_data_from_tg_url(tg_link):
    """Extract a username, invite hash or login code from a ``tg://`` link.

    :param tg_link: the ``tg://`` link to inspect
    :return: dict holding at most one of the keys ``username``,
             ``invite_hash`` or ``login_code``; empty when nothing was extracted
    """
    parsed = urlparse(tg_link)
    action = parsed.netloc
    query = parsed.query
    extracted = {}

    if action == 'resolve' and len(query) > 7:
        if query.startswith('domain='):
            # strip the "domain=" prefix before matching
            match = REGEX_USERNAME.search(query[7:])
            if match:
                username = match[0].replace('\\', '')
                # only usernames longer than 5 characters are kept
                if len(username) > 5:
                    extracted['username'] = username
    elif action == 'join' and len(query) > 7:
        if query.startswith('invite='):
            match = REGEX_JOIN_HASH.search(query[7:])
            if match:
                extracted['invite_hash'] = match[0]
    elif action == 'login' and len(query) > 5:
        # the first 5 characters of the query are dropped without checking them
        login_code = query[5:]
        if login_code:
            extracted['login_code'] = login_code
    else:
        # # TODO: log invalid URL ???????
        print(parsed)

    return extracted

86
bin/modules/Telegram.py Executable file
View file

@ -0,0 +1,86 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Telegram Module
============================
Search telegram username,channel and invite code
"""
import os
import re
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib import regex_helper
from lib import telegram
class Telegram(AbstractModule):
    """Telegram module for AIL framework"""

    def __init__(self):
        super(Telegram, self).__init__()

        # https://github.com/LonamiWebs/Telethon/wiki/Special-links
        self.re_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
        self.re_tg_link = r'tg://.+'
        # Compiling here makes an invalid pattern fail at start-up. The results
        # are not kept: compute() hands the pattern strings to regex_findall.
        re.compile(self.re_telegram_link)
        re.compile(self.re_tg_link)

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
        self.max_execution_time = 60

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    @staticmethod
    def _split_link_tuple(link_tuple):
        """Split a stringified ``('host', 'path')`` match into its two parts.

        :param link_tuple: string of the form ``('host', 'path')``
        :return: ``[host, path]``, or None when the ``', '`` separator is missing
        """
        parts = link_tuple[2:-2].split("', '", 1)
        return parts if len(parts) == 2 else None

    @staticmethod
    def _save_url_data(dict_url, item_id, item_date):
        """Save and print what was extracted from one link.

        :param dict_url: dict returned by the ``lib.telegram`` extraction helpers
        :param item_id: id of the item the link was found in
        :param item_date: date of the item, forwarded when a username is saved
        :return: True when an invite hash was saved, False otherwise
        """
        invite_code_found = False
        if dict_url.get('username'):
            telegram.save_item_correlation(dict_url['username'], item_id, item_date)
            print(f'username: {dict_url["username"]}')
        if dict_url.get('invite_hash'):
            telegram.save_telegram_invite_hash(dict_url['invite_hash'], item_id)
            print(f'invite code: {dict_url["invite_hash"]}')
            invite_code_found = True
        if dict_url.get('login_code'):
            print(f'login code: {dict_url["login_code"]}')
        return invite_code_found

    def compute(self, message, r_result=False):
        """Search one item for telegram usernames, invite hashes and login codes.

        :param message: id of the item to process
        :param r_result: not used by this module
        """
        # message = item_id
        item = Item(message)
        item_content = item.get_content()
        item_date = item.get_date()
        item_id = item.get_id()

        invite_code_found = False

        # extract telegram links
        telegram_links = self.regex_findall(self.re_telegram_link, item_id, item_content)
        for telegram_link_tuple in telegram_links:
            parts = self._split_link_tuple(telegram_link_tuple)
            if parts is None:
                # Unpacking a match without the "', '" separator would raise
                # ValueError and abort the whole item, so it is skipped.
                print(f'invalid telegram link format: {telegram_link_tuple}')
                continue
            base_url, url_path = parts
            dict_url = telegram.get_data_from_telegram_url(base_url, url_path)
            if self._save_url_data(dict_url, item_id, item_date):
                invite_code_found = True

        # extract tg links
        tg_links = self.regex_findall(self.re_tg_link, item_id, item_content)
        for tg_link in tg_links:
            dict_url = telegram.get_data_from_tg_url(tg_link)
            if self._save_url_data(dict_url, item_id, item_date):
                invite_code_found = True

        # CREATE TAG
        if invite_code_found:
            # tags
            msg = f'infoleak:automatic-detection="telegram-invite-hash";{item_id}'
            self.send_message_to_queue(msg, 'Tags')
if __name__ == "__main__":
    # Instantiate the module and start it.
    Telegram().run()

View file

@ -15,6 +15,7 @@ import traceback
################################## ##################################
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
from lib import regex_helper
class AbstractModule(ABC): class AbstractModule(ABC):
""" """
@ -74,6 +75,17 @@ class AbstractModule(ABC):
self.process.populate_set_out(message, queue_name) self.process.populate_set_out(message, queue_name)
# add to new set_module # add to new set_module
def regex_findall(self, regex, id, content):
    """
    regex findall helper (force timeout)

    Delegates to regex_helper.regex_findall using this module's name,
    redis cache key and max execution time.

    :param regex: regex to search for
    :param id: object id
    :param content: object content
    :return: whatever regex_helper.regex_findall returns
    """
    module_name = self.module_name
    cache_key = self.redis_cache_key
    timeout = self.max_execution_time
    return regex_helper.regex_findall(module_name, cache_key, regex, id, content, max_time=timeout)
def run(self): def run(self):
""" """
Run Module endless process Run Module endless process

View file

@ -19,6 +19,7 @@ from modules.DomClassifier import DomClassifier
from modules.Global import Global from modules.Global import Global
from modules.Keys import Keys from modules.Keys import Keys
from modules.Onion import Onion from modules.Onion import Onion
from modules.Telegram import Telegram
# project packages # project packages
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
@ -169,5 +170,15 @@ class Test_Module_Onion(unittest.TestCase):
# # TODO: check warning logs # # TODO: check warning logs
pass pass
class Test_Module_Telegram(unittest.TestCase):
    """Run the Telegram module once against a test item."""

    def setUp(self):
        self.module_obj = Telegram()

    def test_module(self):
        # # TODO: check results
        self.module_obj.compute('tests/2021/01/01/keys.gz')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()