mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
chg: [Telegram module] refactor module + fix str format
This commit is contained in:
parent
9c561d4827
commit
3d8d18bbe1
6 changed files with 178 additions and 179 deletions
175
bin/Telegram.py
175
bin/Telegram.py
|
@ -1,175 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
Tools Module
|
||||
============================
|
||||
|
||||
Search tools output
|
||||
|
||||
"""
|
||||
|
||||
from Helper import Process
|
||||
from pubsublogger import publisher
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import redis
|
||||
import signal
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
|
||||
import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import telegram
|
||||
|
||||
class TimeoutException(Exception):
    """Raised by the SIGALRM handler to abort a search that ran too long."""
|
||||
|
||||
def timeout_handler(signum, frame):
    """Signal handler that aborts the interrupted code.

    :param signum: number of the delivered signal (unused)
    :param frame: stack frame that was interrupted (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()


# Every SIGALRM delivered to this process now raises TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
|
||||
# https://github.com/LonamiWebs/Telethon/wiki/Special-links
# Pattern string (not pre-compiled). Group 1 is the Telegram web domain,
# group 2 is what follows the first '/' up to a dot, double quote, comma
# or whitespace.
regex_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
# Matches 'tg://' followed by the rest of the line ('.' stops at a newline).
regex_tg_link = re.compile(r'tg://.+')

# NOTE(review): the range `A-z` (not `A-Z`) also covers the ASCII characters
# between 'Z' and 'a': [ \ ] ^ _ and backtick. The callers strip backslashes
# from username matches, and `_` in invite hashes is only accepted through
# this range, so confirm the intent before narrowing it.
regex_username = re.compile(r'[0-9a-zA-z_]+')
regex_join_hash = re.compile(r'[0-9a-zA-z-]+')

# Seconds handed to signal.alarm() before a regex search is interrupted.
max_execution_time = 60
|
||||
|
||||
def extract_data_from_telegram_url(item_id, item_date, base_url, url_path):
    """Save the username or invite hash carried by a Telegram web link.

    :param item_id: id of the item the link was found in
    :param item_date: date of that item, forwarded to the username correlation
    :param base_url: matched Telegram domain (not used here)
    :param url_path: part of the link that follows the domain
    :return: True only when an invite hash was saved, otherwise False
    """
    segments = url_path.split('/')

    # A single path segment is treated as a username.
    if len(segments) == 1:
        found = regex_username.search(segments[0].lower())
        if not found:
            return False
        name = found[0].replace('\\', '')
        # Only names longer than 5 characters are kept.
        if len(name) > 5:
            print('username: {}'.format(name))
            telegram.save_item_correlation(name, item_id, item_date)
        return False

    # With more than one segment, only '<domain>/joinchat/<hash>' is handled.
    if segments[0] != 'joinchat':
        return False
    found = regex_join_hash.search(segments[1])
    if not found:
        return False
    code = found[0]
    telegram.save_telegram_invite_hash(code, item_id)
    print('invite code: {}'.format(code))
    return True
|
||||
|
||||
|
||||
# # TODO:
|
||||
# Add openmessage
|
||||
# Add passport ?
|
||||
# Add confirmphone
|
||||
# Add user
|
||||
def extract_data_from_tg_url(item_id, item_date, tg_link):
    """Save the username or invite hash carried by a ``tg://`` deep link.

    Handled forms (the part after ``tg://`` is parsed as the netloc):
      * ``tg://resolve?domain=<username>``: username correlation is saved
      * ``tg://join?invite=<hash>``: invite hash is saved
      * ``tg://login?code=<code>``: the code is only printed
    Any other link is printed as-is.

    :param item_id: id of the item the link was found in
    :param item_date: date of that item, forwarded to the username correlation
    :param tg_link: the full ``tg://...`` string
    :return: True only when an invite hash was saved, otherwise False
    """
    invite_code_found = False

    url = urlparse(tg_link)
    # username len > 5, a-z A-Z _
    if url.netloc == 'resolve' and len(url.query) > 7:
        if url.query[:7] == 'domain=':
            # remove domain=
            username = regex_username.search(url.query[7:])
            if username:
                username = username[0].replace('\\', '')
                if len(username) > 5:
                    print('username: {}'.format(username))
                    telegram.save_item_correlation(username, item_id, item_date)
    elif url.netloc == 'join' and len(url.query) > 7:
        if url.query[:7] == 'invite=':
            invite_hash = regex_join_hash.search(url.query[7:])
            if invite_hash:
                invite_hash = invite_hash[0]
                telegram.save_telegram_invite_hash(invite_hash, item_id)
                print('invite code: {}'.format(invite_hash))
                invite_code_found = True

    elif url.netloc == 'login' and len(url.query) > 5:
        # Skips the first 5 characters of the query (the length of 'code=').
        login_code = url.query[5:]
        # Fix: format the string, then print it. The original called
        # .format() on print()'s return value (None) and raised
        # AttributeError on every login link.
        print('login code: {}'.format(login_code))

    else:
        print(url)

    return invite_code_found
|
||||
|
||||
def search_telegram(item_id, item_date, item_content):
    """Look for Telegram web links and ``tg://`` links in an item.

    Each regex search is bounded by ``max_execution_time`` seconds through
    SIGALRM. When at least one invite hash is saved, a tag message is pushed
    to the 'Tags' queue via the module-level ``p`` object.

    :param item_id: id of the item being processed
    :param item_date: date of the item
    :param item_content: text content to search
    """

    def findall_with_timeout(pattern):
        # Arm the alarm, search, and disarm it again on success. On timeout
        # the statistic is incremented and no match is returned.
        signal.alarm(max_execution_time)
        try:
            matches = re.findall(pattern, item_content)
        except TimeoutException:
            p.incr_module_timeout_statistic() # add encoder type
            print("{0} processing timeout".format(item_id))
            return []
        signal.alarm(0)
        return matches

    # telegram links: each match is a (domain, path) tuple
    telegram_links = findall_with_timeout(regex_telegram_link)
    invite_code_found = False
    for base_url, url_path in telegram_links:
        if extract_data_from_telegram_url(item_id, item_date, base_url, url_path):
            invite_code_found = True

    # tg links
    for tg_link in findall_with_timeout(regex_tg_link):
        if extract_data_from_tg_url(item_id, item_date, tg_link):
            invite_code_found = True

    if not invite_code_found:
        return
    # tags
    msg = 'infoleak:automatic-detection="telegram-invite-hash";{}'.format(item_id)
    p.populate_set_out(msg, 'Tags')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Telegram'
    # TODO: add duplicate

    # Set up the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logger
    publisher.info("Run Telegram module ")

    # Endless loop consuming item ids from the input queue
    while True:
        item_id = p.get_from_set()
        if item_id is not None:
            # Content is fetched before the date, then both are searched.
            item_content = Item.get_item_content(item_id)
            item_date = Item.get_item_date(item_id)
            search_telegram(item_id, item_date, item_content)
        else:
            # Nothing queued: log it and poll again in one second.
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
|
|
@ -29,7 +29,8 @@ publisher.port = 6380
|
|||
publisher.channel = "Script"
|
||||
|
||||
def generate_redis_cache_key(module_name):
    """Build a unique key of the form ``<module_name>_extracted:<uuid4>``.

    :param module_name: name placed at the start of the key
    :return: the key as a string; a fresh random UUID is used on every call
    """
    # Single return path: the earlier `.format()` return left the two
    # statements below it unreachable.
    new_uuid = str(uuid.uuid4())
    return f'{module_name}_extracted:{new_uuid}'
|
||||
|
||||
def _regex_findall(redis_key, regex, item_content, r_set):
|
||||
all_items = re.findall(regex, item_content)
|
||||
|
@ -57,7 +58,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
|
|||
if proc.is_alive():
|
||||
proc.terminate()
|
||||
Statistics.incr_module_timeout_statistic(module_name)
|
||||
err_mess = "{}: processing timeout: {}".format(module_name, item_id)
|
||||
err_mess = f"{module_name}: processing timeout: {item_id}"
|
||||
print(err_mess)
|
||||
publisher.info(err_mess)
|
||||
return []
|
||||
|
@ -87,7 +88,7 @@ def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=
|
|||
if proc.is_alive():
|
||||
proc.terminate()
|
||||
Statistics.incr_module_timeout_statistic(module_name)
|
||||
err_mess = "{}: processing timeout: {}".format(module_name, item_id)
|
||||
err_mess = f"{module_name}: processing timeout: {item_id}"
|
||||
print(err_mess)
|
||||
publisher.info(err_mess)
|
||||
return None
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import redis
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
@ -13,8 +15,70 @@ config_loader = ConfigLoader.ConfigLoader()
|
|||
# Redis connection registered as "ARDB_Onion" in the configuration; used by
# save_telegram_invite_hash() in this module.
r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion")
# Drop the reference to the loader once the connection has been obtained.
config_loader = None

# NOTE(review): the range `A-z` (not `A-Z`) also covers the ASCII characters
# between 'Z' and 'a': [ \ ] ^ _ and backtick. The functions in this module
# strip backslashes from username matches, and `_` in invite hashes is only
# accepted through this range, so confirm the intent before narrowing it.
REGEX_USERNAME = re.compile(r'[0-9a-zA-z_]+')
REGEX_JOIN_HASH = re.compile(r'[0-9a-zA-z-]+')
|
||||
|
||||
## ##
|
||||
|
||||
def save_item_correlation(username, item_id, item_date):
    """Save the correlation between a Telegram username and an item.

    Delegates to ``Username.save_item_correlation`` with the fixed
    ``'telegram'`` first argument.

    :param username: Telegram username
    :param item_id: id of the item the username was found in
    :param item_date: date of that item
    """
    Username.save_item_correlation(
        'telegram',
        username,
        item_id,
        item_date,
    )
|
||||
|
||||
def save_telegram_invite_hash(invite_hash, item_id):
    """Add ``<invite_hash>;<item_id>`` to the ``telegram:invite_code`` set.

    :param invite_hash: invite hash extracted from a Telegram link
    :param item_id: id of the item the hash was found in
    """
    member = f'{invite_hash};{item_id}'
    r_serv_crawler.sadd('telegram:invite_code', member)
|
||||
|
||||
def get_data_from_telegram_url(base_url, url_path):
    """Extract a username or an invite hash from a Telegram web link.

    :param base_url: matched Telegram domain (not used here)
    :param url_path: part of the link that follows the domain
    :return: ``{'username': ...}``, ``{'invite_hash': ...}`` or an empty dict
    """
    segments = url_path.split('/')

    # A single path segment is treated as a username.
    if len(segments) == 1:
        found = REGEX_USERNAME.search(segments[0].lower())
        if found:
            name = found[0].replace('\\', '')
            # Only names longer than 5 characters are kept.
            if len(name) > 5:
                return {'username': name}
        return {}

    # With more than one segment, only '<domain>/joinchat/<hash>' is handled.
    if segments[0] == 'joinchat':
        found = REGEX_JOIN_HASH.search(segments[1])
        if found:
            return {'invite_hash': found[0]}
    return {}
|
||||
|
||||
# # TODO:
|
||||
# Add openmessage
|
||||
# Add passport ?
|
||||
# Add confirmphone
|
||||
# Add user
|
||||
def get_data_from_tg_url(tg_link):
    """Extract a username, invite hash or login code from a ``tg://`` link.

    Handled forms (the part after ``tg://`` is parsed as the netloc):
      * ``tg://resolve?domain=<username>`` gives ``{'username': ...}``
      * ``tg://join?invite=<hash>`` gives ``{'invite_hash': ...}``
      * ``tg://login?code=<code>`` gives ``{'login_code': ...}``
    Any other link is printed and yields an empty dict.

    :param tg_link: the full ``tg://...`` string
    :return: dict with at most one of the keys listed above
    """
    parsed = urlparse(tg_link)
    action = parsed.netloc
    query = parsed.query
    extracted = {}

    if action == 'resolve' and len(query) > 7:
        if query.startswith('domain='):
            # Drop the 'domain=' prefix before searching for the username.
            found = REGEX_USERNAME.search(query[7:])
            if found:
                name = found[0].replace('\\', '')
                # Only names longer than 5 characters are kept.
                if len(name) > 5:
                    extracted['username'] = name
    elif action == 'join' and len(query) > 7:
        if query.startswith('invite='):
            found = REGEX_JOIN_HASH.search(query[7:])
            if found:
                extracted['invite_hash'] = found[0]
    elif action == 'login' and len(query) > 5:
        # Skips the first 5 characters of the query (the length of 'code=').
        code = query[5:]
        if code:
            extracted['login_code'] = code
    else:
        # TODO: log invalid URL
        print(parsed)

    return extracted
|
||||
|
|
86
bin/modules/Telegram.py
Executable file
86
bin/modules/Telegram.py
Executable file
|
@ -0,0 +1,86 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
Telegram Module
|
||||
============================
|
||||
|
||||
Search telegram username,channel and invite code
|
||||
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib import regex_helper
|
||||
from lib import telegram
|
||||
|
||||
class Telegram(AbstractModule):
    """Telegram module for AIL framework"""

    def __init__(self):
        super().__init__()

        # https://github.com/LonamiWebs/Telethon/wiki/Special-links
        self.re_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
        self.re_tg_link = r'tg://.+'

        # The compiled objects are not kept; the patterns stay as strings.
        for pattern in (self.re_telegram_link, self.re_tg_link):
            re.compile(pattern)

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
        self.max_execution_time = 60

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def _save_extracted(self, dict_url, item, item_date):
        """Save the username / invite hash held in ``dict_url``.

        :return: True when an invite hash was saved, otherwise False
        """
        if dict_url.get('username'):
            telegram.save_item_correlation(dict_url['username'], item.get_id(), item_date)
            print(f'username: {dict_url["username"]}')
        if dict_url.get('invite_hash'):
            telegram.save_telegram_invite_hash(dict_url['invite_hash'], item.get_id())
            print(f'invite code: {dict_url["invite_hash"]}')
            return True
        return False

    def compute(self, message, r_result=False):
        """Extract Telegram usernames and invite codes from one item.

        :param message: item id
        :param r_result: not used by this method
        """
        # message = item_id
        item = Item(message)
        item_content = item.get_content()
        item_date = item.get_date()

        invite_code_found = False

        # extract telegram links
        telegram_links = self.regex_findall(self.re_telegram_link, item.get_id(), item_content)
        for raw_match in telegram_links:
            # presumably each result is the str() form of a (domain, path)
            # tuple; the slice drops "('" and "')", the split separates the
            # two fields. Verify against regex_helper.
            base_url, url_path = raw_match[2:-2].split("', '", 1)
            dict_url = telegram.get_data_from_telegram_url(base_url, url_path)
            if self._save_extracted(dict_url, item, item_date):
                invite_code_found = True

        # extract tg links
        tg_links = self.regex_findall(self.re_tg_link, item.get_id(), item_content)
        for tg_link in tg_links:
            dict_url = telegram.get_data_from_tg_url(tg_link)
            if self._save_extracted(dict_url, item, item_date):
                invite_code_found = True
            if dict_url.get('login_code'):
                print(f'login code: {dict_url["login_code"]}')

        # CREATE TAG
        if invite_code_found:
            # tags
            msg = f'infoleak:automatic-detection="telegram-invite-hash";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Build the module and start its run() loop.
    Telegram().run()
|
|
@ -15,6 +15,7 @@ import traceback
|
|||
##################################
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
from lib import regex_helper
|
||||
|
||||
class AbstractModule(ABC):
|
||||
"""
|
||||
|
@ -74,6 +75,17 @@ class AbstractModule(ABC):
|
|||
self.process.populate_set_out(message, queue_name)
|
||||
# add to new set_module
|
||||
|
||||
def regex_findall(self, regex, id, content):
    """
    regex findall helper (force timeout)

    Forwards to ``regex_helper.regex_findall`` using this module's
    ``module_name``, ``redis_cache_key`` and ``max_execution_time``;
    the last two must have been set on the instance beforehand.

    :param regex: regex pattern to search for
    :param id: object id
    :param content: object content

    :return: the value returned by ``regex_helper.regex_findall``

    ex: regex_findall(self.re_tg_link, item_id, item_content)
    """
    return regex_helper.regex_findall(
        self.module_name,
        self.redis_cache_key,
        regex,
        id,
        content,
        max_time=self.max_execution_time,
    )
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Run Module endless process
|
||||
|
|
|
@ -19,6 +19,7 @@ from modules.DomClassifier import DomClassifier
|
|||
from modules.Global import Global
|
||||
from modules.Keys import Keys
|
||||
from modules.Onion import Onion
|
||||
from modules.Telegram import Telegram
|
||||
|
||||
# project packages
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
|
@ -169,5 +170,15 @@ class Test_Module_Onion(unittest.TestCase):
|
|||
# # TODO: check warning logs
|
||||
pass
|
||||
|
||||
class Test_Module_Telegram(unittest.TestCase):
    """Smoke test: run the Telegram module once on a test item."""

    def setUp(self):
        self.module_obj = Telegram()

    def test_module(self):
        # TODO: check results
        self.module_obj.compute('tests/2021/01/01/keys.gz')
|
||||
|
||||
# Run the test cases of this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
|
|
Loading…
Reference in a new issue