mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [Phone module] Filter Invalid Phone numbers + UI Show extracted
This commit is contained in:
parent
7a52aec884
commit
353b290899
6 changed files with 101 additions and 36 deletions
|
@ -249,6 +249,8 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./PgpDump.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Tools" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Tools.py; read x"
|
||||
|
@ -290,8 +292,6 @@ function launching_scripts {
|
|||
##################################
|
||||
# DISABLED MODULES #
|
||||
##################################
|
||||
# screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
|
||||
# sleep 0.1
|
||||
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
||||
# sleep 0.1
|
||||
# screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"
|
||||
|
|
|
@ -25,6 +25,7 @@ from modules.CreditCards import CreditCards
|
|||
from modules.Iban import Iban
|
||||
from modules.Mail import Mail
|
||||
from modules.Onion import Onion
|
||||
from modules.Phone import Phone
|
||||
from modules.Tools import Tools
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
|
@ -40,6 +41,7 @@ MODULES = {
|
|||
'infoleak:automatic-detection="iban"': Iban(queue=False),
|
||||
'infoleak:automatic-detection="mail"': Mail(queue=False),
|
||||
'infoleak:automatic-detection="onion"': Onion(queue=False),
|
||||
'infoleak:automatic-detection="phone-number"': Phone(queue=False),
|
||||
# APIkey ???
|
||||
# Credentials
|
||||
# Zerobins
|
||||
|
|
|
@ -7,14 +7,13 @@ Regex Helper
|
|||
|
||||
import os
|
||||
import logging.config
|
||||
import phonenumbers
|
||||
import re
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
from multiprocessing import Process as Proc
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
|
@ -65,7 +64,6 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
|
|||
proc.terminate()
|
||||
# Statistics.incr_module_timeout_statistic(module_name)
|
||||
err_mess = f"{module_name}: processing timeout: {item_id}"
|
||||
print(err_mess)
|
||||
logger.info(err_mess)
|
||||
return []
|
||||
else:
|
||||
|
@ -99,7 +97,6 @@ def regex_finditer(r_key, regex, item_id, content, max_time=30):
|
|||
proc.terminate()
|
||||
# Statistics.incr_module_timeout_statistic(r_key)
|
||||
err_mess = f"{r_key}: processing timeout: {item_id}"
|
||||
print(err_mess)
|
||||
logger.info(err_mess)
|
||||
return []
|
||||
else:
|
||||
|
@ -130,7 +127,6 @@ def regex_search(r_key, regex, item_id, content, max_time=30):
|
|||
proc.terminate()
|
||||
# Statistics.incr_module_timeout_statistic(r_key)
|
||||
err_mess = f"{r_key}: processing timeout: {item_id}"
|
||||
print(err_mess)
|
||||
logger.info(err_mess)
|
||||
return False
|
||||
else:
|
||||
|
@ -144,3 +140,40 @@ def regex_search(r_key, regex, item_id, content, max_time=30):
|
|||
print("Caught KeyboardInterrupt, terminating regex worker")
|
||||
proc.terminate()
|
||||
sys.exit(0)
|
||||
|
||||
## Phone Regexs ##
|
||||
def _regex_phone_iter(r_key, country_code, content):
    """
    Worker: scan content for phone numbers and push every match to the cache.

    Meant to run in a child process (see regex_phone_iter). Each match is
    appended to the cache list r_key as 'start:end:raw_string'.

    :param r_key: cache list key receiving the matches
    :param country_code: region handed to phonenumbers.PhoneNumberMatcher
    :param content: text to scan
    """
    for match in phonenumbers.PhoneNumberMatcher(content, country_code):
        # Keep the number exactly as it is written in the content.
        # A normalised form is available through
        # phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.E164)
        # or PhoneNumberFormat.INTERNATIONAL if ever needed.
        r_serv_cache.rpush(r_key, f'{match.start}:{match.end}:{match.raw_string}')
        # (Re)arm the TTL on every push: the parent may terminate this process
        # on timeout, and a TTL set only once the scan is finished would then
        # never be applied to the partial list.
        r_serv_cache.expire(r_key, 360)
|
||||
|
||||
def regex_phone_iter(r_key, country_code, item_id, content, max_time=30):
    """
    Extract phone numbers from content in a child process, with a timeout.

    :param r_key: cache list key used by the worker to hand back its matches
    :param country_code: region handed to the phone number matcher
    :param item_id: object id, only used in the timeout log message
    :param content: text to scan
    :param max_time: maximum number of seconds to wait for the worker

    :return: list of (start, end, value) tuples, empty list on timeout
    """
    proc = Proc(target=_regex_phone_iter, args=(r_key, country_code, content))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            # Give the worker a moment to really exit so it cannot push again
            # after the cleanup below.
            proc.join(1)
            # Drop whatever was pushed before the worker got killed, otherwise
            # the next call reusing r_key would return these stale matches.
            r_serv_cache.delete(r_key)
            # Statistics.incr_module_timeout_statistic(r_key)
            err_mess = f"{r_key}: processing timeout: {item_id}"
            logger.info(err_mess)
            return []
        else:
            res = r_serv_cache.lrange(r_key, 0, -1)
            r_serv_cache.delete(r_key)
            proc.terminate()
            all_match = []
            for match in res:
                # value is last and may itself contain ':', hence maxsplit=2
                start, end, value = match.split(':', 2)
                all_match.append((int(start), int(end), value))
            return all_match
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating regex worker")
        proc.terminate()
        sys.exit(0)
|
|
@ -15,7 +15,6 @@ It apply phone number regexes on item content and warn if above a threshold.
|
|||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import phonenumbers
|
||||
|
||||
|
@ -34,44 +33,65 @@ class Phone(AbstractModule):
|
|||
|
||||
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
|
||||
# reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
||||
REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||
# REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||
|
||||
def __init__(self):
|
||||
super(Phone, self).__init__()
|
||||
def __init__(self, queue=True):
    """
    :param queue: forwarded as-is to the parent module constructor
    """
    super().__init__(queue=queue)

    # Waiting time in seconds between two processed messages
    self.pending_seconds = 1
|
||||
|
||||
def extract(self, obj_id, content, tag):
    """
    Return the position and value of every phone number found in content.

    :param obj_id: object id
    :param content: object content
    :param tag: tag name attached to each returned entry

    :return: list of [start, end, value, 'tag:<tag>'] entries
    """
    return [
        [found[0], found[1], found[2], f'tag:{tag}']
        for found in self.regex_phone_iter('US', obj_id, content)
    ]
|
||||
|
||||
def compute(self, message):
    """
    Process one item: look for phone numbers in its content and tag it on a hit.

    :param message: value handed to Item(...) to load the object to analyse
    """
    item = Item(message)
    content = item.get_content()

    # TODO use language detection to choose the country code ?
    results = self.regex_phone_iter('US', item.id, content)
    for phone in results:
        print(phone[2])

    if results:
        # TAGS
        msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
        self.add_message_to_queue(msg, 'Tags')

        # Report how many numbers were found: len(results), not the length
        # of a single (start, end, value) match.
        self.redis_logger.warning(f'{item.get_id()} contains {len(results)} Phone numbers')
|
||||
|
||||
# # List of the regex results in the Item, may be null
|
||||
# results = self.REG_PHONE.findall(content)
|
||||
#
|
||||
# # If the list is greater than 4, we consider the Item may contain a list of phone numbers
|
||||
# if len(results) > 4:
|
||||
# self.logger.debug(results)
|
||||
# self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)')
|
||||
#
|
||||
# msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
|
||||
# self.add_message_to_queue(msg, 'Tags')
|
||||
#
|
||||
# stats = {}
|
||||
# for phone_number in results:
|
||||
# try:
|
||||
# x = phonenumbers.parse(phone_number, None)
|
||||
# country_code = x.country_code
|
||||
# if stats.get(country_code) is None:
|
||||
# stats[country_code] = 1
|
||||
# else:
|
||||
# stats[country_code] = stats[country_code] + 1
|
||||
# except:
|
||||
# pass
|
||||
# for country_code in stats:
|
||||
# if stats[country_code] > 4:
|
||||
# self.redis_logger.warning(f'{item.get_id()} contains Phone numbers with country code {country_code}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Start the module's processing loop. A one-off compute() call on a
    # hard-coded item id is only useful for local debugging: the launched
    # process would handle that single item instead of consuming its queue.
    module = Phone()
    module.run()
|
||||
|
|
|
@ -110,6 +110,17 @@ class AbstractModule(ABC):
|
|||
return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, obj_id, content,
|
||||
max_time=self.max_execution_time, r_set=r_set)
|
||||
|
||||
def regex_phone_iter(self, country_code, obj_id, content):
    """
    phone number extraction helper (force timeout)
    :param country_code: region used by the phone number matcher
    :param obj_id: object id
    :param content: object content

    :return: list of (start, end, value) matches, empty list on timeout
    """
    return regex_helper.regex_phone_iter(self.r_cache_key, country_code, obj_id, content,
                                         max_time=self.max_execution_time)
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Run Module endless process
|
||||
|
|
|
@ -128,10 +128,9 @@ publish = Duplicate,Tags
|
|||
subscribe = Cve
|
||||
publish = Tags
|
||||
|
||||
# Disabled
|
||||
#[Phone]
|
||||
#subscribe = Item
|
||||
#publish = Tags
|
||||
[Phone]
|
||||
subscribe = Item
|
||||
publish = Tags
|
||||
|
||||
[Keys]
|
||||
subscribe = Item
|
||||
|
|
Loading…
Reference in a new issue