chg: [Hosts] improve perf + regex timeout + cache DNS results

This commit is contained in:
terrtia 2024-01-30 10:28:50 +01:00
parent a10119fb6a
commit 2db8587d03
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
2 changed files with 37 additions and 29 deletions

View file

@ -41,7 +41,13 @@ class DomClassifier(AbstractModule):
addr_dns = config_loader.get_config_str("DomClassifier", "dns")
self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
redis_host = config_loader.get_config_str('Redis_Cache', 'host')
redis_port = config_loader.get_config_int('Redis_Cache', 'port')
redis_db = config_loader.get_config_int('Redis_Cache', 'db')
self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns],
redis_host=redis_host,
redis_port=redis_port, redis_db=redis_db,
re_timeout=30)
self.cc = config_loader.get_config_str("DomClassifier", "cc")
self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld")
@ -58,34 +64,34 @@ class DomClassifier(AbstractModule):
item_source = item.get_source()
try:
self.c.text(rawtext=host)
if not self.c.domain:
self.dom_classifier.text(rawtext=host)
if not self.dom_classifier.domain:
return
print(self.c.domain)
self.c.validdomain(passive_dns=True, extended=False)
# self.logger.debug(self.c.vdomain)
print(self.dom_classifier.domain)
self.dom_classifier.validdomain(passive_dns=True, extended=False)
# self.logger.debug(self.dom_classifier.vdomain)
print(self.c.vdomain)
print(self.dom_classifier.vdomain)
print()
if self.c.vdomain and d4.is_passive_dns_enabled():
for dns_record in self.c.vdomain:
if self.dom_classifier.vdomain and d4.is_passive_dns_enabled():
for dns_record in self.dom_classifier.vdomain:
self.add_message_to_queue(obj=None, message=dns_record)
if self.cc_tld:
localizeddomains = self.c.include(expression=self.cc_tld)
localizeddomains = self.dom_classifier.include(expression=self.cc_tld)
if localizeddomains:
print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
if self.cc:
localizeddomains = self.c.localizedomain(cc=self.cc)
localizeddomains = self.dom_classifier.localizedomain(cc=self.cc)
if localizeddomains:
print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
if r_result:
return self.c.vdomain
return self.dom_classifier.vdomain
except IOError as err:
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")

View file

@ -18,13 +18,14 @@ import os
import re
import sys
import DomainClassifier.domainclassifier
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib.objects.Items import Item
class Hosts(AbstractModule):
"""
@ -43,28 +44,29 @@ class Hosts(AbstractModule):
# Waiting time in seconds between two processed messages
self.pending_seconds = 1
self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
re.compile(self.host_regex)
redis_host = config_loader.get_config_str('Redis_Cache', 'host')
redis_port = config_loader.get_config_int('Redis_Cache', 'port')
redis_db = config_loader.get_config_int('Redis_Cache', 'db')
self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="",
redis_host=redis_host,
redis_port=redis_port,
redis_db=redis_db,
re_timeout=30)
self.logger.info(f"Module: {self.module_name} Launched")
def compute(self, message):
item = self.get_obj()
obj = self.get_obj()
# mimetype = item_basic.get_item_mimetype(item.get_id())
# if mimetype.split('/')[0] == "text":
content = item.get_content()
hosts = self.regex_findall(self.host_regex, item.get_id(), content, r_set=True)
if hosts:
print(f'{len(hosts)} host {item.get_id()}')
for host in hosts:
# print(host)
if not host.endswith('.onion'):
self.add_message_to_queue(message=str(host), queue='Host')
content = obj.get_content()
self.dom_classifier.text(content)
if self.dom_classifier.domain:
print(f'{len(self.dom_classifier.domain)} host {obj.get_id()}')
# print(self.dom_classifier.domain)
for domain in self.dom_classifier.domain:
if domain:
self.add_message_to_queue(message=domain, queue='Host')
if __name__ == '__main__':
module = Hosts()
module.run()