mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
chg: [Hosts] improve perf + regex timeout + cache DNS results
This commit is contained in:
parent
a10119fb6a
commit
2db8587d03
2 changed files with 37 additions and 29 deletions
|
@ -41,7 +41,13 @@ class DomClassifier(AbstractModule):
|
||||||
|
|
||||||
addr_dns = config_loader.get_config_str("DomClassifier", "dns")
|
addr_dns = config_loader.get_config_str("DomClassifier", "dns")
|
||||||
|
|
||||||
self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
|
redis_host = config_loader.get_config_str('Redis_Cache', 'host')
|
||||||
|
redis_port = config_loader.get_config_int('Redis_Cache', 'port')
|
||||||
|
redis_db = config_loader.get_config_int('Redis_Cache', 'db')
|
||||||
|
self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns],
|
||||||
|
redis_host=redis_host,
|
||||||
|
redis_port=redis_port, redis_db=redis_db,
|
||||||
|
re_timeout=30)
|
||||||
|
|
||||||
self.cc = config_loader.get_config_str("DomClassifier", "cc")
|
self.cc = config_loader.get_config_str("DomClassifier", "cc")
|
||||||
self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld")
|
self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld")
|
||||||
|
@ -58,34 +64,34 @@ class DomClassifier(AbstractModule):
|
||||||
item_source = item.get_source()
|
item_source = item.get_source()
|
||||||
try:
|
try:
|
||||||
|
|
||||||
self.c.text(rawtext=host)
|
self.dom_classifier.text(rawtext=host)
|
||||||
if not self.c.domain:
|
if not self.dom_classifier.domain:
|
||||||
return
|
return
|
||||||
print(self.c.domain)
|
print(self.dom_classifier.domain)
|
||||||
self.c.validdomain(passive_dns=True, extended=False)
|
self.dom_classifier.validdomain(passive_dns=True, extended=False)
|
||||||
# self.logger.debug(self.c.vdomain)
|
# self.logger.debug(self.dom_classifier.vdomain)
|
||||||
|
|
||||||
print(self.c.vdomain)
|
print(self.dom_classifier.vdomain)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
if self.c.vdomain and d4.is_passive_dns_enabled():
|
if self.dom_classifier.vdomain and d4.is_passive_dns_enabled():
|
||||||
for dns_record in self.c.vdomain:
|
for dns_record in self.dom_classifier.vdomain:
|
||||||
self.add_message_to_queue(obj=None, message=dns_record)
|
self.add_message_to_queue(obj=None, message=dns_record)
|
||||||
|
|
||||||
if self.cc_tld:
|
if self.cc_tld:
|
||||||
localizeddomains = self.c.include(expression=self.cc_tld)
|
localizeddomains = self.dom_classifier.include(expression=self.cc_tld)
|
||||||
if localizeddomains:
|
if localizeddomains:
|
||||||
print(localizeddomains)
|
print(localizeddomains)
|
||||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
|
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
|
||||||
|
|
||||||
if self.cc:
|
if self.cc:
|
||||||
localizeddomains = self.c.localizedomain(cc=self.cc)
|
localizeddomains = self.dom_classifier.localizedomain(cc=self.cc)
|
||||||
if localizeddomains:
|
if localizeddomains:
|
||||||
print(localizeddomains)
|
print(localizeddomains)
|
||||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
|
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
|
||||||
|
|
||||||
if r_result:
|
if r_result:
|
||||||
return self.c.vdomain
|
return self.dom_classifier.vdomain
|
||||||
|
|
||||||
except IOError as err:
|
except IOError as err:
|
||||||
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
|
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
|
||||||
|
|
|
@ -18,13 +18,14 @@ import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
import DomainClassifier.domainclassifier
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects.Items import Item
|
|
||||||
|
|
||||||
class Hosts(AbstractModule):
|
class Hosts(AbstractModule):
|
||||||
"""
|
"""
|
||||||
|
@ -43,28 +44,29 @@ class Hosts(AbstractModule):
|
||||||
# Waiting time in seconds between two processed messages
|
# Waiting time in seconds between two processed messages
|
||||||
self.pending_seconds = 1
|
self.pending_seconds = 1
|
||||||
|
|
||||||
self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
|
redis_host = config_loader.get_config_str('Redis_Cache', 'host')
|
||||||
re.compile(self.host_regex)
|
redis_port = config_loader.get_config_int('Redis_Cache', 'port')
|
||||||
|
redis_db = config_loader.get_config_int('Redis_Cache', 'db')
|
||||||
|
self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="",
|
||||||
|
redis_host=redis_host,
|
||||||
|
redis_port=redis_port,
|
||||||
|
redis_db=redis_db,
|
||||||
|
re_timeout=30)
|
||||||
self.logger.info(f"Module: {self.module_name} Launched")
|
self.logger.info(f"Module: {self.module_name} Launched")
|
||||||
|
|
||||||
def compute(self, message):
|
def compute(self, message):
|
||||||
item = self.get_obj()
|
obj = self.get_obj()
|
||||||
|
|
||||||
# mimetype = item_basic.get_item_mimetype(item.get_id())
|
content = obj.get_content()
|
||||||
# if mimetype.split('/')[0] == "text":
|
self.dom_classifier.text(content)
|
||||||
|
if self.dom_classifier.domain:
|
||||||
content = item.get_content()
|
print(f'{len(self.dom_classifier.domain)} host {obj.get_id()}')
|
||||||
hosts = self.regex_findall(self.host_regex, item.get_id(), content, r_set=True)
|
# print(self.dom_classifier.domain)
|
||||||
if hosts:
|
for domain in self.dom_classifier.domain:
|
||||||
print(f'{len(hosts)} host {item.get_id()}')
|
if domain:
|
||||||
for host in hosts:
|
self.add_message_to_queue(message=domain, queue='Host')
|
||||||
# print(host)
|
|
||||||
if not host.endswith('.onion'):
|
|
||||||
self.add_message_to_queue(message=str(host), queue='Host')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
module = Hosts()
|
module = Hosts()
|
||||||
module.run()
|
module.run()
|
||||||
|
|
Loading…
Reference in a new issue