chg: [extractor] add cache + UI extractor + word/set extractor

This commit is contained in:
Terrtia 2023-02-23 16:25:15 +01:00
parent ab24343b48
commit 0fa27c6a51
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
8 changed files with 208 additions and 69 deletions

View file

@ -64,7 +64,7 @@ class CreditCards(AbstractModule):
for card in cards:
start, end, value = card
if self.get_valid_card(value):
extracted.append(card)
extracted.append([start, end, value, f'tag:{tag}'])
return extracted
def compute(self, message, r_result=False):

View file

@ -69,8 +69,7 @@ class Iban(AbstractModule):
start, end, value = iban
value = ''.join(e for e in value if e.isalnum())
if self.is_valid_iban(value):
print(value)
extracted.append(iban)
extracted.append([start, end, value, f'tag:{tag}'])
return extracted
def compute(self, message):

View file

@ -130,7 +130,7 @@ class Mail(AbstractModule):
mxdomains[mxdomain].append(mail)
for mx in self.check_mx_record(mxdomains.keys()):
for row in mxdomains[mx]:
extracted.append(row)
extracted.append([row[0], row[1], row[2], f'tag:{tag}'])
return extracted
# # TODO: sanitize mails

View file

@ -62,7 +62,7 @@ class Onion(AbstractModule):
url_unpack = crawlers.unpack_url(value)
domain = url_unpack['domain']
if crawlers.is_valid_onion_domain(domain):
extracted.append(onion)
extracted.append([start, end, value, f'tag:{tag}'])
return extracted
def compute(self, message):

View file

@ -409,8 +409,12 @@ class Tools(AbstractModule):
return TOOLS.keys()
def extract(self, obj_id, content, tag):
    """Return the matches of the tool referenced by *tag*, labelled with that tag.

    The tool name is derived from the tag: the last double-quoted segment
    of *tag* is taken and its final five characters are dropped
    (presumably a "-tool" suffix -- TODO confirm against the tag format).
    The resulting name selects the regex from ``TOOLS``.

    :param obj_id: identifier forwarded unchanged to ``self.regex_finditer``
    :param content: content forwarded unchanged to ``self.regex_finditer``
    :param tag: tag string that names the tool and labels every match
    :return: list of ``[match[0], match[1], match[2], 'tag:<tag>']`` entries
        (the first three fields are presumably start, end and value, as in
        the sibling extractors -- verify against ``regex_finditer``)
    :raises KeyError: if the derived tool name is not a key of ``TOOLS``
    :raises IndexError: if *tag* does not contain a double-quoted segment
    """
    tool_name = tag.rsplit('"', 2)[1][:-5]
    matches = self.regex_finditer(TOOLS[tool_name]['regex'], obj_id, content)
    # No early return of the raw matches: every match must carry the
    # 'tag:<tag>' label, like the results of the other extractors.
    return [[match[0], match[1], match[2], f'tag:{tag}'] for match in matches]
def compute(self, message):
item = Item(message)