mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [modules] add tests: CreditCard, Global, DomClassifier
This commit is contained in:
parent
20727fff77
commit
0c29e1e4fa
14 changed files with 199 additions and 129 deletions
|
@ -62,7 +62,7 @@ class ApiKey(AbstractModule):
|
|||
self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}')
|
||||
|
||||
msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
|
||||
self.send_message_to_queue('Tags', msg)
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
# # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY
|
||||
if aws_access_key:
|
||||
|
@ -73,14 +73,14 @@ class ApiKey(AbstractModule):
|
|||
self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}')
|
||||
|
||||
msg = 'infoleak:automatic-detection="aws-key";{}'.format(item.get_id())
|
||||
self.send_message_to_queue('Tags', msg)
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
# Tags
|
||||
msg = f'infoleak:automatic-detection="api-key";{item.get_id()}'
|
||||
self.send_message_to_queue('Tags', msg)
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
# Send to duplicate
|
||||
self.send_message_to_queue('Duplicate', item.get_id())
|
||||
self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
|
||||
if r_result:
|
||||
return (google_api_key, aws_access_key, aws_secret_key)
|
||||
|
|
|
@ -11,7 +11,6 @@ It apply IBAN regexes on item content and warn if above a threshold.
|
|||
|
||||
import redis
|
||||
import time
|
||||
import redis
|
||||
import datetime
|
||||
import re
|
||||
import string
|
||||
|
|
|
@ -93,7 +93,7 @@ class Categ(AbstractModule):
|
|||
|
||||
# Export message to categ queue
|
||||
print(msg, categ)
|
||||
self.send_message_to_queue(categ, msg)
|
||||
self.send_message_to_queue(msg, categ)
|
||||
|
||||
self.redis_logger.info(
|
||||
f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')
|
||||
|
|
|
@ -7,27 +7,23 @@ The CreditCards Module
|
|||
|
||||
This module is consuming the Redis-list created by the Categ module.
|
||||
|
||||
It apply credit card regexes on paste content and warn if above a threshold.
|
||||
It apply credit card regexes on item content and warn if a valid card number is found.
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import pprint
|
||||
import time
|
||||
from pubsublogger import publisher
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from packages import Paste
|
||||
from packages.Item import Item
|
||||
from packages import lib_refine
|
||||
from Helper import Process
|
||||
|
||||
|
||||
class CreditCards(AbstractModule):
|
||||
"""
|
||||
|
@ -56,39 +52,39 @@ class CreditCards(AbstractModule):
|
|||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
filename, score = message.split()
|
||||
paste = Paste.Paste(filename)
|
||||
content = paste.get_p_content()
|
||||
def compute(self, message, r_result=False):
|
||||
id, score = message.split()
|
||||
item = Item(id)
|
||||
content = item.get_content()
|
||||
all_cards = re.findall(self.regex, content)
|
||||
|
||||
if len(all_cards) > 0:
|
||||
self.redis_logger.debug(f'All matching {all_cards}')
|
||||
#self.redis_logger.debug(f'All matching {all_cards}')
|
||||
creditcard_set = set([])
|
||||
|
||||
for card in all_cards:
|
||||
clean_card = re.sub('[^0-9]', '', card)
|
||||
# TODO purpose of this assignation ?
|
||||
clean_card = clean_card
|
||||
if lib_refine.is_luhn_valid(clean_card):
|
||||
self.redis_logger.debug(f'{clean_card} is valid')
|
||||
creditcard_set.add(clean_card)
|
||||
|
||||
pprint.pprint(creditcard_set)
|
||||
to_print = f'CreditCard;{paste.p_source};{paste.p_date};{paste.p_name};'
|
||||
|
||||
#pprint.pprint(creditcard_set)
|
||||
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
if (len(creditcard_set) > 0):
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{paste.p_rel_path}')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
|
||||
|
||||
#Send to duplicate
|
||||
self.process.populate_set_out(filename, 'Duplicate')
|
||||
self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
|
||||
msg = f'infoleak:automatic-detection="credit-card";{filename}'
|
||||
self.process.populate_set_out(msg, 'Tags')
|
||||
msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
if r_result:
|
||||
return creditcard_set
|
||||
else:
|
||||
self.redis_logger.info(f'{to_print}CreditCard related;{paste.p_rel_path}')
|
||||
self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
module = CreditCards()
|
||||
module.run()
|
||||
|
|
|
@ -16,14 +16,13 @@ the out output of the Global module.
|
|||
import os
|
||||
import sys
|
||||
import time
|
||||
from pubsublogger import publisher
|
||||
import DomainClassifier.domainclassifier
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from Helper import Process
|
||||
from packages.Item import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import d4
|
||||
|
@ -43,48 +42,51 @@ class DomClassifier(AbstractModule):
|
|||
|
||||
addr_dns = self.process.config.get("DomClassifier", "dns")
|
||||
|
||||
self.redis_logger.info("""ZMQ DomainClassifier is Running""")
|
||||
|
||||
self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
|
||||
|
||||
self.cc = self.process.config.get("DomClassifier", "cc")
|
||||
self.cc_tld = self.process.config.get("DomClassifier", "cc_tld")
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info("Module %s initialized" % (self.module_name))
|
||||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
def compute(self, message, r_result=False):
|
||||
item = Item(message)
|
||||
|
||||
item_content = item.get_content()
|
||||
item_basename = item.get_basename()
|
||||
item_date = item.get_date()
|
||||
item_source = item.get_source()
|
||||
try:
|
||||
item_content = item_basic.get_item_content(message)
|
||||
mimetype = item_basic.get_item_mimetype(message)
|
||||
item_basename = item_basic.get_basename(message)
|
||||
item_source = item_basic.get_source(message)
|
||||
item_date = item_basic.get_item_date(message)
|
||||
mimetype = item_basic.get_item_mimetype(item.get_id())
|
||||
|
||||
if mimetype.split('/')[0] == "text":
|
||||
self.c.text(rawtext=item_content)
|
||||
self.c.potentialdomain()
|
||||
self.c.validdomain(passive_dns=True, extended=False)
|
||||
self.redis_logger.debug(self.c.vdomain)
|
||||
#self.redis_logger.debug(self.c.vdomain)
|
||||
|
||||
if self.c.vdomain and d4.is_passive_dns_enabled():
|
||||
for dns_record in self.c.vdomain:
|
||||
self.process.populate_set_out(dns_record)
|
||||
self.send_message_to_queue(dns_record)
|
||||
|
||||
localizeddomains = self.c.include(expression=self.cc_tld)
|
||||
if localizeddomains:
|
||||
self.redis_logger.debug(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{message}")
|
||||
localizeddomains = self.c.localizedomain(cc=self.cc)
|
||||
print(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
|
||||
|
||||
localizeddomains = self.c.localizedomain(cc=self.cc)
|
||||
if localizeddomains:
|
||||
self.redis_logger.debug(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{message}")
|
||||
print(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
|
||||
|
||||
if r_result:
|
||||
return self.c.vdomain
|
||||
|
||||
except IOError as err:
|
||||
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
|
||||
raise Exception(f"CRC Checksum Failed on: {message}")
|
||||
raise Exception(f"CRC Checksum Failed on: {item.get_id()}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -5,10 +5,9 @@ The ZMQ_Feed_Q Module
|
|||
=====================
|
||||
|
||||
This module is consuming the Redis-list created by the ZMQ_Feed_Q Module,
|
||||
And save the paste on disk to allow others modules to work on them.
|
||||
And save the item on disk to allow others modules to work on them.
|
||||
|
||||
..todo:: Be able to choose to delete or not the saved paste after processing.
|
||||
..todo:: Store the empty paste (unprocessed) somewhere in Redis.
|
||||
..todo:: Be able to choose to delete or not the saved item after processing.
|
||||
|
||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
||||
the same Subscriber name in both of them.
|
||||
|
@ -17,7 +16,7 @@ Requirements
|
|||
------------
|
||||
|
||||
*Need running Redis instances.
|
||||
*Need the ZMQ_Feed_Q Module running to be able to work properly.
|
||||
*Need the Mixer or the Importer Module running to be able to work properly.
|
||||
|
||||
"""
|
||||
|
||||
|
@ -31,36 +30,34 @@ import gzip
|
|||
import os
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
import datetime
|
||||
import redis
|
||||
from pubsublogger import publisher
|
||||
|
||||
from hashlib import md5
|
||||
from uuid import uuid4
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
from Helper import Process
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
|
||||
|
||||
class Global(AbstractModule):
|
||||
"""
|
||||
Global module for AIL framework
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
super(Global, self).__init__()
|
||||
|
||||
self.r_stats = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
|
||||
|
||||
self.processed_paste = 0
|
||||
# TODO rename time_1 explicitely
|
||||
self.time_1 = time.time()
|
||||
self.r_stats = ConfigLoader().get_redis_conn("ARDB_Statistics")
|
||||
|
||||
# Get and sanityze PASTE DIRECTORY
|
||||
self.processed_item = 0
|
||||
self.time_last_stats = time.time()
|
||||
|
||||
# Get and sanityze ITEM DIRECTORY
|
||||
# # TODO: rename PASTE => ITEM
|
||||
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"))
|
||||
self.PASTES_FOLDERS = self.PASTES_FOLDER + '/'
|
||||
self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '')
|
||||
|
@ -73,50 +70,49 @@ class Global(AbstractModule):
|
|||
|
||||
|
||||
def computeNone(self):
|
||||
difftime = time.time() - self.time_1
|
||||
difftime = time.time() - self.time_last_stats
|
||||
if int(difftime) > 30:
|
||||
to_print = f'Global; ; ; ;glob Processed {self.processed_paste} paste(s) in {difftime} s'
|
||||
to_print = f'Global; ; ; ;glob Processed {self.processed_item} item(s) in {difftime} s'
|
||||
print(to_print)
|
||||
self.redis_logger.debug(to_print)
|
||||
|
||||
self.time_1 = time.time()
|
||||
self.processed_paste = 0
|
||||
self.time_last_stats = time.time()
|
||||
self.processed_item = 0
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
def compute(self, message, r_result=False):
|
||||
# Recovering the streamed message informations
|
||||
splitted = message.split()
|
||||
|
||||
if len(splitted) == 2:
|
||||
paste, gzip64encoded = splitted
|
||||
item, gzip64encoded = splitted
|
||||
|
||||
# Remove PASTES_FOLDER from item path (crawled item + submited)
|
||||
if self.PASTES_FOLDERS in paste:
|
||||
paste = paste.replace(self.PASTES_FOLDERS, '', 1)
|
||||
if self.PASTES_FOLDERS in item:
|
||||
item = item.replace(self.PASTES_FOLDERS, '', 1)
|
||||
|
||||
file_name_paste = paste.split('/')[-1]
|
||||
if len(file_name_paste) > 255:
|
||||
new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
|
||||
paste = self.rreplace(paste, file_name_paste, new_file_name_paste, 1)
|
||||
file_name_item = item.split('/')[-1]
|
||||
if len(file_name_item) > 255:
|
||||
new_file_name_item = '{}{}.gz'.format(file_name_item[:215], str(uuid4()))
|
||||
item = self.rreplace(item, file_name_item, new_file_name_item, 1)
|
||||
|
||||
# Creating the full filepath
|
||||
filename = os.path.join(self.PASTES_FOLDER, paste)
|
||||
filename = os.path.join(self.PASTES_FOLDER, item)
|
||||
filename = os.path.realpath(filename)
|
||||
|
||||
# Incorrect filename
|
||||
if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER:
|
||||
self.redis_logger.warning(f'Global; Path traversal detected {filename}')
|
||||
|
||||
|
||||
else:
|
||||
# Decode compressed base64
|
||||
decoded = base64.standard_b64decode(gzip64encoded)
|
||||
new_file_content = self.gunzip_bytes_obj(decoded)
|
||||
|
||||
if new_file_content:
|
||||
|
||||
filename = self.check_filename(filename, new_file_content)
|
||||
|
||||
if filename:
|
||||
|
||||
# create subdir
|
||||
dirname = os.path.dirname(filename)
|
||||
if not os.path.exists(dirname):
|
||||
|
@ -125,17 +121,18 @@ class Global(AbstractModule):
|
|||
with open(filename, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
paste = filename
|
||||
item_id = filename
|
||||
# remove self.PASTES_FOLDER from
|
||||
if self.PASTES_FOLDERS in paste:
|
||||
paste = paste.replace(self.PASTES_FOLDERS, '', 1)
|
||||
if self.PASTES_FOLDERS in item_id:
|
||||
item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)
|
||||
|
||||
self.process.populate_set_out(paste)
|
||||
self.processed_paste+=1
|
||||
self.send_message_to_queue(item_id)
|
||||
self.processed_item+=1
|
||||
if r_result:
|
||||
return item_id
|
||||
|
||||
else:
|
||||
# TODO Store the name of the empty paste inside a Redis-list
|
||||
self.redis_logger.debug(f"Empty Paste: {message} not processed")
|
||||
self.redis_logger.debug(f"Empty Item: {message} not processed")
|
||||
|
||||
|
||||
def check_filename(self, filename, new_file_content):
|
||||
|
@ -153,8 +150,8 @@ class Global(AbstractModule):
|
|||
|
||||
if curr_file_content:
|
||||
# Compare file content with message content with MD5 checksums
|
||||
curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()
|
||||
new_file_md5 = hashlib.md5(new_file_content).hexdigest()
|
||||
curr_file_md5 = md5(curr_file_content).hexdigest()
|
||||
new_file_md5 = md5(new_file_content).hexdigest()
|
||||
|
||||
if new_file_md5 != curr_file_md5:
|
||||
# MD5 are not equals, verify filename
|
||||
|
@ -162,7 +159,6 @@ class Global(AbstractModule):
|
|||
filename = f'{filename[:-3]}_{new_file_md5}.gz'
|
||||
else:
|
||||
filename = f'{filename}_{new_file_md5}'
|
||||
|
||||
self.redis_logger.debug(f'new file to check: {filename}')
|
||||
|
||||
if os.path.isfile(filename):
|
||||
|
@ -174,12 +170,12 @@ class Global(AbstractModule):
|
|||
# Ignore duplicate checksum equals
|
||||
self.redis_logger.debug(f'ignore duplicated file {filename}')
|
||||
filename = None
|
||||
|
||||
|
||||
else:
|
||||
# File not unzipped
|
||||
filename = None
|
||||
|
||||
|
||||
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
|
@ -207,15 +203,13 @@ class Global(AbstractModule):
|
|||
|
||||
def gunzip_bytes_obj(self, bytes_obj):
|
||||
gunzipped_bytes_obj = None
|
||||
|
||||
try:
|
||||
in_ = io.BytesIO()
|
||||
in_.write(bytes_obj)
|
||||
in_.seek(0)
|
||||
|
||||
|
||||
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
|
||||
gunzipped_bytes_obj = fo.read()
|
||||
|
||||
except Exception as e:
|
||||
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
|
||||
|
||||
|
@ -224,11 +218,10 @@ class Global(AbstractModule):
|
|||
|
||||
def rreplace(self, s, old, new, occurrence):
|
||||
li = s.rsplit(old, occurrence)
|
||||
|
||||
return new.join(li)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
module = Global()
|
||||
module.run()
|
||||
|
|
|
@ -154,6 +154,8 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./DbCleaner.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
|
||||
screen -S "Script_AIL" -X screen -t "ModuleInformation" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModulesInformationV2.py -k 0 -c 1; read x"
|
||||
|
@ -224,8 +226,6 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x"
|
||||
|
|
|
@ -147,7 +147,7 @@ class Onion(AbstractModule):
|
|||
|
||||
# TAG Item
|
||||
msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
|
||||
self.send_message_to_queue('Tags', msg)
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
if crawlers.is_crawler_activated():
|
||||
for to_crawl in urls_to_crawl:
|
||||
|
|
37
bin/Tags.py
37
bin/Tags.py
|
@ -5,22 +5,19 @@
|
|||
The Tags Module
|
||||
================================
|
||||
|
||||
This module create tags.
|
||||
This module add tags to an item.
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import time
|
||||
from pubsublogger import publisher
|
||||
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from Helper import Process
|
||||
from packages.Item import Item
|
||||
from packages import Tag
|
||||
|
||||
|
||||
|
@ -29,17 +26,6 @@ class Tags(AbstractModule):
|
|||
Tags module for AIL framework
|
||||
"""
|
||||
|
||||
# Channel name to forward message
|
||||
out_channel_name = 'MISP_The_Hive_feeder'
|
||||
|
||||
# Split char in incomming message
|
||||
msg_sep = ';'
|
||||
|
||||
# Tag object type
|
||||
# TODO could be an enum in Tag class
|
||||
tag_type = 'item'
|
||||
|
||||
|
||||
def __init__(self):
|
||||
super(Tags, self).__init__()
|
||||
|
||||
|
@ -51,23 +37,24 @@ class Tags(AbstractModule):
|
|||
|
||||
|
||||
def compute(self, message):
|
||||
self.redis_logger.debug(message)
|
||||
|
||||
if len(message.split(Tags.msg_sep)) == 2:
|
||||
# Extract item ID and tag from message
|
||||
tag, item_id = message.split(Tags.msg_sep)
|
||||
# Extract item ID and tag from message
|
||||
mess_split = message.split(';')
|
||||
if len(mess_split) == 2:
|
||||
tag = mess_split[0]
|
||||
item = Item(mess_split[1])
|
||||
|
||||
# Create a new tag
|
||||
Tag.add_tag(Tags.tag_type, tag, item_id)
|
||||
Tag.add_tag('item', tag, item.get_id())
|
||||
print(f'{item.get_id(): Tagged {tag}}')
|
||||
|
||||
# Forward message to channel
|
||||
self.process.populate_set_out(message, Tags.out_channel_name)
|
||||
self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
|
||||
else:
|
||||
# Malformed message
|
||||
raise Exception(f'too many values to unpack (expected 2) given {len(message.split(Tags.msg_sep))} with message {message}')
|
||||
raise Exception(f'too many values to unpack (expected 2) given {len(mess_split)} with message {message}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
module = Tags()
|
||||
module.run()
|
||||
|
|
|
@ -35,7 +35,7 @@ class AbstractModule(ABC):
|
|||
|
||||
# Init Redis Logger
|
||||
self.redis_logger = publisher
|
||||
|
||||
|
||||
# Port of the redis instance used by pubsublogger
|
||||
self.redis_logger.port = 6380
|
||||
|
||||
|
@ -62,11 +62,11 @@ class AbstractModule(ABC):
|
|||
"""
|
||||
return self.process.get_from_set()
|
||||
|
||||
def send_message_to_queue(self, queue_name, message):
|
||||
def send_message_to_queue(self, message, queue_name=None):
|
||||
"""
|
||||
Send message to queue
|
||||
:param queue_name: queue or module name
|
||||
:param message: message to send in queue
|
||||
:param queue_name: queue or module name
|
||||
|
||||
ex: send_to_queue(item_id, 'Global')
|
||||
"""
|
||||
|
|
|
@ -30,6 +30,7 @@ from item_basic import *
|
|||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
# get and sanityze PASTE DIRECTORY
|
||||
# # TODO: rename PASTES_FOLDER
|
||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
||||
PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
|
||||
|
||||
|
@ -574,12 +575,32 @@ class Item(AbstractObject):
|
|||
def get_basename(self):
|
||||
return os.path.basename(self.id)
|
||||
|
||||
def get_filename(self):
|
||||
# Creating the full filepath
|
||||
filename = os.path.join(PASTES_FOLDER, self.id)
|
||||
filename = os.path.realpath(filename)
|
||||
|
||||
# incorrect filename
|
||||
if not os.path.commonprefix([filename, PASTES_FOLDER]) == PASTES_FOLDER:
|
||||
return None
|
||||
else:
|
||||
return filename
|
||||
|
||||
def get_content(self):
|
||||
"""
|
||||
Returns Item content
|
||||
"""
|
||||
return item_basic.get_item_content(self.id)
|
||||
|
||||
# # TODO:
|
||||
def create(self):
|
||||
pass
|
||||
|
||||
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
|
||||
# TODO: DELETE ITEM CORRELATION + TAGS + METADATA + ...
|
||||
def delete(self):
|
||||
os.remove(self.get_filename())
|
||||
|
||||
# if __name__ == '__main__':
|
||||
#
|
||||
# item = Item('')
|
||||
|
|
|
@ -5,15 +5,22 @@ import os
|
|||
import sys
|
||||
import unittest
|
||||
|
||||
import gzip
|
||||
from base64 import b64encode
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
||||
# Modules Classes
|
||||
from ApiKey import ApiKey
|
||||
from Categ import Categ
|
||||
from CreditCards import CreditCards
|
||||
from DomClassifier import DomClassifier
|
||||
from Global import Global
|
||||
from Onion import Onion
|
||||
|
||||
# project packages
|
||||
import lib.crawlers as crawlers
|
||||
import packages.Item as Item
|
||||
|
||||
class Test_Module_ApiKey(unittest.TestCase):
|
||||
|
||||
|
@ -43,6 +50,71 @@ class Test_Module_Categ(unittest.TestCase):
|
|||
result = self.module_obj.compute(item_id, r_result=True)
|
||||
self.assertCountEqual(result, test_categ)
|
||||
|
||||
class Test_Module_CreditCards(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = CreditCards()
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/credit_cards.gz 7'
|
||||
test_cards = ['341039324930797', # American Express
|
||||
'6011613905509166', # Discover Card
|
||||
'3547151714018657', # Japan Credit Bureau (JCB)
|
||||
'5492981206527330', # 16 digits MasterCard
|
||||
'4024007132849695', # '4532525919781' # 16-digit VISA, with separators
|
||||
]
|
||||
|
||||
result = self.module_obj.compute(item_id, r_result=True)
|
||||
self.assertCountEqual(result, test_cards)
|
||||
|
||||
class Test_Module_DomClassifier(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = DomClassifier()
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/domain_classifier.gz'
|
||||
result = self.module_obj.compute(item_id, r_result=True)
|
||||
self.assertTrue(len(result))
|
||||
|
||||
class Test_Module_Global(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = Global()
|
||||
|
||||
def test_module(self):
|
||||
# # TODO: delete item
|
||||
item_id = 'tests/2021/01/01/global.gz'
|
||||
item = Item.Item(item_id)
|
||||
item.delete()
|
||||
|
||||
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
|
||||
item_content_1 = b64encode(gzip.compress(item_content)).decode()
|
||||
item_content_2 = b64encode(gzip.compress(item_content + b' more text')).decode()
|
||||
message = f'{item_id} {item_content_1}'
|
||||
|
||||
# Test new item
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(result)
|
||||
self.assertEqual(result, item_id)
|
||||
|
||||
# Test duplicate
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(result)
|
||||
self.assertIsNone(result)
|
||||
|
||||
# Test same id with != content
|
||||
message = f'{item_id} {item_content_2}'
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(result)
|
||||
self.assertIn(result, item_id)
|
||||
self.assertNotEqual(result, item_id)
|
||||
|
||||
# cleanup
|
||||
item = Item.Item(result)
|
||||
item.delete()
|
||||
# # TODO: remove from queue
|
||||
|
||||
class Test_Module_Onion(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
|
Loading…
Reference in a new issue