chg: [modules] add tests: CreditCard, Global, DomClassifier

This commit is contained in:
Terrtia 2021-05-27 17:28:20 +02:00
parent 20727fff77
commit 0c29e1e4fa
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
14 changed files with 199 additions and 129 deletions

View file

@ -62,7 +62,7 @@ class ApiKey(AbstractModule):
self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}') self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}')
msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}' msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
self.send_message_to_queue('Tags', msg) self.send_message_to_queue(msg, 'Tags')
# # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY # # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY
if aws_access_key: if aws_access_key:
@ -73,14 +73,14 @@ class ApiKey(AbstractModule):
self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}') self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}')
msg = 'infoleak:automatic-detection="aws-key";{}'.format(item.get_id()) msg = 'infoleak:automatic-detection="aws-key";{}'.format(item.get_id())
self.send_message_to_queue('Tags', msg) self.send_message_to_queue(msg, 'Tags')
# Tags # Tags
msg = f'infoleak:automatic-detection="api-key";{item.get_id()}' msg = f'infoleak:automatic-detection="api-key";{item.get_id()}'
self.send_message_to_queue('Tags', msg) self.send_message_to_queue(msg, 'Tags')
# Send to duplicate # Send to duplicate
self.send_message_to_queue('Duplicate', item.get_id()) self.send_message_to_queue(item.get_id(), 'Duplicate')
if r_result: if r_result:
return (google_api_key, aws_access_key, aws_secret_key) return (google_api_key, aws_access_key, aws_secret_key)

View file

@ -11,7 +11,6 @@ It apply IBAN regexes on item content and warn if above a threshold.
import redis import redis
import time import time
import redis
import datetime import datetime
import re import re
import string import string

View file

@ -93,7 +93,7 @@ class Categ(AbstractModule):
# Export message to categ queue # Export message to categ queue
print(msg, categ) print(msg, categ)
self.send_message_to_queue(categ, msg) self.send_message_to_queue(msg, categ)
self.redis_logger.info( self.redis_logger.info(
f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}') f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')

View file

@ -7,27 +7,23 @@ The CreditCards Module
This module is consuming the Redis-list created by the Categ module. This module is consuming the Redis-list created by the Categ module.
It apply credit card regexes on paste content and warn if above a threshold. It apply credit card regexes on item content and warn if a valid card number is found.
""" """
################################## ##################################
# Import External packages # Import External packages
################################## ##################################
import pprint
import time
from pubsublogger import publisher
import re import re
import sys import sys
import time
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from module.abstract_module import AbstractModule from module.abstract_module import AbstractModule
from packages import Paste from packages.Item import Item
from packages import lib_refine from packages import lib_refine
from Helper import Process
class CreditCards(AbstractModule): class CreditCards(AbstractModule):
""" """
@ -56,37 +52,37 @@ class CreditCards(AbstractModule):
self.redis_logger.info(f"Module {self.module_name} initialized") self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message, r_result=False):
    """
    Search an item for credit card numbers accepted by lib_refine.is_luhn_valid.

    :param message: string made of two whitespace-separated fields: '<item id> <score>'
    :param r_result: when True and at least one valid number is found, return the set of valid numbers
    """
    item_id, _score = message.split()
    item = Item(item_id)
    candidates = re.findall(self.regex, item.get_content())
    if not candidates:
        # Nothing matched the regex: nothing to log, nothing to forward
        return

    valid_cards = set()
    for candidate in candidates:
        # Keep digits only (drops separators matched by the regex)
        digits = re.sub('[^0-9]', '', candidate)
        if lib_refine.is_luhn_valid(digits):
            self.redis_logger.debug(f'{digits} is valid')
            valid_cards.add(digits)

    to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
    if not valid_cards:
        # Regex matches without any valid number
        self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
        return

    self.redis_logger.warning(f'{to_print}Checked {len(valid_cards)} valid number(s);{item.get_id()}')
    # Send to duplicate
    self.send_message_to_queue(item.get_id(), 'Duplicate')
    # Tag the item
    msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
    self.send_message_to_queue(msg, 'Tags')
    if r_result:
        return valid_cards
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -16,14 +16,13 @@ the out output of the Global module.
import os import os
import sys import sys
import time import time
from pubsublogger import publisher
import DomainClassifier.domainclassifier import DomainClassifier.domainclassifier
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from module.abstract_module import AbstractModule from module.abstract_module import AbstractModule
from Helper import Process from packages.Item import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import d4 import d4
@ -43,48 +42,51 @@ class DomClassifier(AbstractModule):
addr_dns = self.process.config.get("DomClassifier", "dns") addr_dns = self.process.config.get("DomClassifier", "dns")
self.redis_logger.info("""ZMQ DomainClassifier is Running""")
self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns]) self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
self.cc = self.process.config.get("DomClassifier", "cc") self.cc = self.process.config.get("DomClassifier", "cc")
self.cc_tld = self.process.config.get("DomClassifier", "cc_tld") self.cc_tld = self.process.config.get("DomClassifier", "cc_tld")
# Send module state to logs # Send module state to logs
self.redis_logger.info("Module %s initialized" % (self.module_name)) self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message): def compute(self, message, r_result=False):
item = Item(message)
item_content = item.get_content()
item_basename = item.get_basename()
item_date = item.get_date()
item_source = item.get_source()
try: try:
item_content = item_basic.get_item_content(message) mimetype = item_basic.get_item_mimetype(item.get_id())
mimetype = item_basic.get_item_mimetype(message)
item_basename = item_basic.get_basename(message)
item_source = item_basic.get_source(message)
item_date = item_basic.get_item_date(message)
if mimetype.split('/')[0] == "text": if mimetype.split('/')[0] == "text":
self.c.text(rawtext=item_content) self.c.text(rawtext=item_content)
self.c.potentialdomain() self.c.potentialdomain()
self.c.validdomain(passive_dns=True, extended=False) self.c.validdomain(passive_dns=True, extended=False)
self.redis_logger.debug(self.c.vdomain) #self.redis_logger.debug(self.c.vdomain)
if self.c.vdomain and d4.is_passive_dns_enabled(): if self.c.vdomain and d4.is_passive_dns_enabled():
for dns_record in self.c.vdomain: for dns_record in self.c.vdomain:
self.process.populate_set_out(dns_record) self.send_message_to_queue(dns_record)
localizeddomains = self.c.include(expression=self.cc_tld) localizeddomains = self.c.include(expression=self.cc_tld)
if localizeddomains: if localizeddomains:
self.redis_logger.debug(localizeddomains) print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{message}") self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
localizeddomains = self.c.localizedomain(cc=self.cc)
localizeddomains = self.c.localizedomain(cc=self.cc)
if localizeddomains: if localizeddomains:
self.redis_logger.debug(localizeddomains) print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{message}") self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
if r_result:
return self.c.vdomain
except IOError as err: except IOError as err:
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed") self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
raise Exception(f"CRC Checksum Failed on: {message}") raise Exception(f"CRC Checksum Failed on: {item.get_id()}")
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -5,10 +5,9 @@ The ZMQ_Feed_Q Module
===================== =====================
This module is consuming the Redis-list created by the ZMQ_Feed_Q Module, This module is consuming the Redis-list created by the ZMQ_Feed_Q Module,
And save the paste on disk to allow others modules to work on them. And save the item on disk to allow others modules to work on them.
..todo:: Be able to choose to delete or not the saved paste after processing. ..todo:: Be able to choose to delete or not the saved item after processing.
..todo:: Store the empty paste (unprocessed) somewhere in Redis.
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them. the same Subscriber name in both of them.
@ -17,7 +16,7 @@ Requirements
------------ ------------
*Need running Redis instances. *Need running Redis instances.
*Need the ZMQ_Feed_Q Module running to be able to work properly. *Need the Mixer or the Importer Module running to be able to work properly.
""" """
@ -31,19 +30,17 @@ import gzip
import os import os
import sys import sys
import time import time
import uuid
import datetime import datetime
import redis import redis
from pubsublogger import publisher
from hashlib import md5
from uuid import uuid4
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from module.abstract_module import AbstractModule from module.abstract_module import AbstractModule
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) from lib.ConfigLoader import ConfigLoader
import ConfigLoader
from Helper import Process
class Global(AbstractModule): class Global(AbstractModule):
@ -54,13 +51,13 @@ class Global(AbstractModule):
def __init__(self): def __init__(self):
super(Global, self).__init__() super(Global, self).__init__()
self.r_stats = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics") self.r_stats = ConfigLoader().get_redis_conn("ARDB_Statistics")
self.processed_paste = 0 self.processed_item = 0
# TODO rename time_1 explicitely self.time_last_stats = time.time()
self.time_1 = time.time()
# Get and sanityze PASTE DIRECTORY # Get and sanityze ITEM DIRECTORY
# # TODO: rename PASTE => ITEM
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes")) self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"))
self.PASTES_FOLDERS = self.PASTES_FOLDER + '/' self.PASTES_FOLDERS = self.PASTES_FOLDER + '/'
self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '') self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '')
@ -73,33 +70,34 @@ class Global(AbstractModule):
def computeNone(self):
    """
    Report how many items were processed once more than 30 seconds have
    elapsed since the last report, then reset the counter and the timer.
    """
    elapsed = time.time() - self.time_last_stats
    if int(elapsed) <= 30:
        return

    stats_msg = f'Global; ; ; ;glob Processed {self.processed_item} item(s) in {elapsed} s'
    print(stats_msg)
    self.redis_logger.debug(stats_msg)

    # Start a new statistics window
    self.time_last_stats = time.time()
    self.processed_item = 0
def compute(self, message): def compute(self, message, r_result=False):
# Recovering the streamed message informations # Recovering the streamed message informations
splitted = message.split() splitted = message.split()
if len(splitted) == 2: if len(splitted) == 2:
paste, gzip64encoded = splitted item, gzip64encoded = splitted
# Remove PASTES_FOLDER from item path (crawled item + submited) # Remove PASTES_FOLDER from item path (crawled item + submited)
if self.PASTES_FOLDERS in paste: if self.PASTES_FOLDERS in item:
paste = paste.replace(self.PASTES_FOLDERS, '', 1) item = item.replace(self.PASTES_FOLDERS, '', 1)
file_name_paste = paste.split('/')[-1] file_name_item = item.split('/')[-1]
if len(file_name_paste) > 255: if len(file_name_item) > 255:
new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4())) new_file_name_item = '{}{}.gz'.format(file_name_item[:215], str(uuid4()))
paste = self.rreplace(paste, file_name_paste, new_file_name_paste, 1) item = self.rreplace(item, file_name_item, new_file_name_item, 1)
# Creating the full filepath # Creating the full filepath
filename = os.path.join(self.PASTES_FOLDER, paste) filename = os.path.join(self.PASTES_FOLDER, item)
filename = os.path.realpath(filename) filename = os.path.realpath(filename)
# Incorrect filename # Incorrect filename
@ -112,11 +110,9 @@ class Global(AbstractModule):
new_file_content = self.gunzip_bytes_obj(decoded) new_file_content = self.gunzip_bytes_obj(decoded)
if new_file_content: if new_file_content:
filename = self.check_filename(filename, new_file_content) filename = self.check_filename(filename, new_file_content)
if filename: if filename:
# create subdir # create subdir
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
@ -125,17 +121,18 @@ class Global(AbstractModule):
with open(filename, 'wb') as f: with open(filename, 'wb') as f:
f.write(decoded) f.write(decoded)
paste = filename item_id = filename
# remove self.PASTES_FOLDER from # remove self.PASTES_FOLDER from
if self.PASTES_FOLDERS in paste: if self.PASTES_FOLDERS in item_id:
paste = paste.replace(self.PASTES_FOLDERS, '', 1) item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)
self.process.populate_set_out(paste) self.send_message_to_queue(item_id)
self.processed_paste+=1 self.processed_item+=1
if r_result:
return item_id
else: else:
# TODO Store the name of the empty paste inside a Redis-list self.redis_logger.debug(f"Empty Item: {message} not processed")
self.redis_logger.debug(f"Empty Paste: {message} not processed")
def check_filename(self, filename, new_file_content): def check_filename(self, filename, new_file_content):
@ -153,8 +150,8 @@ class Global(AbstractModule):
if curr_file_content: if curr_file_content:
# Compare file content with message content with MD5 checksums # Compare file content with message content with MD5 checksums
curr_file_md5 = hashlib.md5(curr_file_content).hexdigest() curr_file_md5 = md5(curr_file_content).hexdigest()
new_file_md5 = hashlib.md5(new_file_content).hexdigest() new_file_md5 = md5(new_file_content).hexdigest()
if new_file_md5 != curr_file_md5: if new_file_md5 != curr_file_md5:
# MD5 are not equals, verify filename # MD5 are not equals, verify filename
@ -162,7 +159,6 @@ class Global(AbstractModule):
filename = f'{filename[:-3]}_{new_file_md5}.gz' filename = f'{filename[:-3]}_{new_file_md5}.gz'
else: else:
filename = f'{filename}_{new_file_md5}' filename = f'{filename}_{new_file_md5}'
self.redis_logger.debug(f'new file to check: {filename}') self.redis_logger.debug(f'new file to check: {filename}')
if os.path.isfile(filename): if os.path.isfile(filename):
@ -207,7 +203,6 @@ class Global(AbstractModule):
def gunzip_bytes_obj(self, bytes_obj): def gunzip_bytes_obj(self, bytes_obj):
gunzipped_bytes_obj = None gunzipped_bytes_obj = None
try: try:
in_ = io.BytesIO() in_ = io.BytesIO()
in_.write(bytes_obj) in_.write(bytes_obj)
@ -215,7 +210,6 @@ class Global(AbstractModule):
with gzip.GzipFile(fileobj=in_, mode='rb') as fo: with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read() gunzipped_bytes_obj = fo.read()
except Exception as e: except Exception as e:
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}') self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
@ -224,7 +218,6 @@ class Global(AbstractModule):
def rreplace(self, s, old, new, occurrence):
    """
    Replace the last `occurrence` occurrences of `old` in `s` by `new`.

    :param s: input string
    :param old: substring to replace
    :param new: replacement string
    :param occurrence: maximum number of replacements, counted from the right
    """
    # Split from the right, then glue the pieces back with the replacement
    return new.join(s.rsplit(old, occurrence))

View file

@ -154,6 +154,8 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x" screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x"
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./DbCleaner.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "ModuleInformation" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModulesInformationV2.py -k 0 -c 1; read x" screen -S "Script_AIL" -X screen -t "ModuleInformation" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModulesInformationV2.py -k 0 -c 1; read x"
@ -224,8 +226,6 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x" screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x"
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x" screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x"
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x" screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x"

View file

@ -147,7 +147,7 @@ class Onion(AbstractModule):
# TAG Item # TAG Item
msg = f'infoleak:automatic-detection="onion";{item.get_id()}' msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
self.send_message_to_queue('Tags', msg) self.send_message_to_queue(msg, 'Tags')
if crawlers.is_crawler_activated(): if crawlers.is_crawler_activated():
for to_crawl in urls_to_crawl: for to_crawl in urls_to_crawl:

View file

@ -5,22 +5,19 @@
The Tags Module The Tags Module
================================ ================================
This module create tags. This module add tags to an item.
""" """
################################## ##################################
# Import External packages # Import External packages
################################## ##################################
import time
from pubsublogger import publisher
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from module.abstract_module import AbstractModule from module.abstract_module import AbstractModule
from Helper import Process from packages.Item import Item
from packages import Tag from packages import Tag
@ -29,17 +26,6 @@ class Tags(AbstractModule):
Tags module for AIL framework Tags module for AIL framework
""" """
# Channel name to forward message
out_channel_name = 'MISP_The_Hive_feeder'
# Split char in incomming message
msg_sep = ';'
# Tag object type
# TODO could be an enum in Tag class
tag_type = 'item'
def __init__(self): def __init__(self):
super(Tags, self).__init__() super(Tags, self).__init__()
@ -51,20 +37,21 @@ class Tags(AbstractModule):
def compute(self, message):
    """
    Add a tag to an item and forward the message to the 'MISP_The_Hive_feeder' queue.

    :param message: string formatted as '<tag>;<item id>'
    :raises Exception: if the message does not split into exactly two ';'-separated fields
    """
    # Extract item ID and tag from message
    mess_split = message.split(';')
    if len(mess_split) == 2:
        tag = mess_split[0]
        item = Item(mess_split[1])

        # Create a new tag
        Tag.add_tag('item', tag, item.get_id())
        # The replacement field must be closed right after the call: with the
        # brace at the end of the string, ' Tagged <tag>' was parsed as a
        # format spec, which is not a valid spec for a string id.
        print(f'{item.get_id()}: Tagged {tag}')

        # Forward message to channel
        self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
    else:
        # Malformed message
        raise Exception(f'too many values to unpack (expected 2) given {len(mess_split)} with message {message}')
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -62,11 +62,11 @@ class AbstractModule(ABC):
""" """
return self.process.get_from_set() return self.process.get_from_set()
def send_message_to_queue(self, queue_name, message): def send_message_to_queue(self, message, queue_name=None):
""" """
Send message to queue Send message to queue
:param queue_name: queue or module name
:param message: message to send in queue :param message: message to send in queue
:param queue_name: queue or module name
ex: send_to_queue(item_id, 'Global') ex: send_to_queue(item_id, 'Global')
""" """

View file

@ -30,6 +30,7 @@ from item_basic import *
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader.ConfigLoader()
# get and sanityze PASTE DIRECTORY # get and sanityze PASTE DIRECTORY
# # TODO: rename PASTES_FOLDER
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '') PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
@ -574,12 +575,32 @@ class Item(AbstractObject):
def get_basename(self):
    """
    Returns the last path component of the item id
    """
    item_id = self.id
    return os.path.basename(item_id)
def get_filename(self):
    """
    Returns the resolved full path of the item file,
    or None when that path is not located under PASTES_FOLDER
    """
    # Build the full path and resolve it
    full_path = os.path.realpath(os.path.join(PASTES_FOLDER, self.id))

    # Incorrect filename: the resolved path does not start with PASTES_FOLDER
    if os.path.commonprefix([full_path, PASTES_FOLDER]) != PASTES_FOLDER:
        return None
    return full_path
def get_content(self):
    """
    Returns the content of this item, as provided by item_basic.get_item_content
    """
    content = item_basic.get_item_content(self.id)
    return content
# # TODO:
def create(self):
    """
    Placeholder: item creation is not implemented yet, calling it does nothing
    """
    pass
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
# TODO: DELETE ITEM CORRELATION + TAGS + METADATA + ...
def delete(self):
    """
    Remove the item file from disk (test helper).

    Only the file is removed: correlations, tags and metadata are not cleaned.

    :raises ValueError: if the item id does not resolve to a file path (get_filename() returned None)
    :raises OSError: propagated from os.remove, e.g. FileNotFoundError if the file does not exist
    """
    filename = self.get_filename()
    if filename is None:
        # Fail with an explicit error instead of the TypeError raised by os.remove(None)
        raise ValueError(f'Invalid item id, no file to delete: {self.id}')
    os.remove(filename)
# if __name__ == '__main__': # if __name__ == '__main__':
# #
# item = Item('') # item = Item('')

View file

@ -5,15 +5,22 @@ import os
import sys import sys
import unittest import unittest
import gzip
from base64 import b64encode
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
# Modules Classes # Modules Classes
from ApiKey import ApiKey from ApiKey import ApiKey
from Categ import Categ from Categ import Categ
from CreditCards import CreditCards
from DomClassifier import DomClassifier
from Global import Global
from Onion import Onion from Onion import Onion
# project packages # project packages
import lib.crawlers as crawlers import lib.crawlers as crawlers
import packages.Item as Item
class Test_Module_ApiKey(unittest.TestCase): class Test_Module_ApiKey(unittest.TestCase):
@ -43,6 +50,71 @@ class Test_Module_Categ(unittest.TestCase):
result = self.module_obj.compute(item_id, r_result=True) result = self.module_obj.compute(item_id, r_result=True)
self.assertCountEqual(result, test_categ) self.assertCountEqual(result, test_categ)
class Test_Module_CreditCards(unittest.TestCase):
    """Check the card numbers returned by CreditCards.compute() for the fixture item."""

    def setUp(self):
        self.module_obj = CreditCards()

    def test_module(self):
        # Module input: '<item id> <score>'
        message = 'tests/2021/01/01/credit_cards.gz 7'
        expected_cards = [
            '341039324930797',   # American Express
            '6011613905509166',  # Discover Card
            '3547151714018657',  # Japan Credit Bureau (JCB)
            '5492981206527330',  # 16 digits MasterCard
            '4024007132849695',  # 16-digit VISA, with separators (disabled alternative: '4532525919781')
        ]

        found_cards = self.module_obj.compute(message, r_result=True)
        self.assertCountEqual(found_cards, expected_cards)
class Test_Module_DomClassifier(unittest.TestCase):
    """Check that DomClassifier.compute() returns a non-empty result for the fixture item."""

    def setUp(self):
        self.module_obj = DomClassifier()

    def test_module(self):
        fixture_id = 'tests/2021/01/01/domain_classifier.gz'
        domains = self.module_obj.compute(fixture_id, r_result=True)
        # At least one entry must have been returned
        nb_domains = len(domains)
        self.assertTrue(nb_domains)
class Test_Module_Global(unittest.TestCase):
    """Exercise Global.compute() with a new item, a duplicate, and the same id with different content."""

    def setUp(self):
        self.module_obj = Global()

    def test_module(self):
        item_id = 'tests/2021/01/01/global.gz'

        # Start from a clean state. The file only exists when a previous run
        # left it behind, so a missing file must not fail the test.
        item = Item.Item(item_id)
        try:
            item.delete()
        except FileNotFoundError:
            pass

        item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
        item_content_1 = b64encode(gzip.compress(item_content)).decode()
        item_content_2 = b64encode(gzip.compress(item_content + b' more text')).decode()
        message = f'{item_id} {item_content_1}'

        # Test new item
        result = self.module_obj.compute(message, r_result=True)
        print(result)
        self.assertEqual(result, item_id)

        # Test duplicate
        result = self.module_obj.compute(message, r_result=True)
        print(result)
        self.assertIsNone(result)

        # Test same id with != content
        message = f'{item_id} {item_content_2}'
        result = self.module_obj.compute(message, r_result=True)
        print(result)
        # The item is expected to be stored under a renamed id that keeps the
        # original id (without its '.gz' suffix) as a prefix. The renamed id is
        # longer than item_id, so the containment check goes this way around.
        self.assertIsNotNone(result)
        self.assertIn(item_id[:-3], result)
        self.assertNotEqual(result, item_id)

        # cleanup
        item = Item.Item(result)
        item.delete()
        # # TODO: remove from queue
class Test_Module_Onion(unittest.TestCase): class Test_Module_Onion(unittest.TestCase):
def setUp(self): def setUp(self):