chg: [modules] add tests: CreditCard, Global, DomClassifier

This commit is contained in:
Terrtia 2021-05-27 17:28:20 +02:00
parent 20727fff77
commit 0c29e1e4fa
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
14 changed files with 199 additions and 129 deletions

View file

@ -62,7 +62,7 @@ class ApiKey(AbstractModule):
self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}')
msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
self.send_message_to_queue('Tags', msg)
self.send_message_to_queue(msg, 'Tags')
# # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY
if aws_access_key:
@ -73,14 +73,14 @@ class ApiKey(AbstractModule):
self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}')
msg = 'infoleak:automatic-detection="aws-key";{}'.format(item.get_id())
self.send_message_to_queue('Tags', msg)
self.send_message_to_queue(msg, 'Tags')
# Tags
msg = f'infoleak:automatic-detection="api-key";{item.get_id()}'
self.send_message_to_queue('Tags', msg)
self.send_message_to_queue(msg, 'Tags')
# Send to duplicate
self.send_message_to_queue('Duplicate', item.get_id())
self.send_message_to_queue(item.get_id(), 'Duplicate')
if r_result:
return (google_api_key, aws_access_key, aws_secret_key)

View file

@ -11,7 +11,6 @@ It apply IBAN regexes on item content and warn if above a threshold.
import redis
import time
import redis
import datetime
import re
import string

View file

@ -93,7 +93,7 @@ class Categ(AbstractModule):
# Export message to categ queue
print(msg, categ)
self.send_message_to_queue(categ, msg)
self.send_message_to_queue(msg, categ)
self.redis_logger.info(
f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')

View file

@ -7,27 +7,23 @@ The CreditCards Module
This module is consuming the Redis-list created by the Categ module.
It apply credit card regexes on paste content and warn if above a threshold.
It apply credit card regexes on item content and warn if a valid card number is found.
"""
##################################
# Import External packages
##################################
import pprint
import time
from pubsublogger import publisher
import re
import sys
import time
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from packages import Paste
from packages.Item import Item
from packages import lib_refine
from Helper import Process
class CreditCards(AbstractModule):
"""
@ -56,39 +52,39 @@ class CreditCards(AbstractModule):
self.redis_logger.info(f"Module {self.module_name} initialized")
def compute(self, message):
filename, score = message.split()
paste = Paste.Paste(filename)
content = paste.get_p_content()
def compute(self, message, r_result=False):
id, score = message.split()
item = Item(id)
content = item.get_content()
all_cards = re.findall(self.regex, content)
if len(all_cards) > 0:
self.redis_logger.debug(f'All matching {all_cards}')
#self.redis_logger.debug(f'All matching {all_cards}')
creditcard_set = set([])
for card in all_cards:
clean_card = re.sub('[^0-9]', '', card)
# TODO purpose of this assignation ?
clean_card = clean_card
if lib_refine.is_luhn_valid(clean_card):
self.redis_logger.debug(f'{clean_card} is valid')
creditcard_set.add(clean_card)
pprint.pprint(creditcard_set)
to_print = f'CreditCard;{paste.p_source};{paste.p_date};{paste.p_name};'
#pprint.pprint(creditcard_set)
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
if (len(creditcard_set) > 0):
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{paste.p_rel_path}')
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
#Send to duplicate
self.process.populate_set_out(filename, 'Duplicate')
self.send_message_to_queue(item.get_id(), 'Duplicate')
msg = f'infoleak:automatic-detection="credit-card";{filename}'
self.process.populate_set_out(msg, 'Tags')
msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
self.send_message_to_queue(msg, 'Tags')
if r_result:
return creditcard_set
else:
self.redis_logger.info(f'{to_print}CreditCard related;{paste.p_rel_path}')
self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
if __name__ == '__main__':
module = CreditCards()
module.run()

View file

@ -16,14 +16,13 @@ the out output of the Global module.
import os
import sys
import time
from pubsublogger import publisher
import DomainClassifier.domainclassifier
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from Helper import Process
from packages.Item import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import d4
@ -43,48 +42,51 @@ class DomClassifier(AbstractModule):
addr_dns = self.process.config.get("DomClassifier", "dns")
self.redis_logger.info("""ZMQ DomainClassifier is Running""")
self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
self.cc = self.process.config.get("DomClassifier", "cc")
self.cc_tld = self.process.config.get("DomClassifier", "cc_tld")
# Send module state to logs
self.redis_logger.info("Module %s initialized" % (self.module_name))
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message):
def compute(self, message, r_result=False):
item = Item(message)
item_content = item.get_content()
item_basename = item.get_basename()
item_date = item.get_date()
item_source = item.get_source()
try:
item_content = item_basic.get_item_content(message)
mimetype = item_basic.get_item_mimetype(message)
item_basename = item_basic.get_basename(message)
item_source = item_basic.get_source(message)
item_date = item_basic.get_item_date(message)
mimetype = item_basic.get_item_mimetype(item.get_id())
if mimetype.split('/')[0] == "text":
self.c.text(rawtext=item_content)
self.c.potentialdomain()
self.c.validdomain(passive_dns=True, extended=False)
self.redis_logger.debug(self.c.vdomain)
#self.redis_logger.debug(self.c.vdomain)
if self.c.vdomain and d4.is_passive_dns_enabled():
for dns_record in self.c.vdomain:
self.process.populate_set_out(dns_record)
self.send_message_to_queue(dns_record)
localizeddomains = self.c.include(expression=self.cc_tld)
if localizeddomains:
self.redis_logger.debug(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{message}")
localizeddomains = self.c.localizedomain(cc=self.cc)
print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
localizeddomains = self.c.localizedomain(cc=self.cc)
if localizeddomains:
self.redis_logger.debug(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{message}")
print(localizeddomains)
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
if r_result:
return self.c.vdomain
except IOError as err:
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
raise Exception(f"CRC Checksum Failed on: {message}")
raise Exception(f"CRC Checksum Failed on: {item.get_id()}")
if __name__ == "__main__":

View file

@ -5,10 +5,9 @@ The ZMQ_Feed_Q Module
=====================
This module is consuming the Redis-list created by the ZMQ_Feed_Q Module,
And save the paste on disk to allow others modules to work on them.
And save the item on disk to allow others modules to work on them.
..todo:: Be able to choose to delete or not the saved paste after processing.
..todo:: Store the empty paste (unprocessed) somewhere in Redis.
..todo:: Be able to choose to delete or not the saved item after processing.
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
the same Subscriber name in both of them.
@ -17,7 +16,7 @@ Requirements
------------
*Need running Redis instances.
*Need the ZMQ_Feed_Q Module running to be able to work properly.
*Need the Mixer or the Importer Module running to be able to work properly.
"""
@ -31,36 +30,34 @@ import gzip
import os
import sys
import time
import uuid
import datetime
import redis
from pubsublogger import publisher
from hashlib import md5
from uuid import uuid4
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from Helper import Process
from lib.ConfigLoader import ConfigLoader
class Global(AbstractModule):
"""
Global module for AIL framework
"""
def __init__(self):
super(Global, self).__init__()
self.r_stats = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
self.processed_paste = 0
# TODO rename time_1 explicitely
self.time_1 = time.time()
self.r_stats = ConfigLoader().get_redis_conn("ARDB_Statistics")
# Get and sanityze PASTE DIRECTORY
self.processed_item = 0
self.time_last_stats = time.time()
# Get and sanityze ITEM DIRECTORY
# # TODO: rename PASTE => ITEM
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"))
self.PASTES_FOLDERS = self.PASTES_FOLDER + '/'
self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '')
@ -73,50 +70,49 @@ class Global(AbstractModule):
def computeNone(self):
difftime = time.time() - self.time_1
difftime = time.time() - self.time_last_stats
if int(difftime) > 30:
to_print = f'Global; ; ; ;glob Processed {self.processed_paste} paste(s) in {difftime} s'
to_print = f'Global; ; ; ;glob Processed {self.processed_item} item(s) in {difftime} s'
print(to_print)
self.redis_logger.debug(to_print)
self.time_1 = time.time()
self.processed_paste = 0
self.time_last_stats = time.time()
self.processed_item = 0
def compute(self, message):
def compute(self, message, r_result=False):
# Recovering the streamed message informations
splitted = message.split()
if len(splitted) == 2:
paste, gzip64encoded = splitted
item, gzip64encoded = splitted
# Remove PASTES_FOLDER from item path (crawled item + submited)
if self.PASTES_FOLDERS in paste:
paste = paste.replace(self.PASTES_FOLDERS, '', 1)
if self.PASTES_FOLDERS in item:
item = item.replace(self.PASTES_FOLDERS, '', 1)
file_name_paste = paste.split('/')[-1]
if len(file_name_paste) > 255:
new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
paste = self.rreplace(paste, file_name_paste, new_file_name_paste, 1)
file_name_item = item.split('/')[-1]
if len(file_name_item) > 255:
new_file_name_item = '{}{}.gz'.format(file_name_item[:215], str(uuid4()))
item = self.rreplace(item, file_name_item, new_file_name_item, 1)
# Creating the full filepath
filename = os.path.join(self.PASTES_FOLDER, paste)
filename = os.path.join(self.PASTES_FOLDER, item)
filename = os.path.realpath(filename)
# Incorrect filename
if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER:
self.redis_logger.warning(f'Global; Path traversal detected {filename}')
else:
# Decode compressed base64
decoded = base64.standard_b64decode(gzip64encoded)
new_file_content = self.gunzip_bytes_obj(decoded)
if new_file_content:
filename = self.check_filename(filename, new_file_content)
if filename:
# create subdir
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
@ -125,17 +121,18 @@ class Global(AbstractModule):
with open(filename, 'wb') as f:
f.write(decoded)
paste = filename
item_id = filename
# remove self.PASTES_FOLDER from
if self.PASTES_FOLDERS in paste:
paste = paste.replace(self.PASTES_FOLDERS, '', 1)
if self.PASTES_FOLDERS in item_id:
item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)
self.process.populate_set_out(paste)
self.processed_paste+=1
self.send_message_to_queue(item_id)
self.processed_item+=1
if r_result:
return item_id
else:
# TODO Store the name of the empty paste inside a Redis-list
self.redis_logger.debug(f"Empty Paste: {message} not processed")
self.redis_logger.debug(f"Empty Item: {message} not processed")
def check_filename(self, filename, new_file_content):
@ -153,8 +150,8 @@ class Global(AbstractModule):
if curr_file_content:
# Compare file content with message content with MD5 checksums
curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()
new_file_md5 = hashlib.md5(new_file_content).hexdigest()
curr_file_md5 = md5(curr_file_content).hexdigest()
new_file_md5 = md5(new_file_content).hexdigest()
if new_file_md5 != curr_file_md5:
# MD5 are not equals, verify filename
@ -162,7 +159,6 @@ class Global(AbstractModule):
filename = f'{filename[:-3]}_{new_file_md5}.gz'
else:
filename = f'{filename}_{new_file_md5}'
self.redis_logger.debug(f'new file to check: {filename}')
if os.path.isfile(filename):
@ -174,12 +170,12 @@ class Global(AbstractModule):
# Ignore duplicate checksum equals
self.redis_logger.debug(f'ignore duplicated file {filename}')
filename = None
else:
# File not unzipped
filename = None
return filename
@ -207,15 +203,13 @@ class Global(AbstractModule):
def gunzip_bytes_obj(self, bytes_obj):
gunzipped_bytes_obj = None
try:
in_ = io.BytesIO()
in_.write(bytes_obj)
in_.seek(0)
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read()
except Exception as e:
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
@ -224,11 +218,10 @@ class Global(AbstractModule):
def rreplace(self, s, old, new, occurrence):
li = s.rsplit(old, occurrence)
return new.join(li)
if __name__ == '__main__':
module = Global()
module.run()

View file

@ -154,6 +154,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./DbCleaner.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "ModuleInformation" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModulesInformationV2.py -k 0 -c 1; read x"
@ -224,8 +226,6 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x"

View file

@ -147,7 +147,7 @@ class Onion(AbstractModule):
# TAG Item
msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
self.send_message_to_queue('Tags', msg)
self.send_message_to_queue(msg, 'Tags')
if crawlers.is_crawler_activated():
for to_crawl in urls_to_crawl:

View file

@ -5,22 +5,19 @@
The Tags Module
================================
This module create tags.
This module add tags to an item.
"""
##################################
# Import External packages
##################################
import time
from pubsublogger import publisher
##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from Helper import Process
from packages.Item import Item
from packages import Tag
@ -29,17 +26,6 @@ class Tags(AbstractModule):
Tags module for AIL framework
"""
# Channel name to forward message
out_channel_name = 'MISP_The_Hive_feeder'
# Split char in incomming message
msg_sep = ';'
# Tag object type
# TODO could be an enum in Tag class
tag_type = 'item'
def __init__(self):
super(Tags, self).__init__()
@ -51,23 +37,24 @@ class Tags(AbstractModule):
def compute(self, message):
self.redis_logger.debug(message)
if len(message.split(Tags.msg_sep)) == 2:
# Extract item ID and tag from message
tag, item_id = message.split(Tags.msg_sep)
# Extract item ID and tag from message
mess_split = message.split(';')
if len(mess_split) == 2:
tag = mess_split[0]
item = Item(mess_split[1])
# Create a new tag
Tag.add_tag(Tags.tag_type, tag, item_id)
Tag.add_tag('item', tag, item.get_id())
print(f'{item.get_id(): Tagged {tag}}')
# Forward message to channel
self.process.populate_set_out(message, Tags.out_channel_name)
self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
else:
# Malformed message
raise Exception(f'too many values to unpack (expected 2) given {len(message.split(Tags.msg_sep))} with message {message}')
raise Exception(f'too many values to unpack (expected 2) given {len(mess_split)} with message {message}')
if __name__ == '__main__':
module = Tags()
module.run()

View file

@ -35,7 +35,7 @@ class AbstractModule(ABC):
# Init Redis Logger
self.redis_logger = publisher
# Port of the redis instance used by pubsublogger
self.redis_logger.port = 6380
@ -62,11 +62,11 @@ class AbstractModule(ABC):
"""
return self.process.get_from_set()
def send_message_to_queue(self, queue_name, message):
def send_message_to_queue(self, message, queue_name=None):
"""
Send message to queue
:param queue_name: queue or module name
:param message: message to send in queue
:param queue_name: queue or module name
ex: send_to_queue(item_id, 'Global')
"""

View file

@ -30,6 +30,7 @@ from item_basic import *
config_loader = ConfigLoader.ConfigLoader()
# get and sanityze PASTE DIRECTORY
# # TODO: rename PASTES_FOLDER
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
@ -574,12 +575,32 @@ class Item(AbstractObject):
def get_basename(self):
return os.path.basename(self.id)
def get_filename(self):
# Creating the full filepath
filename = os.path.join(PASTES_FOLDER, self.id)
filename = os.path.realpath(filename)
# incorrect filename
if not os.path.commonprefix([filename, PASTES_FOLDER]) == PASTES_FOLDER:
return None
else:
return filename
def get_content(self):
"""
Returns Item content
"""
return item_basic.get_item_content(self.id)
# # TODO:
def create(self):
pass
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
# TODO: DELETE ITEM CORRELATION + TAGS + METADATA + ...
def delete(self):
os.remove(self.get_filename())
# if __name__ == '__main__':
#
# item = Item('')

View file

@ -5,15 +5,22 @@ import os
import sys
import unittest
import gzip
from base64 import b64encode
sys.path.append(os.environ['AIL_BIN'])
# Modules Classes
from ApiKey import ApiKey
from Categ import Categ
from CreditCards import CreditCards
from DomClassifier import DomClassifier
from Global import Global
from Onion import Onion
# project packages
import lib.crawlers as crawlers
import packages.Item as Item
class Test_Module_ApiKey(unittest.TestCase):
@ -43,6 +50,71 @@ class Test_Module_Categ(unittest.TestCase):
result = self.module_obj.compute(item_id, r_result=True)
self.assertCountEqual(result, test_categ)
class Test_Module_CreditCards(unittest.TestCase):
def setUp(self):
self.module_obj = CreditCards()
def test_module(self):
item_id = 'tests/2021/01/01/credit_cards.gz 7'
test_cards = ['341039324930797', # American Express
'6011613905509166', # Discover Card
'3547151714018657', # Japan Credit Bureau (JCB)
'5492981206527330', # 16 digits MasterCard
'4024007132849695', # '4532525919781' # 16-digit VISA, with separators
]
result = self.module_obj.compute(item_id, r_result=True)
self.assertCountEqual(result, test_cards)
class Test_Module_DomClassifier(unittest.TestCase):
def setUp(self):
self.module_obj = DomClassifier()
def test_module(self):
item_id = 'tests/2021/01/01/domain_classifier.gz'
result = self.module_obj.compute(item_id, r_result=True)
self.assertTrue(len(result))
class Test_Module_Global(unittest.TestCase):
def setUp(self):
self.module_obj = Global()
def test_module(self):
# # TODO: delete item
item_id = 'tests/2021/01/01/global.gz'
item = Item.Item(item_id)
item.delete()
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
item_content_1 = b64encode(gzip.compress(item_content)).decode()
item_content_2 = b64encode(gzip.compress(item_content + b' more text')).decode()
message = f'{item_id} {item_content_1}'
# Test new item
result = self.module_obj.compute(message, r_result=True)
print(result)
self.assertEqual(result, item_id)
# Test duplicate
result = self.module_obj.compute(message, r_result=True)
print(result)
self.assertIsNone(result)
# Test same id with != content
message = f'{item_id} {item_content_2}'
result = self.module_obj.compute(message, r_result=True)
print(result)
self.assertIn(result, item_id)
self.assertNotEqual(result, item_id)
# cleanup
item = Item.Item(result)
item.delete()
# # TODO: remove from queue
class Test_Module_Onion(unittest.TestCase):
def setUp(self):