diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh
index 874108b7..6a415daa 100755
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@@ -78,10 +78,10 @@ function helptext {
     [-k | --killAll]              Kill DB + Scripts
     [-ks | --killscript]          Scripts
     [-u | --update]               Update AIL
-    [-c | --crawler]              LAUNCH Crawlers
-    [-f | --launchFeeder]         LAUNCH Pystemon feeder
-    [-t | --thirdpartyUpdate]     Update Web
+    [-ut | --thirdpartyUpdate]    Update Web
+    [-t | --test]                 Launch Tests
     [-rp | --resetPassword]       Reset Password
+    [-f | --launchFeeder]         LAUNCH Pystemon feeder
     [-m | --menu]                 Display Advanced Menu
     [-h | --help]                 Help
    "
@@ -234,34 +234,34 @@ function launching_scripts {
 }

-function launching_crawler {
-    if [[ ! $iscrawler ]]; then
-        CONFIG=$AIL_HOME/configs/core.cfg
-        lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_port/{print $3;exit}' "${CONFIG}")
-
-        IFS='-' read -ra PORTS <<< "$lport"
-        if [ ${#PORTS[@]} -eq 1 ]
-        then
-            first_port=${PORTS[0]}
-            last_port=${PORTS[0]}
-        else
-            first_port=${PORTS[0]}
-            last_port=${PORTS[1]}
-        fi
-
-        screen -dmS "Crawler_AIL"
-        sleep 0.1
-
-        for ((i=first_port;i<=last_port;i++)); do
-            screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Crawler.py $i; read x"
-            sleep 0.1
-        done
-
-        echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT
-    else
-        echo -e $RED"\t* A screen is already launched"$DEFAULT
-    fi
-}
+# function launching_crawler {
+#     if [[ ! $iscrawler ]]; then
+#         CONFIG=$AIL_HOME/configs/core.cfg
+#         lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_port/{print $3;exit}' "${CONFIG}")
+#
+#         IFS='-' read -ra PORTS <<< "$lport"
+#         if [ ${#PORTS[@]} -eq 1 ]
+#         then
+#             first_port=${PORTS[0]}
+#             last_port=${PORTS[0]}
+#         else
+#             first_port=${PORTS[0]}
+#             last_port=${PORTS[1]}
+#         fi
+#
+#         screen -dmS "Crawler_AIL"
+#         sleep 0.1
+#
+#         for ((i=first_port;i<=last_port;i++)); do
+#             screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Crawler.py $i; read x"
+#             sleep 0.1
+#         done
+#
+#         echo -e $GREEN"\t* Launching Crawler_AIL scripts"$DEFAULT
+#     else
+#         echo -e $RED"\t* A screen is already launched"$DEFAULT
+#     fi
+# }

 function shutting_down_redis {
     redis_dir=${AIL_HOME}/redis/src/
@@ -490,6 +490,12 @@ function update_thirdparty {
     fi
 }

+function launch_tests() {
+    tests_dir=${AIL_HOME}/tests
+    bin_dir=${AIL_BIN}
+    python3 $(which nosetests) -w "$tests_dir" --with-coverage --cover-package="$bin_dir" -d
+}
+
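+# Usage sketch (assumes nosetests is installed in the AIL virtualenv):
+#   ./bin/LAUNCH.sh -t
+# runs the suite in ${AIL_HOME}/tests with coverage measured over ${AIL_BIN}.
+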
 function reset_password() {
     echo -e "\t* Reseting UI admin password..."
     if checking_ardb && checking_redis; then
@@ -557,9 +563,6 @@ function menu_display {
             Flask)
                 launch_flask;
                 ;;
-            Crawler)
-                launching_crawler;
-                ;;
             Killall)
                 killall;
                 ;;
@@ -614,12 +617,12 @@ while [ "$1" != "" ]; do
             ;;
         -u | --update ) update "--manual";
             ;;
-        -t | --thirdpartyUpdate ) update_thirdparty;
+        -t | --test ) launch_tests;
+            ;;
+        -ut | --thirdpartyUpdate ) update_thirdparty;
             ;;
         -rp | --resetPassword ) reset_password;
             ;;
-        -c | --crawler ) launching_crawler;
-            ;;
         -f | --launchFeeder ) launch_feeder;
             ;;
         -h | --help ) helptext;
diff --git a/bin/Onion.py b/bin/Onion.py
index 88ced41e..304a001d 100755
--- a/bin/Onion.py
+++ b/bin/Onion.py
@@ -126,11 +126,9 @@ class Onion(AbstractModule):
         # list of tuples: (url, subdomains, domain)
         urls_to_crawl = []

-        print(message)
         id, score = message.split()
         item = Item(id)
         item_content = item.get_content()
-        item_content = 'http://33333333.kingdom7rv6wkfzn.onion?sdsd=ooooo http://2222222.kingdom7rv6wkfzn.onion'

         # max execution time on regex
         res = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content)
@@ -145,10 +143,6 @@ class Onion(AbstractModule):
                     domain = url_unpack['domain'].decode().lower()
                 except Exception as e:
                     domain = url_unpack['domain'].lower()
-                print('----')
-                print(url)
-                print(subdomain)
-                print(domain)

                 if crawlers.is_valid_onion_domain(domain):
                     urls_to_crawl.append((url, subdomain, domain))
@@ -164,8 +158,10 @@ class Onion(AbstractModule):
             if crawlers.is_crawler_activated():
                 for to_crawl in urls_to_crawl:
+                    print(f'{to_crawl[2]} added to crawler queue: {to_crawl[0]}')
                     crawlers.add_item_to_discovery_queue('onion', to_crawl[2], to_crawl[1], to_crawl[0], item.get_id())
             else:
+                print(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
                 self.redis_logger.warning(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
             # keep manual fetcher ????
             ## Manually fetch first page if crawler is disabled
@@ -176,11 +172,3 @@ if __name__ == "__main__":

     module = Onion()
     module.run()
-
-
-
-
-
-
-
-##########################
diff --git a/bin/lib/ail_objects.py b/bin/lib/ail_objects.py
new file mode 100755
index 00000000..565661dc
--- /dev/null
+++ b/bin/lib/ail_objects.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+import uuid
+import redis
+
+from abc import ABC
+from flask import url_for
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
+import ConfigLoader
+# NOTE: the helpers below use these AIL modules; import locations are
+# assumed to follow the usual bin/lib and bin/packages layout.
+import Decoded
+import Domain
+import Screenshot
+import Item
+import Pgp
+import Cryptocurrency
+
+class AbstractObject(ABC):
+    """
+    Abstract Object
+    """
+
+    # first seen / last seen ??
+    # # TODO: - tags
+    #         - handle + refactor correlations
+    #         - create other objects
+
+    def __init__(self, obj_type, id):
+        """ Abstract class for all AIL objects
+
+        :param obj_type: object type (item, ...)
+        :param id: object ID
+        """
+        self.id = id
+        self.type = obj_type
+
+    def get_type(self):
+        return self.type
+
+    def get_id(self):
+        return self.id
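+# Minimal subclass sketch (hypothetical, for illustration only):
+#
+#     class ItemObject(AbstractObject):
+#         def __init__(self, id):
+#             super().__init__('item', id)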
+
+config_loader = ConfigLoader.ConfigLoader()
+r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+config_loader = None
+
+def is_valid_object_type(object_type):
+    if object_type in ['domain', 'item', 'image', 'decoded']:
+        return True
+    else:
+        return False
+
+def get_all_objects():
+    return ['domain', 'paste', 'pgp', 'cryptocurrency', 'decoded', 'screenshot']
+
+def get_all_correlation_names():
+    '''
+    Return a list of all available correlations
+    '''
+    return ['pgp', 'cryptocurrency', 'decoded', 'screenshot']
+
+def get_all_correlation_objects():
+    '''
+    Return a list of all correlated objects
+    '''
+    return ['domain', 'paste']
+
+def exist_object(object_type, correlation_id, type_id=None):
+    if object_type == 'domain':
+        return Domain.verify_if_domain_exist(correlation_id)
+    elif object_type == 'paste' or object_type == 'item':
+        return Item.exist_item(correlation_id)
+    elif object_type == 'decoded':
+        return Decoded.exist_decoded(correlation_id)
+    elif object_type == 'pgp':
+        return Pgp.pgp._exist_corelation_field(type_id, correlation_id)
+    elif object_type == 'cryptocurrency':
+        return Cryptocurrency.cryptocurrency._exist_corelation_field(type_id, correlation_id)
+    elif object_type == 'screenshot' or object_type == 'image':
+        return Screenshot.exist_screenshot(correlation_id)
+    else:
+        return False
+
+def get_obj_date(object_type, object_id):
+    if object_type == "item":
+        return int(Item.get_item_date(object_id))
+    else:
+        return None
+
+# request_type => api or ui
+def get_object_metadata(object_type, correlation_id, type_id=None):
+    if object_type == 'domain':
+        return Domain.Domain(correlation_id).get_domain_metadata(tags=True)
+    elif object_type == 'paste' or object_type == 'item':
+        return Item.get_item({"id": correlation_id, "date": True, "date_separator": True, "tags": True})[0]
+    elif object_type == 'decoded':
+        return Decoded.get_decoded_metadata(correlation_id, nb_seen=True, size=True, file_type=True, tag=True)
+    elif object_type == 'pgp':
+        return Pgp.pgp.get_metadata(type_id, correlation_id)
+    elif object_type == 'cryptocurrency':
+        return Cryptocurrency.cryptocurrency.get_metadata(type_id, correlation_id)
+    elif object_type == 'screenshot' or object_type == 'image':
+        return Screenshot.get_metadata(correlation_id)
+
+def get_object_correlation(object_type, value, correlation_names=None, correlation_objects=None, requested_correl_type=None):
+    if object_type == 'domain':
+        return Domain.get_domain_all_correlation(value, correlation_names=correlation_names)
+    elif object_type == 'paste' or object_type == 'item':
+        return Item.get_item_all_correlation(value, correlation_names=correlation_names)
+    elif object_type == 'decoded':
+        return Decoded.get_decoded_correlated_object(value, correlation_objects=correlation_objects)
+    elif object_type == 'pgp':
+        return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
+    elif object_type == 'cryptocurrency':
+        return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
+    elif object_type == 'screenshot' or object_type == 'image':
+        return Screenshot.get_screenshot_correlated_object(value, correlation_objects=correlation_objects)
+    return {}
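+# Shape of the returned correlations (inferred from how
+# get_graph_node_object_correlation consumes them below):
+# pgp/cryptocurrency map to {correlation_type: [values]};
+# decoded/screenshot/domain/paste map to a flat iterable of values.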
+
+def get_correlation_node_icon(correlation_name, correlation_type=None, value=None):
+    '''
+    Used in UI Graph.
+    Return a font awesome icon for a given correlation_name.
+
+    :param correlation_name: correlation name
+    :type correlation_name: str
+    :param correlation_type: correlation type
+    :type correlation_type: str, optional
+
+    :return: a dictionary {font awesome class, icon_code}
+    :rtype: dict
+    '''
+    icon_class = 'fas'
+    icon_text = ''
+    node_color = "#332288"
+    node_radius = 6
+    if correlation_name == "pgp":
+        node_color = '#44AA99'
+        if correlation_type == 'key':
+            icon_text = '\uf084'
+        elif correlation_type == 'name':
+            icon_text = '\uf507'
+        elif correlation_type == 'mail':
+            icon_text = '\uf1fa'
+        else:
+            icon_text = '\uf00d'  # fa-times fallback
+
+    elif correlation_name == 'cryptocurrency':
+        node_color = '#DDCC77'
+        if correlation_type == 'bitcoin':
+            icon_class = 'fab'
+            icon_text = '\uf15a'
+        elif correlation_type == 'monero':
+            icon_class = 'fab'
+            icon_text = '\uf3d0'
+        elif correlation_type == 'ethereum':
+            icon_class = 'fab'
+            icon_text = '\uf42e'
+        else:
+            icon_text = '\uf51e'
+
+    elif correlation_name == 'decoded':
+        node_color = '#88CCEE'
+        correlation_type = Decoded.get_decoded_item_type(value).split('/')[0]
+        if correlation_type == 'application':
+            icon_text = '\uf15b'
+        elif correlation_type == 'audio':
+            icon_text = '\uf1c7'
+        elif correlation_type == 'image':
+            icon_text = '\uf1c5'
+        elif correlation_type == 'text':
+            icon_text = '\uf15c'
+        else:
+            icon_text = '\uf249'
+
+    elif correlation_name == 'screenshot' or correlation_name == 'image':
+        node_color = '#E1F5DF'
+        icon_text = '\uf03e'
+
+    elif correlation_name == 'domain':
+        node_radius = 5
+        node_color = '#3DA760'
+        if Domain.get_domain_type(value) == 'onion':
+            icon_text = '\uf06e'
+        else:
+            icon_class = 'fab'
+            icon_text = '\uf13b'
+
+    elif correlation_name == 'paste':
+        node_radius = 5
+        if Item.is_crawled(value):
+            node_color = 'red'
+        else:
+            node_color = '#332288'
+
+    return {"icon_class": icon_class, "icon_text": icon_text, "node_color": node_color, "node_radius": node_radius}
+
+def get_item_url(correlation_name, value, correlation_type=None):
+    '''
+    Warning: use only in flask
+    '''
+    url = '#'
+    if correlation_name == "pgp":
+        endpoint = 'correlation.show_correlation'
+        url = url_for(endpoint, object_type="pgp", type_id=correlation_type, correlation_id=value)
+    elif correlation_name == 'cryptocurrency':
+        endpoint = 'correlation.show_correlation'
+        url = url_for(endpoint, object_type="cryptocurrency", type_id=correlation_type, correlation_id=value)
+    elif correlation_name == 'decoded':
+        endpoint = 'correlation.show_correlation'
+        url = url_for(endpoint, object_type="decoded", correlation_id=value)
+    elif correlation_name == 'screenshot' or correlation_name == 'image': ### # TODO: rename me
+        endpoint = 'correlation.show_correlation'
+        url = url_for(endpoint, object_type="screenshot", correlation_id=value)
+    elif correlation_name == 'domain':
+        endpoint = 'crawler_splash.showDomain'
+        url = url_for(endpoint, domain=value)
+    elif correlation_name == 'item':
+        endpoint = 'showsavedpastes.showsavedpaste'
+        url = url_for(endpoint, paste=value)
+    elif correlation_name == 'paste': ### # TODO: remove me
+        endpoint = 'showsavedpastes.showsavedpaste'
+        url = url_for(endpoint, paste=value)
+    return url
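+# NOTE: get_item_url() wraps flask.url_for(), which requires an active
+# Flask application/request context and raises RuntimeError outside of one.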
+
+def get_obj_tag_table_keys(object_type):
+    '''
+    Warning: use only in flask (dynamic templates)
+    '''
+    if object_type=="domain":
+        return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot
+
+
+def create_graph_links(links_set):
+    graph_links_list = []
+    for link in links_set:
+        graph_links_list.append({"source": link[0], "target": link[1]})
+    return graph_links_list
+
+def create_graph_nodes(nodes_set, root_node_id):
+    graph_nodes_list = []
+    for node_id in nodes_set:
+        # node_id format: 'correlation_name;correlation_type;value';
+        # maxsplit=2 keeps any extra ';' inside the value intact
+        correlation_name, correlation_type, value = node_id.split(';', 2)
+        dict_node = {"id": node_id}
+        dict_node['style'] = get_correlation_node_icon(correlation_name, correlation_type, value)
+        dict_node['text'] = value
+        if node_id == root_node_id:
+            dict_node["style"]["node_color"] = 'orange'
+            dict_node["style"]["node_radius"] = 7
+        dict_node['url'] = get_item_url(correlation_name, value, correlation_type)
+        graph_nodes_list.append(dict_node)
+    return graph_nodes_list
+
+def create_node_id(correlation_name, value, correlation_type=''):
+    if correlation_type is None:
+        correlation_type = ''
+    return '{};{};{}'.format(correlation_name, correlation_type, value)
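+# Node id format examples (hypothetical values):
+#   create_node_id('pgp', 'AABBCCDD', 'key')  -> 'pgp;key;AABBCCDD'
+#   create_node_id('domain', 'example.onion') -> 'domain;;example.onion'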
+
+
+# # TODO: filter by correlation type => bitcoin, mail, ...
+def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None):
+    links = set()
+    nodes = set()
+
+    root_node_id = create_node_id(object_type, root_value, requested_correl_type)
+    nodes.add(root_node_id)
+
+    root_correlation = get_object_correlation(object_type, root_value, correlation_names, correlation_objects, requested_correl_type=requested_correl_type)
+    for correl in root_correlation:
+        if correl in ('pgp', 'cryptocurrency'):
+            for correl_type in root_correlation[correl]:
+                for correl_val in root_correlation[correl][correl_type]:
+
+                    # add correlation
+                    correl_node_id = create_node_id(correl, correl_val, correl_type)
+
+                    if mode=="union":
+                        if len(nodes) > max_nodes:
+                            break
+                        nodes.add(correl_node_id)
+                        links.add((root_node_id, correl_node_id))
+
+                    # get second correlation
+                    res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects, requested_correl_type=correl_type)
+                    if res:
+                        for corr_obj in res:
+                            for correl_key_val in res[corr_obj]:
+                                # filter root value
+                                if correl_key_val == root_value:
+                                    continue
+
+                                if len(nodes) > max_nodes:
+                                    break
+                                new_corel_1 = create_node_id(corr_obj, correl_key_val)
+                                new_corel_2 = create_node_id(correl, correl_val, correl_type)
+                                nodes.add(new_corel_1)
+                                nodes.add(new_corel_2)
+                                links.add((new_corel_1, new_corel_2))
+
+                    if mode=="inter":
+                        nodes.add(correl_node_id)
+                        links.add((root_node_id, correl_node_id))
+        if correl in ('decoded', 'screenshot', 'domain', 'paste'):
+            for correl_val in root_correlation[correl]:
+
+                correl_node_id = create_node_id(correl, correl_val)
+                if mode=="union":
+                    if len(nodes) > max_nodes:
+                        break
+                    nodes.add(correl_node_id)
+                    links.add((root_node_id, correl_node_id))
+
+                res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects)
+                if res:
+                    for corr_obj in res:
+                        if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'):
+                            for correl_key_val in res[corr_obj]:
+                                # filter root value
+                                if correl_key_val == root_value:
+                                    continue
+
+                                if len(nodes) > max_nodes:
+                                    break
+                                new_corel_1 = create_node_id(corr_obj, correl_key_val)
+                                new_corel_2 = create_node_id(correl, correl_val)
+                                nodes.add(new_corel_1)
+                                nodes.add(new_corel_2)
+                                links.add((new_corel_1, new_corel_2))
+
+                            if mode=="inter":
+                                nodes.add(correl_node_id)
+                                links.add((root_node_id, correl_node_id))
+
+                        if corr_obj in ('pgp', 'cryptocurrency'):
+                            for correl_key_type in res[corr_obj]:
+                                for correl_key_val in res[corr_obj][correl_key_type]:
+                                    # filter root value
+                                    if correl_key_val == root_value:
+                                        continue
+
+                                    if len(nodes) > max_nodes:
+                                        break
+                                    new_corel_1 = create_node_id(corr_obj, correl_key_val, correl_key_type)
+                                    new_corel_2 = create_node_id(correl, correl_val)
+                                    nodes.add(new_corel_1)
+                                    nodes.add(new_corel_2)
+                                    links.add((new_corel_1, new_corel_2))
+
+                            if mode=="inter":
+                                nodes.add(correl_node_id)
+                                links.add((root_node_id, correl_node_id))
+
+
+    return {"nodes": create_graph_nodes(nodes, root_node_id), "links": create_graph_links(links)}
+
+
+def get_obj_global_id(obj_type, obj_id, obj_sub_type=None):
+    if obj_sub_type:
+        return '{}:{}:{}'.format(obj_type, obj_sub_type, obj_id)
+    else:
+        # # TODO: remove me
+        if obj_type=='paste':
+            obj_type='item'
+        # # TODO: remove me
+        if obj_type=='screenshot':
+            obj_type='image'
+
+        return '{}:{}'.format(obj_type, obj_id)
+
+######## API EXPOSED ########
+def sanitize_object_type(object_type):
+    if not is_valid_object_type(object_type):
+        return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400)
+######## ########
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index ee0b7379..81837e39 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -843,6 +843,21 @@ def get_all_queues_stats():
         dict_stats[queue_type] = get_stats_elem_to_crawl_by_queue_type(queue_type)
     return dict_stats

+def is_domain_in_queue(queue_type, domain):
+    return r_serv_onion.sismember(f'{queue_type}_domain_crawler_queue', domain)
+
+def is_item_in_queue(queue_type, url, item_id, queue_name=None):
+    if queue_name is None:
+        queues = get_all_queues_keys()
+    else:
+        # get_queue_key_by_name() is assumed to return a single key; wrap it
+        # in a list so the loop below iterates over queues, not characters
+        queues = [get_queue_key_by_name(queue_name)]
+
+    key = f'{url};{item_id}'
+    for queue in queues:
+        if r_serv_onion.sismember(queue.format(queue_type), key):
+            return True
+    return False
+
 def add_item_to_discovery_queue(queue_type, domain, subdomain, url, item_id):
     date_month = datetime.now().strftime("%Y%m")
     date = datetime.now().strftime("%Y%m%d")
@@ -868,6 +883,17 @@ def add_item_to_discovery_queue(queue_type, domain, subdomain, url, item_id):
     r_serv_onion.sadd(f'{queue_type}_crawler_queue', msg)
     print(f'sent to queue: {subdomain}')

+def queue_test_clean_up(queue_type, domain, item_id):
+    date_month = datetime.now().strftime("%Y%m")
+    r_serv_onion.srem(f'month_{queue_type}_up:{date_month}', domain)
+
+    # Clean up
+    r_serv_onion.srem(f'{queue_type}_domain_crawler_queue', domain)
+    msg = f'{domain};{item_id}'
+    r_serv_onion.srem(f'{queue_type}_crawler_discovery_queue', msg)
+    r_serv_onion.srem(f'{queue_type}_crawler_queue', msg)
+
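+# Test hook usage example (hypothetical domain; see tests/test_modules.py):
+#   queue_test_clean_up('onion', 'example.onion', 'tests/2021/01/01/onion.gz')
+# removes the domain and its url/item entries from every crawler queue.
+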
 def remove_task_from_crawler_queue(queue_name, queue_type, key_to_remove):
     r_serv_onion.srem(queue_name.format(queue_type), key_to_remove)
@@ -1417,7 +1443,7 @@ def test_ail_crawlers():

 #### ---- ####

-if __name__ == '__main__':
+#if __name__ == '__main__':
     # res = get_splash_manager_version()
     # res = test_ail_crawlers()
     # res = is_test_ail_crawlers_successful()
diff --git a/samples/2021/01/01/keys_certificat_sample.gz b/samples/2021/01/01/keys_certificat_sample.gz
new file mode 100644
index 00000000..d3427e10
Binary files /dev/null and b/samples/2021/01/01/keys_certificat_sample.gz differ
diff --git a/samples/2021/01/01/onion.gz b/samples/2021/01/01/onion.gz
new file mode 100644
index 00000000..6972d697
Binary files /dev/null and b/samples/2021/01/01/onion.gz differ
diff --git a/tests/test_modules.py b/tests/test_modules.py
new file mode 100644
index 00000000..23fa03be
--- /dev/null
+++ b/tests/test_modules.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+import unittest
+
+sys.path.append(os.environ['AIL_BIN'])
+
+# Module classes
+from Onion import Onion
+
+# project packages
+import lib.crawlers as crawlers
+
+class Test_Module_Onion(unittest.TestCase):
+
+    def setUp(self):
+        self.module_obj = Onion()
+
+    def test_module(self):
+        item_id = 'tests/2021/01/01/onion.gz'
+        domain_1 = 'eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion'
+        domain_2 = 'www.facebookcorewwwi.onion'
+        crawlers.queue_test_clean_up('onion', domain_1, item_id)
+
+        self.module_obj.compute(f'{item_id} 3')
+        if crawlers.is_crawler_activated():
+            ## check domain queues
+            # all domains queue
+            self.assertTrue(crawlers.is_domain_in_queue('onion', domain_1))
+            # all url/item queue
+            self.assertTrue(crawlers.is_item_in_queue('onion', f'http://{domain_1}', item_id))
+            # domain blacklist
+            self.assertFalse(crawlers.is_domain_in_queue('onion', domain_2))
+            # invalid onion
+            self.assertFalse(crawlers.is_domain_in_queue('onion', 'invalid.onion'))
+
+            # clean DB
+            crawlers.queue_test_clean_up('onion', domain_1, item_id)
+        else:
+            # # TODO: check warning logs
+            pass
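+
+
+if __name__ == '__main__':
+    # convenience entry point; nosetests/unittest discovery does not need it
+    unittest.main()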