diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 19e82fc8..2ca79326 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -210,6 +210,10 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Tracker_Yara" bash -c "cd ${AIL_BIN}/trackers; ${ENV_PY} ./Tracker_Yara.py; read x" sleep 0.1 + screen -S "Script_AIL" -X screen -t "Retro_Hunt" bash -c "cd ${AIL_BIN}/trackers; ${ENV_PY} ./Retro_Hunt.py; read x" + sleep 0.1 + screen -S "Script_AIL" -X screen -t "Retro_Hunt" bash -c "cd ${AIL_BIN}/trackers; ${ENV_PY} ./Retro_Hunt.py; read x" + sleep 0.1 ################################## # DISABLED MODULES # diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 6907d42e..c733cdd3 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -20,6 +20,8 @@ import ConfigLoader import item_basic config_loader = ConfigLoader.ConfigLoader() +r_cache = config_loader.get_redis_conn("Redis_Cache") + r_serv_db = config_loader.get_redis_conn("ARDB_DB") r_serv_tracker = config_loader.get_redis_conn("ARDB_Tracker") config_loader = None @@ -139,12 +141,14 @@ def get_tracker_sparkline(tracker_uuid, num_day=6): sparklines_value.append(int(nb_seen_this_day)) return sparklines_value -def add_tracked_item(tracker_uuid, item_id, item_date): +def add_tracked_item(tracker_uuid, item_id): + item_date = item_basic.get_item_date(id) # track item res = r_serv_tracker.sadd(f'tracker:item:{tracker_uuid}:{item_date}', item_id) # track nb item by date if res == 1: - r_serv_tracker.zadd('tracker:stat:{}'.format(tracker_uuid), item_date, int(item_date)) + r_serv_tracker.zincrby('tracker:stat:{}'.format(tracker_uuid), int(item_date), 1) + def get_email_subject(tracker_uuid): tracker_description = get_tracker_description(tracker_uuid) @@ -563,12 +567,526 @@ def api_get_default_rule_content(default_yara_rule): ##-- YARA --## -if __name__ == '__main__': +###################### +#### RETRO - HUNT #### + +# state: pending/running/completed/paused + +# task keys: +## 
tracker:retro_hunt:task:{task_uuid} state +# start_time +# end_time +# date_from +# date_to +# creator +# timeout +# date +# type + +## ? ? ? +# set tags +# set mails +# limit mail + +# SET Retro Hunts + +def get_all_retro_hunt_tasks(): + return r_serv_tracker.smembers('tracker:retro_hunt:task:all') + +def get_all_pending_retro_hunt_tasks(): + return r_serv_tracker.smembers('tracker:retro_hunt:task:pending') + +def get_all_running_retro_hunt_tasks(): + return r_serv_tracker.smembers('tracker:retro_hunt:task:running') + +def get_all_paused_retro_hunt_tasks(): + return r_serv_tracker.smembers('tracker:retro_hunt:task:paused') + +## Change STATES ## + +def get_all_completed_retro_hunt_tasks(): + return r_serv_tracker.smembers('tracker:retro_hunt:task:completed') + +def get_retro_hunt_task_to_start(): + task_uuid = r_serv_tracker.spop('tracker:retro_hunt:task:pending') + if task_uuid: + set_retro_hunt_task_state(task_uuid, 'running') + return task_uuid + +def pause_retro_hunt_task(task_uuid): + set_retro_hunt_task_state(task_uuid, 'paused') + r_cache.hset(f'tracker:retro_hunt:task:{task_uuid}', 'pause', time.time()) + +def check_retro_hunt_pause(task_uuid): + is_paused = r_cache.hget(f'tracker:retro_hunt:task:{task_uuid}', 'pause') + if is_paused: + return True + else: + return False + +def resume_retro_hunt_task(task_uuid): + r_cache.hdel(f'tracker:retro_hunt:task:{task_uuid}', 'pause') + set_retro_hunt_task_state(task_uuid, 'pending') + +## Metadata ## + +def get_retro_hunt_task_name(task_uuid): + return r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'name') + +def get_retro_hunt_task_state(task_uuid): + return r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'state') + +def set_retro_hunt_task_state(task_uuid, new_state): + curr_state = get_retro_hunt_task_state(task_uuid) + if curr_state: + r_serv_tracker.srem(f'tracker:retro_hunt:task:{curr_state}', task_uuid) + r_serv_tracker.sadd(f'tracker:retro_hunt:task:{new_state}', task_uuid) + 
r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'state', new_state) + +def get_retro_hunt_task_type(task_uuid): + return r_serv_tracker(f'tracker:retro_hunt:task:{task_uuid}', 'type') + +# # TODO: yararule +def get_retro_hunt_task_rule(task_uuid, r_compile=False): + #rule_type = 'yara' + rule = r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'rule') + if r_compile: + #if rule_type == 'yara' + rule = os.path.join(get_yara_rules_dir(), rule) + rule_dict = {task_uuid : os.path.join(get_yara_rules_dir(), rule)} + rule = yara.compile(filepaths=rule_dict) + return rule + +def get_retro_hunt_task_timeout(task_uuid): + res = r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'timeout') + if res: + return int(res) + else: + return 30 # # TODO: FIXME use instance limit + +def get_retro_hunt_task_date_from(task_uuid): + return r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'date_from') + +def get_retro_hunt_task_date_to(task_uuid): + return r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'date_to') + +def get_retro_hunt_task_creator(task_uuid): + return r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'creator') + +def get_retro_hunt_last_analyzed(task_uuid): + return r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'last') + +# Keep history to relaunch on error/pause +def set_retro_hunt_last_analyzed(task_uuid, last_id): + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'last', last_id) + +def get_retro_hunt_task_sources(task_uuid, r_sort=False): + sources = r_serv_tracker.smembers(f'tracker:retro_hunt:task:sources:{task_uuid}') + if not sources: + sources = set(item_basic.get_all_items_sources(filter_dir=False)) + if r_sort: + sources = sorted(sources) + return sources + +def get_retro_hunt_task_tags(task_uuid): + return r_serv_tracker.smembers(f'tracker:retro_hunt:task:tags:{task_uuid}') + +def get_retro_hunt_task_mails(task_uuid): + return 
r_serv_tracker.smembers(f'tracker:retro_hunt:task:mails:{task_uuid}') + +# # TODO: ADD TYPE + TIMEOUT +def get_retro_hunt_task_metadata(task_uuid, date=False, progress=False, creator=False, sources=None, tags=None, description=False, nb_match=False): + task_metadata = {'uuid': task_uuid} + task_metadata['state'] = get_retro_hunt_task_state(task_uuid) + task_metadata['name'] = get_retro_hunt_task_name(task_uuid) + task_metadata['rule'] = get_retro_hunt_task_rule(task_uuid) + if creator: + task_metadata['creator'] = get_retro_hunt_task_creator(task_uuid) + if date: + task_metadata['date'] = r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'date') + task_metadata['date_from'] = get_retro_hunt_task_date_from(task_uuid) + task_metadata['date_to'] = get_retro_hunt_task_date_to(task_uuid) + if description: + task_metadata['description'] = r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'description') + if nb_match: + task_metadata['nb_match'] = get_retro_hunt_nb_match(task_uuid) + if progress: + task_metadata['progress'] = get_retro_hunt_task_progress(task_uuid) + if sources: + task_metadata['sources'] = get_retro_hunt_task_sources(task_uuid, r_sort=True) + if tags: + task_metadata['tags'] = get_retro_hunt_task_tags(task_uuid) + return task_metadata + +def get_all_retro_hunt_tasks_with_metadata(): + l_retro_hunt = [] + for task_uuid in get_all_retro_hunt_tasks(): + l_retro_hunt.append(get_retro_hunt_task_metadata(task_uuid, date=True, progress=True, tags=True, nb_match=True)) + return l_retro_hunt + +def get_retro_hunt_task_progress(task_uuid): + if get_retro_hunt_task_state(task_uuid) == 'completed': + progress = 100 + else: + progress = r_cache.hget(f'tracker:retro_hunt:task:{task_uuid}', 'progress') + if not progress: + progress = compute_retro_hunt_task_progress(task_uuid) + return progress + +def set_cache_retro_hunt_task_progress(task_uuid, progress): + r_cache.hset(f'tracker:retro_hunt:task:{task_uuid}', 'progress', progress) + +def 
set_cache_retro_hunt_task_id(task_uuid, id): + r_cache.hset(f'tracker:retro_hunt:task:{task_uuid}', 'id', id) + +def clear_retro_hunt_task_cache(task_uuid): + r_cache.delete(f'tracker:retro_hunt:task:{task_uuid}') + +# Others + +# date +# type +# tags +# mails +# name +# description + +# # # TODO: TYPE +def create_retro_hunt_task(name, rule, date_from, date_to, creator, sources=[], tags=[], mails=[], timeout=30, description=None, task_uuid=None): + if not task_uuid: + task_uuid = str(uuid.uuid4()) + + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'name', escape(name)) + + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'rule', rule) + + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'date', datetime.date.today().strftime("%Y%m%d")) + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'date_from', date_from) + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'date_to', date_to) + + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'creator', creator) + if description: + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'description', description) + if timeout: + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'timeout', int(timeout)) + for source in sources: + r_serv_tracker.sadd(f'tracker:retro_hunt:task:sources:{task_uuid}', escape(source)) + for tag in tags: + r_serv_tracker.sadd(f'tracker:retro_hunt:task:tags:{task_uuid}', escape(tag)) + for mail in mails: + r_serv_tracker.sadd(f'tracker:retro_hunt:task:mails:{task_uuid}', escape(mail)) + + r_serv_tracker.sadd('tracker:retro_hunt:task:all', task_uuid) + # add to pending tasks + r_serv_tracker.sadd('tracker:retro_hunt:task:pending', task_uuid) + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'state', 'pending') + return task_uuid + +# # TODO: delete rule +def delete_retro_hunt_task(task_uuid): + if r_serv_tracker.sismember('tracker:retro_hunt:task:running', task_uuid): + return None + + 
r_serv_tracker.srem('tracker:retro_hunt:task:pending', task_uuid) + r_serv_tracker.delete(f'tracker:retro_hunt:task:{task_uuid}') + r_serv_tracker.delete(f'tracker:retro_hunt:task:sources:{task_uuid}') + r_serv_tracker.delete(f'tracker:retro_hunt:task:tags:{task_uuid}') + r_serv_tracker.delete(f'tracker:retro_hunt:task:mails:{task_uuid}') + + for item_date in get_retro_hunt_all_item_dates(task_uuid): + r_serv_tracker.delete(f'tracker:retro_hunt:task:item:{task_uuid}:{item_date}') + + r_serv_tracker.srem('tracker:retro_hunt:task:all', task_uuid) + r_serv_tracker.srem('tracker:retro_hunt:task:pending', task_uuid) + r_serv_tracker.srem('tracker:retro_hunt:task:paused', task_uuid) + r_serv_tracker.srem('tracker:retro_hunt:task:completed', task_uuid) + + clear_retro_hunt_task_cache(task_uuid) + return task_uuid + +def get_retro_hunt_task_current_date(task_uuid): + last = get_retro_hunt_last_analyzed(task_uuid) + if last: + curr_date = item_basic.get_item_date(last) + else: + curr_date = get_retro_hunt_task_date_from(task_uuid) + return curr_date + +def get_retro_hunt_task_nb_src_done(task_uuid, sources=[]): + if not sources: + sources = list(get_retro_hunt_task_sources(task_uuid, r_sort=True)) + else: + sources = list(sources) + last_id = get_retro_hunt_last_analyzed(task_uuid) + if last_id: + last_source = item_basic.get_source(last_id) + try: + nb_src_done = sources.index(last_source) + except ValueError: + nb_src_done = 0 + else: + nb_src_done = 0 + return nb_src_done + +def get_retro_hunt_dir_day_to_analyze(task_uuid, date, filter_last=False, sources=[]): + if not sources: + sources = get_retro_hunt_task_sources(task_uuid, r_sort=True) + + # filter last + if filter_last: + last = get_retro_hunt_last_analyzed(task_uuid) + if last: + curr_source = item_basic.get_source(last) + # remove processed sources + set_sources = sources.copy() + for source in sources: + if source != curr_source: + set_sources.remove(source) + else: + break + sources = set_sources + + # return 
all dirs by day + date = f'{date[0:4]}/{date[4:6]}/{date[6:8]}' + dirs = set() + for source in sources: + dirs.add(os.path.join(source, date)) + return dirs + +# # TODO: move me +def get_items_to_analyze(dir, last=None): + full_dir = os.path.join(os.environ['AIL_HOME'], 'PASTES', dir) # # TODO: # FIXME: use item config dir + if os.path.isdir(full_dir): + all_items = sorted([os.path.join(dir, f) for f in os.listdir(full_dir) if os.path.isfile(os.path.join(full_dir, f))]) + # remove processed items + if last: + items_set = all_items.copy() + for item in all_items: + if item != last: + items_set.remove(item) + else: + break + all_items = items_set + return all_items + else: + return [] + +def compute_retro_hunt_task_progress(task_uuid, date_from=None, date_to=None, sources=[], curr_date=None, nb_src_done=0): + # get nb days + if not date_from: + date_from = get_retro_hunt_task_date_from(task_uuid) + if not date_to: + date_to = get_retro_hunt_task_date_to(task_uuid) + nb_days = Date.get_nb_days_by_daterange(date_from, date_to) + + # nb days completed + if not curr_date: + curr_date = get_retro_hunt_task_current_date(task_uuid) + nb_days_done = Date.get_nb_days_by_daterange(date_from, curr_date) - 1 + + # sources + if not sources: + nb_sources = len(get_retro_hunt_task_sources(task_uuid)) + else: + nb_sources = len(sources) + + # get progress + progress = ((nb_days_done * nb_sources) + nb_src_done) * 100 / (nb_days * nb_sources) + return int(progress) + + # # TODO: # FIXME: # Cache + +def save_retro_hunt_match(task_uuid, id, object_type='item'): + item_date = item_basic.get_item_date(id) + res = r_serv_tracker.sadd(f'tracker:retro_hunt:task:item:{task_uuid}:{item_date}', id) + # track nb item by date + if res == 1: + r_serv_tracker.zincrby(f'tracker:retro_hunt:task:stat:{task_uuid}', int(item_date), 1) + +def get_retro_hunt_all_item_dates(task_uuid): + return r_serv_tracker.zrange(f'tracker:retro_hunt:task:stat:{task_uuid}', 0, -1) + +def 
get_retro_hunt_nb_match(task_uuid): + nb_match = r_serv_tracker.hget(f'tracker:retro_hunt:task:{task_uuid}', 'nb_match') + if not nb_match: + l_date_value = r_serv_tracker.zrange(f'tracker:retro_hunt:task:stat:{task_uuid}', 0, -1, withscores=True) + nb_match = 0 + for tuple in l_date_value: + nb_match += int(tuple[1]) + return int(nb_match) + +def set_retro_hunt_nb_match(task_uuid): + l_date_value = r_serv_tracker.zrange(f'tracker:retro_hunt:task:stat:{task_uuid}', 0, -1, withscores=True) + nb_match = 0 + for tuple in l_date_value: + nb_match += int(tuple[1]) + r_serv_tracker.hset(f'tracker:retro_hunt:task:{task_uuid}', 'nb_match', nb_match) + +def get_retro_hunt_items_by_daterange(task_uuid, date_from, date_to): + all_item_id = set() + if date_from and date_to: + l_date_match = r_serv_tracker.zrange(f'tracker:retro_hunt:task:stat:{task_uuid}', 0, -1, withscores=True) + if l_date_match: + dict_date_match = dict(l_date_match) + for date_day in Date.substract_date(date_from, date_to): + if date_day in dict_date_match: + all_item_id |= r_serv_tracker.smembers(f'tracker:retro_hunt:task:item:{task_uuid}:{date_day}') + return all_item_id + +def get_retro_hunt_nb_item_by_day(l_task_uuid, date_from=None, date_to=None): + list_stats = [] + for task_uuid in l_task_uuid: + dict_task_data = [] + retro_name = get_retro_hunt_task_name(task_uuid) + l_date_match = r_serv_tracker.zrange(f'tracker:retro_hunt:task:stat:{task_uuid}', 0, -1, withscores=True) + if l_date_match: + dict_date_match = dict(l_date_match) + if not date_from: + date_from = min(dict_date_match) + if not date_to: + date_to = max(dict_date_match) + + date_range = Date.substract_date(date_from, date_to) + for date_day in date_range: + nb_seen_this_day = int(dict_date_match.get(date_day, 0)) + dict_task_data.append({"date": date_day,"value": int(nb_seen_this_day)}) + list_stats.append({"name": retro_name,"Data": dict_task_data}) + return list_stats + +## API ## +def api_check_retro_hunt_task_uuid(task_uuid): + if 
not is_valid_uuid_v4(task_uuid): + return ({"status": "error", "reason": "Invalid uuid"}, 400) + if not r_serv_tracker.exists(f'tracker:retro_hunt:task:{task_uuid}'): + return ({"status": "error", "reason": "Unknown uuid"}, 404) + return None + +def api_get_retro_hunt_items(dict_input): + task_uuid = dict_input.get('uuid', None) + res = api_check_retro_hunt_task_uuid(task_uuid) + if res: + return res + + date_from = dict_input.get('date_from', None) + date_to = dict_input.get('date_to', None) + if date_from is None: + date_from = get_retro_hunt_task_date_from(task_uuid) + if date_from: + date_from = date_from[0] + if date_to is None: + date_to = date_from + if date_from > date_to: + date_from = date_to + + all_items_id = get_retro_hunt_items_by_daterange(task_uuid, date_from, date_to) + all_items_id = item_basic.get_all_items_metadata_dict(all_items_id) + + res_dict = {} + res_dict['uuid'] = task_uuid + res_dict['date_from'] = date_from + res_dict['date_to'] = date_to + res_dict['items'] = all_items_id + return (res_dict, 200) + +def api_pause_retro_hunt_task(task_uuid): + res = api_check_retro_hunt_task_uuid(task_uuid) + if res: + return res + task_state = get_retro_hunt_task_state(task_uuid) + if task_state not in ['pending', 'running']: + return ({"status": "error", "reason": f"Task {task_uuid} not paused, current state: {task_state}"}, 400) + pause_retro_hunt_task(task_uuid) + return (task_uuid, 200) + +def api_resume_retro_hunt_task(task_uuid): + res = api_check_retro_hunt_task_uuid(task_uuid) + if res: + return res + task_state = get_retro_hunt_task_state(task_uuid) + if not r_serv_tracker.sismember('tracker:retro_hunt:task:paused', task_uuid): + return ({"status": "error", "reason": f"Task {task_uuid} not paused, current state: {get_retro_hunt_task_state(task_uuid)}"}, 400) + resume_retro_hunt_task(task_uuid) + return (task_uuid, 200) + +def api_validate_rule_to_add(rule, rule_type): + if rule_type=='yara_custom': + if not is_valid_yara_rule(rule): + return 
({"status": "error", "reason": "Invalid custom Yara Rule"}, 400) + elif rule_type=='yara_default': + if not is_valid_default_yara_rule(rule): + return ({"status": "error", "reason": "The Yara Rule doesn't exist"}, 400) + else: + return ({"status": "error", "reason": "Incorrect type"}, 400) + return ({"status": "success", "rule": rule, "type": rule_type}, 200) + +def api_create_retro_hunt_task(dict_input, creator): + # # TODO: API: check mandatory arg + # # TODO: TIMEOUT + + # timeout=30 + rule = dict_input.get('rule', None) + if not rule: + return ({"status": "error", "reason": "Retro Hunt Rile not provided"}, 400) + task_type = dict_input.get('type', None) + if not task_type: + return ({"status": "error", "reason": "type not provided"}, 400) + + # # TODO: limit + name = dict_input.get('name', '') + name = escape(name) + name = name[:60] + # # TODO: limit + description = dict_input.get('description', '') + description = escape(description) + description = description[:1000] + + res = api_validate_rule_to_add(rule , task_type) + if res[1]!=200: + return res + + tags = dict_input.get('tags', []) + mails = dict_input.get('mails', []) + res = verify_mail_list(mails) + if res: + return res + + sources = dict_input.get('sources', []) + res = item_basic.verify_sources_list(sources) + if res: + return res + + date_from = dict_input.get('date_from', '') + date_to = dict_input.get('date_to', '') + res = Date.api_validate_str_date_range(date_from, date_to) + if res: + return res + + task_uuid = str(uuid.uuid4()) + + # RULE + rule = save_yara_rule(task_type, rule, tracker_uuid=task_uuid) + task_type = 'yara' + + task_uuid = create_retro_hunt_task(name, rule, date_from, date_to, creator, sources=sources, + tags=tags, mails=mails, timeout=30, description=description, task_uuid=task_uuid) + + return ({'name': name, 'rule': rule, 'type': task_type, 'uuid': task_uuid}, 200) + +def api_delete_retro_hunt_task(task_uuid): + res = api_check_retro_hunt_task_uuid(task_uuid) + if res: + 
return res + if r_serv_tracker.sismember('tracker:retro_hunt:task:running', task_uuid): + return ({"status": "error", "reason": "You can't delete a running task"}, 400) + else: + return (delete_retro_hunt_task(task_uuid), 200) + +#if __name__ == '__main__': #res = is_valid_yara_rule('rule dummy { }') # res = create_tracker('test', 'word', 'admin@admin.test', 1, [], [], None, sources=['crawled', 'pastebin.com', 'rt/pastebin.com']) - res = create_tracker('circl\.lu', 'regex', 'admin@admin.test', 1, [], [], None, sources=['crawled','pastebin.com']) - print(res) + #res = create_tracker('circl\.lu', 'regex', 'admin@admin.test', 1, [], [], None, sources=['crawled','pastebin.com']) + #print(res) #t_uuid = '1c2d35b0-9330-4feb-b454-da13007aa9f7' #res = get_tracker_sources('ail-yara-rules/rules/crypto/certificate.yar', 'yara') @@ -578,4 +1096,29 @@ if __name__ == '__main__': # Term.delete_term('074ab4be-6049-45b5-a20e-8125a4e4f500') + #res = get_items_to_analyze('archive/pastebin.com_pro/2020/05/15', last='archive/pastebin.com_pro/2020/05/15/zkHEgqjQ.gz') + #get_retro_hunt_task_progress('0', nb_src_done=2) + + #res = set_cache_retro_hunt_task_progress('0', 100) + #res = get_retro_hunt_task_nb_src_done('0', sources=['pastebin.com_pro', 'alerts/pastebin.com_pro', 'crawled']) + #print(res) + + # sources = ['pastebin.com_pro', 'alerts/pastebin.com_pro', 'crawled'] + # rule = 'custom-rules/4a8a3d04-f0b6-43ce-8e00-bdf47a8df241.yar' + # name = 'retro_hunt_test_1' + # description = 'circl retro hunt first test' + # tags = ['retro_circl', 'circl'] + # creator = 'admin@admin.test' + # date_from = '20200610' + # date_to = '20210630' + + #res = create_retro_hunt_task(name, rule, date_from, date_to, creator, sources=sources, tags=tags, description=description) + + + #get_retro_hunt_nb_item_by_day(['80b402ef-a8a9-4e97-adb6-e090edcfd571'], date_from=None, date_to=None, num_day=31) + + #res = get_retro_hunt_nb_item_by_day(['c625f971-16e6-4331-82a7-b1e1b9efdec1'], date_from='20200610', 
date_to='20210630') + + #res = delete_retro_hunt_task('598687b6-f765-4f8b-861a-09ad76d0ab34') + #print(res) diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py index 5991c70d..dde052e2 100755 --- a/bin/lib/item_basic.py +++ b/bin/lib/item_basic.py @@ -7,6 +7,9 @@ import gzip import magic +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Tag + sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader @@ -41,7 +44,8 @@ def get_basename(item_id): return os.path.basename(item_id) def get_source(item_id): - return item_id.split('/')[-5] + l_source = item_id.split('/')[:-4] + return os.path.join(*l_source) # # TODO: add an option to check the tag def is_crawled(item_id): @@ -204,7 +208,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt return l_sources_name -def get_all_items_sources(filter_dir=True, r_list=False): +def get_all_items_sources(filter_dir=False, r_list=False): res = _get_dir_source_name(PASTES_FOLDER, filter_dir=filter_dir) if r_list: res = list(res) @@ -217,6 +221,12 @@ def verify_sources_list(sources): return ({'status': 'error', 'reason': 'Invalid source', 'value': source}, 400) return None +def get_all_items_metadata_dict(list_id): + list_meta = [] + for item_id in list_id: + list_meta.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} ) + return list_meta + ##-- --## diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 840c9f8e..99e984dc 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -110,6 +110,13 @@ def get_previous_date_list(num_day): date_list.append(date.substract_day(i)) return list(reversed(date_list)) +def get_nb_days_by_daterange(date_from, date_to): + date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) + date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) + delta = date_to - date_from # timedelta + return 
len(range(delta.days + 1)) + + def substract_date(date_from, date_to): date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) @@ -129,6 +136,13 @@ def validate_str_date(str_date, separator=''): except TypeError: return False +def api_validate_str_date_range(date_from, date_to, separator=''): + is_date = validate_str_date(date_from, separator=separator) and validate_str_date(date_from, separator=separator) + if not is_date: + return ({"status": "error", "reason": "Invalid Date"}, 400) + if int(date_from) > int(date_to): + return ({"status": "error", "reason": "Invalid Date range, Date from > Date to"}, 400) + def sanitise_date_range(date_from, date_to, separator='', date_type='str'): ''' Check/Return a correct date_form and date_to diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index c8b681bd..f1ec8ef7 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -26,6 +26,10 @@ publish = Redis_D4_client [D4_client] subscribe = Redis_D4_client +[Retro_Hunt] +subscribe = Redis +publish = Redis_Tags + [Tracker_Term] subscribe = Redis_Global publish = Redis_Tags diff --git a/bin/trackers/Retro_Hunt.py b/bin/trackers/Retro_Hunt.py new file mode 100755 index 00000000..23d40058 --- /dev/null +++ b/bin/trackers/Retro_Hunt.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" +The Retro_Hunt trackers module +=================== + +""" + +################################## +# Import External packages +################################## +import os +import re +import sys +import time +import yara + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from modules.abstract_module import AbstractModule +from packages.Item import Item +from packages.Item import Date +from lib import Tracker + +import NotificationHelper # # TODO: 
refractor + +class Retro_Hunt(AbstractModule): + + #mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}" + + """ + Retro_Hunt module for AIL framework + """ + def __init__(self): + super(Retro_Hunt, self).__init__() + self.pending_seconds = 5 + + self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id=" + + self.refresh_deleta = 10 + self.last_refresh = 0 + + # reset on each loop + self.task_uuid = None + self.date_from = 0 + self.date_to = 0 + self.nb_src_done = 0 + self.progress = 0 + self.item = None + + self.redis_logger.info(f"Module: {self.module_name} Launched") + + + # # TODO: send mails + # # TODO: # start_time + # end_time + + def compute(self, task_uuid): + print(task_uuid) + self.task_uuid = task_uuid + self.progress = 0 + # First launch + # restart + rule = Tracker.get_retro_hunt_task_rule(task_uuid, r_compile=True) + timeout = Tracker.get_retro_hunt_task_timeout(task_uuid) + sources = Tracker.get_retro_hunt_task_sources(task_uuid, r_sort=True) + + self.date_from = Tracker.get_retro_hunt_task_date_from(task_uuid) + self.date_to = Tracker.get_retro_hunt_task_date_to(task_uuid) + self.tags = Tracker.get_retro_hunt_task_tags(task_uuid) + curr_date = Tracker.get_retro_hunt_task_current_date(task_uuid) + self.nb_src_done = Tracker.get_retro_hunt_task_nb_src_done(task_uuid, sources=sources) + self.progress = self.update_progress(sources, curr_date) + # iterate on date + filter_last = True + while int(curr_date) <= int(self.date_to): + print(curr_date) + dirs_date = Tracker.get_retro_hunt_dir_day_to_analyze(task_uuid, curr_date, filter_last=filter_last, sources=sources) + filter_last = False + nb_id = 0 + self.nb_src_done = 0 + self.update_progress(sources, curr_date) + # # TODO: Filter previous item + for dir in dirs_date: + print(dir) + l_obj = Tracker.get_items_to_analyze(dir) + for id in l_obj: + #print(f'{dir} / {id}') + self.item = Item(id) + # save current item in cache + 
Tracker.set_cache_retro_hunt_task_id(task_uuid, id) + + yara_match = rule.match(data=self.item.get_content(), callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout) + + # save last item + if nb_id % 10 == 0: # # TODO: Add nb before save in DB + Tracker.set_retro_hunt_last_analyzed(task_uuid, id) + nb_id += 1 + self.update_progress(sources, curr_date) + + # PAUSE + self.update_progress(sources, curr_date) + if Tracker.check_retro_hunt_pause(task_uuid): + Tracker.set_retro_hunt_last_analyzed(task_uuid, id) + #self.update_progress(sources, curr_date, save_db=True) + Tracker.pause_retro_hunt_task(task_uuid) + Tracker.clear_retro_hunt_task_cache(task_uuid) + return None + + self.nb_src_done += 1 + self.update_progress(sources, curr_date) + curr_date = Date.date_add_day(curr_date) + print('-----') + + self.update_progress(sources, curr_date) + + Tracker.set_retro_hunt_task_state(task_uuid, 'completed') + Tracker.set_retro_hunt_nb_match(task_uuid) + Tracker.clear_retro_hunt_task_cache(task_uuid) + + + # # TODO: stop + + def update_progress(self, sources, curr_date, save_db=False): + progress = Tracker.compute_retro_hunt_task_progress(self.task_uuid, date_from=self.date_from, date_to=self.date_to, + sources=sources, curr_date=curr_date, nb_src_done=self.nb_src_done) + if self.progress != progress: + Tracker.set_cache_retro_hunt_task_progress(self.task_uuid, progress) + self.progress = progress + # if save_db: + # Tracker.set_retro_hunt_task_progress(task_uuid, progress) + + def yara_rules_match(self, data): + #print(data) + + task_uuid = data['namespace'] + id = self.item.get_id() + + Tracker.save_retro_hunt_match(task_uuid, id) + + # Tags + for tag in self.tags: + msg = f'{tag};{id}' + self.send_message_to_queue(msg, 'Tags') + + # # Mails + # mail_to_notify = Tracker.get_tracker_mails(tracker_uuid) + # if mail_to_notify: + # mail_subject = Tracker.get_email_subject(tracker_uuid) + # mail_body = 
Tracker_Yara.mail_body_template.format(data['rule'], item_id, self.full_item_url, item_id) + # for mail in mail_to_notify: + # self.redis_logger.debug(f'Send Mail {mail_subject}') + # print(f'Send Mail {mail_subject}') + # NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) + return yara.CALLBACK_CONTINUE + + def run(self): + """ + Run Module endless process + """ + + # Endless loop processing messages from the input queue + while self.proceed: + task_uuid = Tracker.get_retro_hunt_task_to_start() + if task_uuid: + # Module processing with the message from the queue + self.redis_logger.debug(task_uuid) + #try: + self.compute(task_uuid) + #except Exception as err: + # self.redis_logger.error(f'Error in module {self.module_name}: {err}') + # # Remove uuid ref + # self.remove_submit_uuid(uuid) + else: + # Wait before next process + self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s') + time.sleep(self.pending_seconds) + + +if __name__ == '__main__': + + module = Retro_Hunt() + module.run() diff --git a/bin/trackers/Tracker_Regex.py b/bin/trackers/Tracker_Regex.py index a5b047f3..93eedbbb 100755 --- a/bin/trackers/Tracker_Regex.py +++ b/bin/trackers/Tracker_Regex.py @@ -79,9 +79,7 @@ class Tracker_Regex(AbstractModule): if tracker_sources and item_source not in tracker_sources: continue - item_date = item.get_date() - - Tracker.add_tracked_item(tracker_uuid, item_id, item_date) + Tracker.add_tracked_item(tracker_uuid, item_id) tags_to_add = Tracker.get_tracker_tags(tracker_uuid) for tag in tags_to_add: diff --git a/bin/trackers/Tracker_Term.py b/bin/trackers/Tracker_Term.py index fa9553ac..53673859 100755 --- a/bin/trackers/Tracker_Term.py +++ b/bin/trackers/Tracker_Term.py @@ -111,7 +111,7 @@ class Tracker_Term(AbstractModule): if nb_uniq_word >= nb_words_threshold: self.new_term_found(word_set, 'set', item.get_id(), item_date, item_source) - def new_term_found(self, term, term_type, item_id, 
item_date, item_source): + def new_term_found(self, term, term_type, item_id, item_source): uuid_list = Term.get_term_uuid_list(term, term_type) self.redis_logger.info(f'new tracked term found: {term} in {item_id}') print(f'new tracked term found: {term} in {item_id}') @@ -120,7 +120,7 @@ class Tracker_Term(AbstractModule): tracker_sources = Tracker.get_tracker_uuid_sources(term_uuid) if not tracker_sources or item_source in tracker_sources: print(not tracker_sources or item_source in tracker_sources) - Tracker.add_tracked_item(term_uuid, item_id, item_date) + Tracker.add_tracked_item(term_uuid, item_id) tags_to_add = Term.get_term_tags(term_uuid) for tag in tags_to_add: diff --git a/bin/trackers/Tracker_Yara.py b/bin/trackers/Tracker_Yara.py index af16e2a0..8384b7c8 100755 --- a/bin/trackers/Tracker_Yara.py +++ b/bin/trackers/Tracker_Yara.py @@ -78,8 +78,7 @@ class Tracker_Yara(AbstractModule): print(f'Source Filtering: {data["rule"]}') return yara.CALLBACK_CONTINUE - item_date = self.item.get_date() - Tracker.add_tracked_item(tracker_uuid, item_id, item_date) + Tracker.add_tracked_item(tracker_uuid, item_id) # Tags tags_to_add = Tracker.get_tracker_tags(tracker_uuid) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index bce50bb3..32fc2823 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -43,6 +43,7 @@ from blueprints.correlation import correlation from blueprints.tags_ui import tags_ui from blueprints.import_export import import_export from blueprints.objects_item import objects_item +from blueprints.hunters import hunters from blueprints.old_endpoints import old_endpoints @@ -100,6 +101,7 @@ app.register_blueprint(correlation, url_prefix=baseUrl) app.register_blueprint(tags_ui, url_prefix=baseUrl) app.register_blueprint(import_export, url_prefix=baseUrl) app.register_blueprint(objects_item, url_prefix=baseUrl) +app.register_blueprint(hunters, url_prefix=baseUrl) app.register_blueprint(old_endpoints, url_prefix=baseUrl) # 
========= =========# diff --git a/var/www/blueprints/hunters.py b/var/www/blueprints/hunters.py new file mode 100644 index 00000000..1cdde3b7 --- /dev/null +++ b/var/www/blueprints/hunters.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: hunters endpoints: retro hunt tasks (list, show, add, pause, resume, delete) ... +''' + +import os +import sys +import json +import random + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response +from flask_login import login_required, current_user, login_user, logout_user + +sys.path.append('modules') +import Flask_config + +# Import Role_Manager +from Role_Manager import create_user_db, check_password_strength, check_user_role_integrity +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) +import item_basic +import Tracker + +bootstrap_label = Flask_config.bootstrap_label + +# ============ BLUEPRINT ============ +hunters = Blueprint('hunters', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/hunter')) + +# ============ VARIABLES ============ + + + +# ============ FUNCTIONS ============ +def api_validator(api_response): + """ + Return a (JSON Response, status code) tuple built from api_response[0] and api_response[1]. + Returns None when api_response is falsy. + """ + if api_response: + return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1] + +def create_json_response(data, status_code): + """ + Return a (JSON Response, status_code) tuple; data is serialized as indented, key-sorted JSON. + """ + return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code + +# ============= ROUTES ============== + +@hunters.route('/retro_hunt/tasks', methods=['GET']) +@login_required +@login_read_only +def retro_hunt_all_tasks(): + """ + Render retro_hunt_tasks.html with the value returned by Tracker.get_all_retro_hunt_tasks_with_metadata(). + """ + retro_hunts = Tracker.get_all_retro_hunt_tasks_with_metadata() + return render_template("retro_hunt_tasks.html", retro_hunts=retro_hunts, bootstrap_label=bootstrap_label) + +@hunters.route('/retro_hunt/task/show', methods=['GET']) +@login_required +@login_read_only +def
retro_hunt_show_task(): + """ + Render show_retro_hunt.html for the task given by the 'uuid' query argument. + The task's items are only fetched when a 'date_from' query argument is supplied; + otherwise the item list is empty and the date inputs default to the task's own date_from / date_to. + Any non-200 API result is returned to the client as JSON with its status code. + """ + task_uuid = request.args.get('uuid', None) + + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + if date_from: + date_from = date_from.replace('-', '') + if date_to: + date_to = date_to.replace('-', '') + + # A truthy result from the uuid check is returned to the client as-is (data, status code) + res = Tracker.api_check_retro_hunt_task_uuid(task_uuid) + if res: + return create_json_response(res[0], res[1]) + + dict_task = Tracker.get_retro_hunt_task_metadata(task_uuid, date=True, progress=True, creator=True, + sources=True, tags=True, description=True) + rule_content = Tracker.get_yara_rule_content(dict_task['rule']) + + if date_from: + res = Tracker.api_get_retro_hunt_items({'uuid': task_uuid, 'date_from': date_from, 'date_to': date_to}) + if res[1] != 200: + return create_json_response(res[0], res[1]) + dict_task['items'] = res[0]['items'] + dict_task['date_from_input'] = res[0]['date_from'] + dict_task['date_to_input'] = res[0]['date_to'] + else: + dict_task['items'] = [] + dict_task['date_from_input'] = dict_task['date_from'] + dict_task['date_to_input'] = dict_task['date_to'] + + return render_template("show_retro_hunt.html", dict_task=dict_task, + rule_content=rule_content, + bootstrap_label=bootstrap_label) + + +@hunters.route('/retro_hunt/task/add', methods=['GET', 'POST']) +@login_required +@login_analyst +def retro_hunt_add_task(): + """ + GET: render the add_retro_hunt_task.html form. + POST: build a task description from the form fields and submit it with Tracker.api_create_retro_hunt_task; + redirect to the task list on a 200 status, otherwise return the API response as JSON. + """ + if request.method == 'POST': + name = request.form.get("name", '') + description = request.form.get("description", '') + timeout = request.form.get("timeout", 30) + tags = request.form.get("tags", []) + if tags: + tags = tags.split() + # mails = request.form.get("mails", []) + # if mails: + # mails = mails.split() + + # 'sources' is decoded with json.loads when non-empty -- presumably a JSON list sent by the form; TODO confirm + sources = request.form.get("sources", []) + if sources: + sources = json.loads(sources) + + date_from = request.form.get('date_from') + date_to = request.form.get('date_to') + if date_from: + date_from = date_from.replace('-', '') + if date_to: + date_to = date_to.replace('-', '') + + # YARA # + yara_default_rule =
request.form.get("yara_default_rule") + yara_custom_rule = request.form.get("yara_custom_rule") + # A non-empty custom rule takes precedence over the selected default rule + if yara_custom_rule: + rule = yara_custom_rule + rule_type='yara_custom' + else: + rule = yara_default_rule + rule_type='yara_default' + + user_id = current_user.get_id() + + input_dict = {"name": name, "description": description, "creator": user_id, + "rule": rule, "type": rule_type, + "tags": tags, "sources": sources, "timeout": timeout, #"mails": mails, + "date_from": date_from, "date_to": date_to} + + res = Tracker.api_create_retro_hunt_task(input_dict, user_id) + if res[1] == 200: + return redirect(url_for('hunters.retro_hunt_all_tasks')) + else: + ## TODO: use modal + return create_json_response(res[0], res[1]) + else: + return render_template("add_retro_hunt_task.html", + all_yara_files=Tracker.get_all_default_yara_files(), + all_sources=item_basic.get_all_items_sources(r_list=True)) + +@hunters.route('/retro_hunt/task/pause', methods=['GET']) +@login_required +@login_analyst +def retro_hunt_pause_task(): + """ + Call Tracker.api_pause_retro_hunt_task for the 'uuid' query argument. + Return the API response as JSON when its status is not 200, otherwise redirect to the task list. + NOTE(review): this action is exposed over GET. + """ + task_uuid = request.args.get('uuid', None) + res = Tracker.api_pause_retro_hunt_task(task_uuid) + if res[1] != 200: + return create_json_response(res[0], res[1]) + return redirect(url_for('hunters.retro_hunt_all_tasks')) + +@hunters.route('/retro_hunt/task/resume', methods=['GET']) +@login_required +@login_analyst +def retro_hunt_resume_task(): + """ + Call Tracker.api_resume_retro_hunt_task for the 'uuid' query argument. + Return the API response as JSON when its status is not 200, otherwise redirect to the task list. + NOTE(review): this action is exposed over GET. + """ + task_uuid = request.args.get('uuid', None) + res = Tracker.api_resume_retro_hunt_task(task_uuid) + if res[1] != 200: + return create_json_response(res[0], res[1]) + return redirect(url_for('hunters.retro_hunt_all_tasks')) + +@hunters.route('/retro_hunt/task/delete', methods=['GET']) +@login_required +@login_analyst +def retro_hunt_delete_task(): + """ + Call Tracker.api_delete_retro_hunt_task for the 'uuid' query argument. + Return the API response as JSON when its status is not 200, otherwise redirect to the task list. + NOTE(review): this action is exposed over GET. + """ + task_uuid = request.args.get('uuid', None) + res = Tracker.api_delete_retro_hunt_task(task_uuid) + if res[1] != 200: + return create_json_response(res[0], res[1]) + return redirect(url_for('hunters.retro_hunt_all_tasks')) + + +#### JSON #### +
+@hunters.route("/tracker/get_json_retro_hunt_nb_items_by_date", methods=['GET']) +@login_required +@login_read_only +def get_json_retro_hunt_nb_items_by_date(): + """ + Return, as JSON, the result of Tracker.get_retro_hunt_nb_item_by_day for the task given by the 'uuid' query argument. + The optional 'date_from' / 'date_to' query arguments have their '-' separators removed + and are only forwarded when both are set. + """ + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + + if date_from: + date_from = date_from.replace('-', '') + if date_to: + date_to = date_to.replace('-', '') + + task_uuid = request.args.get('uuid') + + if date_from and date_to: + res = Tracker.get_retro_hunt_nb_item_by_day([task_uuid], date_from=date_from, date_to=date_to) + else: + # Incomplete date range: query without date bounds + res = Tracker.get_retro_hunt_nb_item_by_day([task_uuid]) + return jsonify(res) + + +## - - ## diff --git a/var/www/modules/hunter/Flask_hunter.py b/var/www/modules/hunter/Flask_hunter.py index e9dd97d1..a04c7286 100644 --- a/var/www/modules/hunter/Flask_hunter.py +++ b/var/www/modules/hunter/Flask_hunter.py @@ -28,9 +28,6 @@ import Flask_config app = Flask_config.app baseUrl = Flask_config.baseUrl -r_serv_term = Flask_config.r_serv_term -r_serv_cred = Flask_config.r_serv_cred -r_serv_db = Flask_config.r_serv_db bootstrap_label = Flask_config.bootstrap_label hunter = Blueprint('hunter', __name__, template_folder='templates') diff --git a/var/www/templates/hunter/add_retro_hunt_task.html b/var/www/templates/hunter/add_retro_hunt_task.html new file mode 100644 index 00000000..d172a6b7 --- /dev/null +++ b/var/www/templates/hunter/add_retro_hunt_task.html @@ -0,0 +1,228 @@ + + + +
+Name | +Date | +Nb Matches | ++ | + |
---|---|---|---|---|
+
+
+ {{ dict_task['name']}}
+
+
+
+ {% for tag in dict_task['tags'] %}
+
+ {{ tag }}
+
+ {% endfor %}
+
+ |
+ {{ dict_task['date'][0:4]}}/{{ dict_task['date'][4:6]}}/{{ dict_task['date'][6:8]}} | +
+ {{dict_task['nb_match']}}+ |
+ + {%if dict_task['state']=='paused'%} + + + + {%endif%} + {%if dict_task['state']=='running' or dict_task['state']=='pending'%} + + + + + {%endif%} + | + ++ + {%if dict_task['state']=='pending'%} + pending + {%elif dict_task['state']=='completed'%} + completed + {%elif dict_task['state']=='paused'%} + paused [{{ dict_task['progress']}}%] + {%elif dict_task['state']=='running'%} + running [{{ dict_task['progress']}}%] + {%endif%} + + | +
Date | ++ {{dict_task['date'][0:4]}}/{{dict_task['date'][4:6]}}/{{dict_task['date'][6:8]}} + | +
Search date | ++ {{dict_task['date_from'][0:4]}}/{{dict_task['date_from'][4:6]}}/{{dict_task['date_from'][6:8]}} - + {{dict_task['date_to'][0:4]}}/{{dict_task['date_to'][4:6]}}/{{dict_task['date_to'][6:8]}} + | +
Description | +{{dict_task['description']}} | +
Tags | ++ {%for tag in dict_task['tags']%} + + {{ tag }} + + {%endfor%} + | +
Creator | +{{dict_task['creator']}} | +
Sources | +
+
+ {%if not dict_task['sources']%}
+ All Sources
+ {%else%}
+ {%for source in dict_task['sources']%}
+ {{source}}
+ {%endfor%}
+ {%endif%}
+
+ |
+
{{ rule_content }}+ +
Date | +Item Id | +
---|---|
+ {{item['date'][0:4]}}/{{item['date'][4:6]}}/{{item['date'][6:8]}} + | +
+
+ {{ item['id'] }}
+
+
+ {% for tag in item['tags'] %}
+
+ {{ tag }}
+
+ {% endfor %}
+
+ |
+