chg: [Tracker term] track terms by sources

This commit is contained in:
Terrtia 2021-06-14 17:36:30 +02:00
parent eb7981cccf
commit a17ab90511
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
5 changed files with 102 additions and 49 deletions

View file

@ -74,7 +74,7 @@ def get_tracker_level(tracker_uuid):
def get_tracker_user_id(tracker_uuid): def get_tracker_user_id(tracker_uuid):
return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id') return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id')
def get_tracker_uuid_list(tracker, tracker_type): def get_tracker_uuid_list(tracker, tracker_type): ######################################################### USE ME
return list(r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker))) return list(r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker)))
def get_tracker_tags(tracker_uuid): def get_tracker_tags(tracker_uuid):
@ -83,6 +83,9 @@ def get_tracker_tags(tracker_uuid):
def get_tracker_mails(tracker_uuid): def get_tracker_mails(tracker_uuid):
return list(r_serv_tracker.smembers('tracker:mail:{}'.format(tracker_uuid))) return list(r_serv_tracker.smembers('tracker:mail:{}'.format(tracker_uuid)))
def get_tracker_uuid_sources(tracker_uuid):
    """Return the sources registered for one tracker UUID, as a list.

    Reads the members of the set stored under the key
    ``tracker:sources:<tracker_uuid>`` through ``r_serv_tracker.smembers``.
    """
    sources_key = f'tracker:sources:{tracker_uuid}'
    members = r_serv_tracker.smembers(sources_key)
    return list(members)
def get_tracker_description(tracker_uuid): def get_tracker_description(tracker_uuid):
return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'description') return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'description')
@ -133,10 +136,11 @@ def get_tracker_sparkline(tracker_uuid, num_day=6):
def add_tracked_item(tracker_uuid, item_id, item_date): def add_tracked_item(tracker_uuid, item_id, item_date):
# track item # track item
r_serv_tracker.sadd('tracker:item:{}:{}'.format(tracker_uuid, item_date), item_id) res = r_serv_tracker.sadd(f'tracker:item:{tracker_uuid}:{item_date}', item_id)
# track nb item by date # track nb item by date
r_serv_tracker.zadd('tracker:stat:{}'.format(tracker_uuid), item_date, int(item_date)) if res == 1:
r_serv_tracker.zadd('tracker:stat:{}'.format(tracker_uuid), item_date, int(item_date))
bin/lib/Tracker.py
def get_email_subject(tracker_uuid): def get_email_subject(tracker_uuid):
tracker_description = get_tracker_description(tracker_uuid) tracker_description = get_tracker_description(tracker_uuid)
if not tracker_description: if not tracker_description:
@ -150,6 +154,10 @@ def get_tracker_last_updated_by_type(tracker_type):
epoch_update = 0 epoch_update = 0
return float(epoch_update) return float(epoch_update)
# TODO: check type API
def trigger_trackers_refresh(tracker_type):
    """Store the current time under ``tracker:refresh:<tracker_type>``.

    The value written is ``time.time()`` at the moment of the call.
    """
    refresh_key = f'tracker:refresh:{tracker_type}'
    now = time.time()
    r_serv_tracker.set(refresh_key, now)
###################### ######################
#### TRACKERS ACL #### #### TRACKERS ACL ####
@ -235,7 +243,7 @@ def api_validate_tracker_to_add(tracker , tracker_type, nb_words=1):
return ({"status": "error", "reason": "Incorrect type"}, 400) return ({"status": "error", "reason": "Incorrect type"}, 400)
return ({"status": "success", "tracker": tracker, "type": tracker_type}, 200) return ({"status": "success", "tracker": tracker, "type": tracker_type}, 200)
def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, dashboard=0, tracker_uuid=None): def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, dashboard=0, tracker_uuid=None, sources=[]):
# edit tracker # edit tracker
if tracker_uuid: if tracker_uuid:
edit_tracker = True edit_tracker = True
@ -255,7 +263,7 @@ def create_tracker(tracker, tracker_type, user_id, level, tags, mails, descripti
# YARA # YARA
if tracker_type == 'yara_custom' or tracker_type == 'yara_default': if tracker_type == 'yara_custom' or tracker_type == 'yara_default':
# delete yara rule # create yara rule
if tracker_type == 'yara_default' and old_type == 'yara': if tracker_type == 'yara_default' and old_type == 'yara':
if not is_default_yara_rule(old_tracker): if not is_default_yara_rule(old_tracker):
filepath = get_yara_rule_file_by_tracker_name(old_tracker) filepath = get_yara_rule_file_by_tracker_name(old_tracker)
@ -318,6 +326,11 @@ def create_tracker(tracker, tracker_type, user_id, level, tags, mails, descripti
for mail in mails: for mail in mails:
r_serv_tracker.sadd('tracker:mail:{}'.format(tracker_uuid), escape(mail) ) r_serv_tracker.sadd('tracker:mail:{}'.format(tracker_uuid), escape(mail) )
# create tracker sources filter
for source in sources:
# escape source ?
r_serv_tracker.sadd(f'tracker:sources:{tracker_uuid}', escape(source) )
# toggle refresh module tracker list/set # toggle refresh module tracker list/set
r_serv_tracker.set('tracker:refresh:{}'.format(tracker_type), time.time()) r_serv_tracker.set('tracker:refresh:{}'.format(tracker_type), time.time())
if tracker_type != old_type: # toggle old type refresh if tracker_type != old_type: # toggle old type refresh
@ -346,6 +359,7 @@ def api_add_tracker(dict_input, user_id):
res = verify_mail_list(mails) res = verify_mail_list(mails)
if res: if res:
return res return res
sources = dict_input.get('sources', [])
## TODO: add dashboard key ## TODO: add dashboard key
level = dict_input.get('level', 1) level = dict_input.get('level', 1)
@ -371,7 +385,7 @@ def api_add_tracker(dict_input, user_id):
if is_tracker_in_user_level(tracker, tracker_type, user_id) and not tracker_uuid: if is_tracker_in_user_level(tracker, tracker_type, user_id) and not tracker_uuid:
return ({"status": "error", "reason": "Tracker already exist"}, 409) return ({"status": "error", "reason": "Tracker already exist"}, 409)
tracker_uuid = create_tracker(tracker , tracker_type, user_id, level, tags, mails, description, tracker_uuid=tracker_uuid) tracker_uuid = create_tracker(tracker , tracker_type, user_id, level, tags, mails, description, tracker_uuid=tracker_uuid, sources=sources)
return ({'tracker': tracker, 'type': tracker_type, 'uuid': tracker_uuid}, 200) return ({'tracker': tracker, 'type': tracker_type, 'uuid': tracker_uuid}, 200)
@ -407,10 +421,12 @@ def get_all_default_yara_rules_by_type(yara_types):
else: else:
return [] return []
def get_all_tracked_yara_files(): def get_all_tracked_yara_files(filter_disabled=False):
yara_files = r_serv_tracker.smembers('all:tracker:yara') yara_files = r_serv_tracker.smembers('all:tracker:yara')
if not yara_files: if not yara_files:
yara_files = [] yara_files = []
if filter_disabled:
pass
return yara_files return yara_files
def reload_yara_rules(): def reload_yara_rules():
@ -424,6 +440,22 @@ def reload_yara_rules():
rules = yara.compile(filepaths=rule_dict) rules = yara.compile(filepaths=rule_dict)
return rules return rules
# TODO: avoid useless checks
# An empty result means ALL SOURCES.
# FIXME: move me
def get_tracker_sources(tracker, tracker_type):
    """Collect the source filters of every tracker UUID matching a tracker.

    Args:
        tracker: tracker value passed to ``get_tracker_uuid_list``.
        tracker_type: tracker type passed to ``get_tracker_uuid_list``.

    Returns:
        A set holding the union of the sources of all matching tracker
        UUIDs (an empty set when no UUID matches). If any matching UUID has
        no sources, an empty list is returned immediately.
    """
    l_sources = set()
    for tracker_uuid in get_tracker_uuid_list(tracker, tracker_type):
        # Fetch once per UUID: a second lookup is redundant and could
        # observe a different value than the one just tested.
        sources = get_tracker_uuid_sources(tracker_uuid)
        if not sources:
            # One tracker without a source filter makes the combined
            # filter unrestricted, so the remaining UUIDs are irrelevant.
            return []
        l_sources.update(sources)
    return l_sources
def is_valid_yara_rule(yara_rule): def is_valid_yara_rule(yara_rule):
try: try:
yara.compile(source=yara_rule) yara.compile(source=yara_rule)
@ -518,5 +550,14 @@ def api_get_default_rule_content(default_yara_rule):
##-- YARA --## ##-- YARA --##
if __name__ == '__main__': if __name__ == '__main__':
res = is_valid_yara_rule('rule dummy { }') #res = is_valid_yara_rule('rule dummy { }')
# res = create_tracker('test', 'word', 'admin@admin.test', 1, [], [], None, sources=['crawled', 'pastebin.com', 'rt/pastebin.com'])
res = create_tracker('test', 'word', 'admin@admin.test', 1, [], [], None)
# print(res)
t_uuid = '1c2d35b0-9330-4feb-b454-da13007aa9f7'
res = get_tracker_sources('test', 'word')
print(res) print(res)

View file

@ -64,7 +64,8 @@ def get_item_content(item_id):
item_content = f.read().decode() item_content = f.read().decode()
r_cache.set(item_full_path, item_content) r_cache.set(item_full_path, item_content)
r_cache.expire(item_full_path, 300) r_cache.expire(item_full_path, 300)
except: except Exception as e:
print(e)
item_content = '' item_content = ''
return str(item_content) return str(item_content)
@ -176,7 +177,7 @@ def add_map_obj_id_item_id(obj_id, item_id, obj_type):
##-- --## ##-- --##
## COMMON ## ## COMMON ##
def _get_dir_source_name(directory, source_name=None, l_sources_name=set()): def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filter_dir=False):
if source_name: if source_name:
l_dir = os.listdir(os.path.join(directory, source_name)) l_dir = os.listdir(os.path.join(directory, source_name))
else: else:
@ -188,12 +189,16 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set()):
else: else:
for src_name in l_dir: for src_name in l_dir:
if len(src_name) == 4: if len(src_name) == 4:
try: #try:
int(src_name) int(src_name)
l_sources_name.add(os.path.join(source_name)) to_add = os.path.join(source_name)
return l_sources_name # filter sources, remove first directory
except: if filter_dir:
pass to_add = to_add.replace('archive/', '').replace('alerts/', '')
l_sources_name.add(to_add)
return l_sources_name
#except:
# pass
if source_name: if source_name:
src_name = os.path.join(source_name, src_name) src_name = os.path.join(source_name, src_name)
l_sources_name = _get_dir_source_name(directory, source_name=src_name, l_sources_name=l_sources_name) l_sources_name = _get_dir_source_name(directory, source_name=src_name, l_sources_name=l_sources_name)

View file

@ -570,7 +570,9 @@ class Item(AbstractObject):
""" """
Returns Item source/feeder name Returns Item source/feeder name
""" """
return item_basic.get_source(self.id) #return self.id.split('/')[-5]
l_source = self.id.split('/')[:-4]
return os.path.join(*l_source)
def get_basename(self): def get_basename(self):
return os.path.basename(self.id) return os.path.basename(self.id)
@ -605,11 +607,8 @@ class Item(AbstractObject):
except FileNotFoundError: except FileNotFoundError:
return False return False
# if __name__ == '__main__': #if __name__ == '__main__':
#
# item = Item('')
# res = item.get_date(separator=True)
# print(res)
# import Domain # import Domain
# domain = Domain.Domain('domain.onion') # domain = Domain.Domain('domain.onion')

View file

@ -319,6 +319,9 @@ def delete_term(term_uuid):
# remove mails # remove mails
r_serv_term.delete('tracker:mail:{}'.format(term_uuid)) r_serv_term.delete('tracker:mail:{}'.format(term_uuid))
# remove sources
r_serv_term.delete('tracker:sources:{}'.format(term_uuid))
# remove item set # remove item set
all_item_date = r_serv_term.zrange('tracker:stat:{}'.format(term_uuid), 0, -1) all_item_date = r_serv_term.zrange('tracker:stat:{}'.format(term_uuid), 0, -1)
for date in all_item_date: for date in all_item_date:

View file

@ -90,46 +90,51 @@ class Tracker_Term(AbstractModule):
# create token statistics # create token statistics
#for word in dict_words_freq: #for word in dict_words_freq:
# Term.create_token_statistics(item_date, word, dict_words_freq[word]) # Term.create_token_statistics(item_date, word, dict_words_freq[word])
item_source = item.get_source()
# check solo words # check solo words
####### # TODO: check if source needed #######
for word in self.list_tracked_words: for word in self.list_tracked_words:
if word in dict_words_freq: if word in dict_words_freq:
self.new_term_found(word, 'word', item.get_id(), item_date) self.new_term_found(word, 'word', item.get_id(), item_date, item_source)
# check words set # check words set
for elem in self.set_tracked_words_list: for elem in self.set_tracked_words_list:
list_words = elem[0] list_words = elem[0]
nb_words_threshold = elem[1] nb_words_threshold = elem[1]
word_set = elem[2] word_set = elem[2]
nb_uniq_word = 0 nb_uniq_word = 0
for word in list_words: for word in list_words:
if word in dict_words_freq: if word in dict_words_freq:
nb_uniq_word += 1 nb_uniq_word += 1
if nb_uniq_word >= nb_words_threshold: if nb_uniq_word >= nb_words_threshold:
self.new_term_found(word_set, 'set', item.get_id(), item_date) self.new_term_found(word_set, 'set', item.get_id(), item_date, item_source)
def new_term_found(self, term, term_type, item_id, item_date): def new_term_found(self, term, term_type, item_id, item_date, item_source):
uuid_list = Term.get_term_uuid_list(term, term_type) uuid_list = Term.get_term_uuid_list(term, term_type)
self.redis_logger.info(f'new tracked term found: {term} in {item_id}') self.redis_logger.info(f'new tracked term found: {term} in {item_id}')
print(f'new tracked term found: {term} in {item_id}') print(f'new tracked term found: {term} in {item_id}')
for term_uuid in uuid_list: for term_uuid in uuid_list:
Term.add_tracked_item(term_uuid, item_id, item_date) tracker_sources = Tracker.get_tracker_uuid_sources(term_uuid)
if not tracker_sources or item_source in tracker_sources:
print(not tracker_sources or item_source in tracker_sources)
Tracker.add_tracked_item(term_uuid, item_id, item_date)
tags_to_add = Term.get_term_tags(term_uuid) tags_to_add = Term.get_term_tags(term_uuid)
for tag in tags_to_add: for tag in tags_to_add:
msg = '{};{}'.format(tag, item_id) msg = '{};{}'.format(tag, item_id)
self.send_message_to_queue(msg, 'Tags') self.send_message_to_queue(msg, 'Tags')
mail_to_notify = Term.get_term_mails(term_uuid) mail_to_notify = Term.get_term_mails(term_uuid)
if mail_to_notify: if mail_to_notify:
mail_subject = Tracker.get_email_subject(term_uuid) mail_subject = Tracker.get_email_subject(term_uuid)
mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id) mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id)
for mail in mail_to_notify: for mail in mail_to_notify:
self.redis_logger.debug(f'Send Mail {mail_subject}') self.redis_logger.debug(f'Send Mail {mail_subject}')
print(f'Send Mail {mail_subject}') print(f'S print(item_content)end Mail {mail_subject}')
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
if __name__ == '__main__': if __name__ == '__main__':