From 28320a32a6e2e3c02ffa8b3f08bd1b3a36541a55 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 6 Aug 2019 17:03:49 +0200 Subject: [PATCH 01/12] chg: [term] refractor + add new tracked word/set --- OVERVIEW.md | 59 +++++++ bin/TermTrackerMod.py | 58 +++++++ bin/packages/Term.py | 206 +++++++++++++++++++++++ doc/README.md | 88 ++++++++++ var/www/modules/restApi/Flask_restApi.py | 9 + 5 files changed, 420 insertions(+) create mode 100755 bin/TermTrackerMod.py create mode 100755 bin/packages/Term.py diff --git a/OVERVIEW.md b/OVERVIEW.md index f4ee12ec..77339321 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -109,8 +109,56 @@ Redis and ARDB overview | **uuid**:ltags | **tag** | | **uuid**:ltagsgalaxies | **tag** | +## DB2 - New TermFreq: + +##### Term Tracker metadata: +| Hset - Key | Field | Value | +| ------ | ------ | ------ | +| tracked_term:**uuid** | tracked | **tacked word/set/regex** | +| | type | **term/set/regex** | +| | date | **date added** | +| | user_id | **created by user_id** | +| | dashboard | **0/1 Display alert on dashboard** | +| | level | **0/1 Tracker visibility** | + +##### Term Tracked by user_id (visibility level: user only): +| Set - Key | Value | +| ------ | ------ | +| user:tracked_term:**user_id** | **uuid - tracked term uuid** | + +##### Global Term Tracked (visibility level: all users): +| Set - Key | Value | +| ------ | ------ | +| gobal:tracked_term | **uuid - tracked term uuid** | + +##### All Term Tracked by type: +| Set - Key | Value | +| ------ | ------ | +| all:tracked_term:**word/set/regex - term type** | **tracked term** | + +| Set - Key | Value | +| ------ | ------ | +| all:tracked_term_uuid:**tracked term** | **uuid - tracked term uuid** | + +##### All Term Tracked items: +| Set - Key | Value | +| ------ | ------ | +| tracked_term:item:**uuid** | **item_id** | + +##### All Term Tracked tags: +| Set - Key | Value | +| ------ | ------ | +| tracked_term:tags:**uuid** | **tag** | + +##### All Term Tracked tags: +| Set - Key | Value 
| +| ------ | ------ | +| tracked_term:mail:**uuid** | **mail** | + ## DB2 - TermFreq: +##### Set: + ##### Set: | Key | Value | | ------ | ------ | @@ -118,6 +166,17 @@ Redis and ARDB overview | TrackedSetSet | **tracked_set** | | TrackedRegexSet | **tracked_regex** | | | | +| | | +| global:TrackedSetTermSet | **tracked_term** | +| global:TrackedSetSet | **tracked_set** | +| global:TrackedRegexSet | **tracked_regex** | +| | | +| | | +| user:**user_id**:TrackedSetTermSet | **tracked_term** | +| user:**user_id**:TrackedSetSet | **tracked_set** | +| user:**user_id**:TrackedRegexSet | **tracked_regex** | +| | | +| | | | tracked_**tracked_term** | **item_path** | | set_**tracked_set** | **item_path** | | regex_**tracked_regex** | **item_path** | diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py new file mode 100755 index 00000000..2d0458b5 --- /dev/null +++ b/bin/TermTrackerMod.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" +The TermTracker Module +=================== + +""" +import os +import sys +import time + +from packages import Paste +from packages import Term + +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) +import Flask_config + +r_serv_term = Flask_config.r_serv_term + +# loads tracked words +list_tracked_words = Term.get_tracked_words_list() +set_tracked_words_list = Term.get_set_tracked_words_list() + +def new_term_found(term, term_type): + uuid_list = get_term_uuid_list() + email_notification = [] + tags = [] + + for term_uuid in uuid_list: + pass + + +if __name__ == "__main__": + + item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz' + #item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz' + paste = Paste.Paste(item_id) + res = Term.parse_tracked_term_to_add('test zorro meroio apple weert', 'word') + + ''' + dict_words_freq = Term.get_text_word_frequency(paste.get_p_content()) + + # check solo words + for word in list_tracked_words: + if word in dict_words_freq: + 
pass + # tag + get uuids ... + + # check words set + for list_words, nb_words_threshold in set_tracked_words_list: + nb_uniq_word = 0 + for word in list_words: + if word in dict_words_freq: + nb_uniq_word += 1 + if nb_uniq_word > nb_words_threshold: + # tag + get uuid + pass + ''' diff --git a/bin/packages/Term.py b/bin/packages/Term.py new file mode 100755 index 00000000..0902d56f --- /dev/null +++ b/bin/packages/Term.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import uuid +import redis +import datetime + +from collections import defaultdict + +from nltk.tokenize import RegexpTokenizer +from textblob import TextBlob + +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) +import Flask_config + +r_serv_term = Flask_config.r_serv_term + +special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') +special_characters.add('\\s') + +# NLTK tokenizer +tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', + gaps=True, discard_empty=True) + +def get_text_word_frequency(item_content, filtering=True): + item_content = item_content.lower() + words_dict = defaultdict(int) + + if filtering: + blob = TextBlob(item_content , tokenizer=tokenizer) + else: + blob = TextBlob(item_content) + for word in blob.tokens: + words_dict[word] += 1 + print(words_dict) + return words_dict + +# # TODO: create all tracked words +def get_tracked_words_list(): + return list(r_serv_term.smembers('all:tracked_term:word')) + +def get_set_tracked_words_list(): + set_list = r_serv_term.smembers('all:tracked_term:set') + all_set_list = [] + for elem in set_list: + elem = elem.split(';') + num_words = int(elem[1]) + ter_set = elem[0].split(',') + all_set_list.append((ter_set, num_words)) + +def parse_json_term_to_add(dict_input): + term = dict_input.get('term', None) + if not term: + return ({"status": "error", "reason": "Term not provided"}, 400) + term_type = dict_input.get('term', 
None) + if not term_type: + return ({"status": "error", "reason": "Term type not provided"}, 400) + nb_words = dict_input.get('nb_words', 1) + + res = parse_tracked_term_to_add(term , term_type, nb_words=nb_words) + if res['status']=='error': + return res + + # get user_id + tags = dict_input.get('tags', []) + mails = dict_input.get('mails', []) + ## TODO: verify mail integrity + + ## TODO: add dashboard key + level = dict_input.get('level', 1) + try: + level = int(level) + if level not in range(0, 1): + level = 1 + except: + level = 1 + + term_uuid = add_tracked_term(term , term_type, user_id, level, tags, mails) + + return ({'term': term, 'uuid': term_uuid}, 200) + + +def parse_tracked_term_to_add(term , term_type, nb_words=1): + + # todo verify regex format + if term_type=='regex': + # TODO: verify regex integrity + pass + elif term_type=='word' or term_type=='set': + # force lowercase + term = term.lower() + word_set = set(term) + set_inter = word_set.intersection(special_characters) + if set_inter: + return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400) + words = term.split() + # not a word + if term_type=='word' and words: + term_type = 'set' + + # ouput format: term1,term2,term3;2 + if term_type=='set': + try: + nb_words = int(nb_words) + except: + nb_words = 1 + + words_set = set(words) + words_set = sorted(words_set) + term = ",".join(words_set) + term = "{};{}".format(term, nb_words) + + print(term) + print(term_type) + + return ({"status": "success", "term": term, "type": term_type}, 200) + + else: + return ({"status": "error", "reason": "Incorrect type"}, 400) + +def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0): + + term_uuid = str(uuid.uuid4()) + + # create metadata + r_serv_term.hset('tracked_term:{}'.format(term_uuid), 'tracked',term) + r_serv_term.hset('tracked_term:{}'.format(term_uuid), 'type', term_type) + 
r_serv_term.hset('tracked_term:{}'.format(term_uuid), 'date', datetime.date.today().strftime("%Y%m%d")) + r_serv_term.hset('tracked_term:{}'.format(term_uuid), 'user_id', user_id) + r_serv_term.hset('tracked_term:{}'.format(term_uuid), 'level', level) + r_serv_term.hset('tracked_term:{}'.format(term_uuid), 'dashboard', dashboard) + + # create all term set + r_serv_term.sadd('all:tracked_term:{}'.format(term_type), term) + + # create term - uuid map + r_serv_term.sadd('all:tracked_term_uuid:{}'.format(term), term_uuid) + + # add display level set + if level == 0: # user only + r_serv_term.sadd('user:tracked_term:{}'.format(user_id), term_uuid) + elif level == 1: # global + r_serv_term.sadd('gobal:tracked_term', term_uuid) + + # create term tags list + for tag in tags: + r_serv_term.sadd('tracked_term:tags:{}'.format(term_uuid), tag) + + # create term tags mail notification list + for mail in mails: + r_serv_term.sadd('tracked_term:mail:{}'.format(term_uuid), mail) + + return term_uuid + +def get_term_uuid_list(term): + return list(r_serv_term.smembers('all:tracked_term_uuid:{}'.format(term))) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +def get_global_tracked_term(): + dict_tracked = {} + tracked_set = list(r_serv_term.smembers('global:TrackedSetSet')) + tracked_regex = list(r_serv_term.smembers('global:TrackedRegexSet')) + tracked_terms = list(r_serv_term.smembers('global:TrackedSetTermSet')) + return {'term': tracked_terms, 'set': tracked_terms, 'regex': tracked_regex} + +def get_user_tracked_term(user_id): + dict_tracked = {} + tracked_set = list(r_serv_term.smembers('user:{}:TrackedSetSet'.format(user_id))) + tracked_regex = list(r_serv_term.smembers('user:{}:TrackedRegexSet').format(user_id)) + tracked_terms = list(r_serv_term.smembers('user:{}:TrackedSetTermSet').format(user_id)) + return {'term': tracked_terms, 'set': tracked_terms, 'regex': tracked_regex} diff --git a/doc/README.md b/doc/README.md index a466c681..52768cd0 100644 --- 
a/doc/README.md +++ b/doc/README.md @@ -583,6 +583,94 @@ curl https://127.0.0.1:7000/api/v1/get/tag/metadata --header "Authorization: iHc + + +## Tracker + + + +### Add term tracker: `api/v1/add/tracker/term` + +#### Description +Add term tracker + +**Method** : `POST` + +#### Parameters +- `term` + - term to add + - *str - word(s)* + - default: `text` +- `nb_words` + - number of words in set + - *int* + - default: `1` +- `type` + - term type + - *str* + - mandatory: `word`, `set`, `regex` +- `tags` + - list of tags + - *list* + - default: `[]` +- `mails` + - list of mails to notify + - *list* + - default: `[]` +- `level` + - tracker visibility + - *int - 0: user only, 1: all users* + - default: `1` + +#### JSON response +- `uuid` + - import uuid + - *uuid4* + +#### Example +``` +curl https://127.0.0.1:7000/api/v1/import/item --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST +``` + +#### input.json Example +```json + { + "type": "text", + "tags": [ + "infoleak:analyst-detection=\"private-key\"" + ], + "text": "text to import" + } +``` + +#### Expected Success Response +**HTTP Status Code** : `200` + +```json + { + "uuid": "0c3d7b34-936e-4f01-9cdf-2070184b6016" + } +``` + +#### Expected Fail Response +**HTTP Status Code** : `400` + +```json + {"status": "error", "reason": "Malformed JSON"} + {"status": "error", "reason": "No text supplied"} + {"status": "error", "reason": "Tags or Galaxy not enabled"} + {"status": "error", "reason": "Size exceeds default"} +``` + + + + + + + + + + ## Import management diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index 6ea8dd69..f951ef9f 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -307,6 +307,15 @@ def get_all_tags(): res = {'tags': Tag.get_all_tags()} return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200 
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +# # # # # # # # # # # # # # TAGS # # # # # # # # # # # # # # # # # +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +@restApi.route("api/v1/add/tracker/term", methods=['POST']) +#@token_required('analyst') +def add_tracker_term(): + data = request.get_json() + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # From bb6d3a6a263eb3d6c55e32f3b9b2c25558669708 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 7 Aug 2019 12:08:24 +0200 Subject: [PATCH 02/12] chg: [Term tracker] add term tracker module (word + set) + API: add new term to track (word + set + regex) --- bin/NotificationHelper.py | 7 -- bin/TermTrackerMod.py | 88 ++++++++++++------ bin/packages/Term.py | 113 +++++++++++++++++++---- bin/packages/config.cfg.sample | 2 +- doc/README.md | 2 +- var/www/modules/restApi/Flask_restApi.py | 20 ++-- 6 files changed, 170 insertions(+), 62 deletions(-) diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index 1bccd314..4007e56f 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -20,13 +20,6 @@ configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') publisher.port = 6380 publisher.channel = "Script" -# notifications enabled/disabled -TrackedTermsNotificationEnabled_Name = "TrackedNotifications" - -# associated notification email addresses for a specific term` -# Keys will be e.g. 
TrackedNotificationEmails -TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_" - def sendEmailNotification(recipient, alert_name, content): if not os.path.exists(configfile): diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py index 2d0458b5..fe60640e 100755 --- a/bin/TermTrackerMod.py +++ b/bin/TermTrackerMod.py @@ -9,50 +9,84 @@ import os import sys import time +from Helper import Process +from pubsublogger import publisher + +import NotificationHelper + from packages import Paste from packages import Term sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) import Flask_config -r_serv_term = Flask_config.r_serv_term +full_item_url = "/showsavedpaste/?paste=" + +mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}" # loads tracked words list_tracked_words = Term.get_tracked_words_list() set_tracked_words_list = Term.get_set_tracked_words_list() -def new_term_found(term, term_type): - uuid_list = get_term_uuid_list() - email_notification = [] - tags = [] +def new_term_found(term, term_type, item_id): + uuid_list = Term.get_term_uuid_list(term) for term_uuid in uuid_list: - pass + Term.add_tracked_item(term_uuid, item_id) + + tags_to_add = Term.get_term_tags(term_uuid) + for tag in tags_to_add: + msg = '{};{}'.format(tag, item_id) + p.populate_set_out(msg, 'Tags') + + mail_to_notify = Term.get_term_mails(term_uuid) + if mail_to_notify: + mail_body = mail_body_template.format(term, item_id, full_item_url, item_id) + for mail in mail_to_notify: + NotificationHelper.sendEmailNotification(mail, 'Term Tracker', mail_body) if __name__ == "__main__": - item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz' - #item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz' - paste = Paste.Paste(item_id) - res = Term.parse_tracked_term_to_add('test zorro meroio apple weert', 'word') + publisher.port = 6380 + publisher.channel = "Script" + 
publisher.info("Script TermTrackerMod started") - ''' - dict_words_freq = Term.get_text_word_frequency(paste.get_p_content()) + #config_section = 'TermTrackerMod' + config_section = 'Curve' + p = Process(config_section) - # check solo words - for word in list_tracked_words: - if word in dict_words_freq: - pass - # tag + get uuids ... + full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url - # check words set - for list_words, nb_words_threshold in set_tracked_words_list: - nb_uniq_word = 0 - for word in list_words: - if word in dict_words_freq: - nb_uniq_word += 1 - if nb_uniq_word > nb_words_threshold: - # tag + get uuid - pass - ''' + while True: + + item_id = p.get_from_set() + item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz' + #item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz' + + if message is not None: + + paste = Paste.Paste(item_id) + + dict_words_freq = Term.get_text_word_frequency(paste.get_p_content()) + + # check solo words + for word in list_tracked_words: + if word in dict_words_freq: + new_term_found(word, 'word', item_id) + + # check words set + for elem in set_tracked_words_list: + list_words = elem[0] + nb_words_threshold = elem[1] + word_set = elem[2] + nb_uniq_word = 0 + + for word in list_words: + if word in dict_words_freq: + nb_uniq_word += 1 + if nb_uniq_word >= nb_words_threshold: + new_term_found(word_set, 'set', item_id) + + else: + time.sleep(5) diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 0902d56f..312ed7bd 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -2,6 +2,7 @@ # -*-coding:UTF-8 -* import os +import re import sys import uuid import redis @@ -16,6 +17,7 @@ sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) import Flask_config r_serv_term = Flask_config.r_serv_term +email_regex = Flask_config.email_regex special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') special_characters.add('\\s') @@ -24,6 +26,26 
@@ special_characters.add('\\s') tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', gaps=True, discard_empty=True) +def is_valid_mail(email): + result = email_regex.match(email) + if result: + return True + else: + return False + +def verify_mail_list(mail_list): + for mail in mail_list: + if not is_valid_mail(mail): + return ({'status': 'error', 'reason': 'Invalid email', 'value': mail}, 400) + return None + +def is_valid_regex(term_regex): + try: + re.compile(term_regex) + return True + except: + return False + def get_text_word_frequency(item_content, filtering=True): item_content = item_content.lower() words_dict = defaultdict(int) @@ -34,7 +56,6 @@ def get_text_word_frequency(item_content, filtering=True): blob = TextBlob(item_content) for word in blob.tokens: words_dict[word] += 1 - print(words_dict) return words_dict # # TODO: create all tracked words @@ -45,28 +66,40 @@ def get_set_tracked_words_list(): set_list = r_serv_term.smembers('all:tracked_term:set') all_set_list = [] for elem in set_list: - elem = elem.split(';') - num_words = int(elem[1]) - ter_set = elem[0].split(',') - all_set_list.append((ter_set, num_words)) + res = elem.split(';') + num_words = int(res[1]) + ter_set = res[0].split(',') + all_set_list.append((ter_set, num_words, elem)) + return all_set_list -def parse_json_term_to_add(dict_input): +def is_term_tracked_in_global_level(term): + res = r_serv_term.smembers('all:tracked_term_uuid:{}'.format(term)) + if res: + for elem_uuid in res: + if r_serv_term.hget('tracked_term:{}'.format(elem_uuid), 'level')=='1': + return True + return False + +def parse_json_term_to_add(dict_input, user_id): term = dict_input.get('term', None) if not term: return ({"status": "error", "reason": "Term not provided"}, 400) - term_type = dict_input.get('term', None) + term_type = dict_input.get('type', None) if not term_type: return ({"status": "error", "reason": "Term type not provided"}, 400) nb_words = 
dict_input.get('nb_words', 1) res = parse_tracked_term_to_add(term , term_type, nb_words=nb_words) - if res['status']=='error': + if res[1]!=200: return res + term = res[0]['term'] + term_type = res[0]['type'] - # get user_id tags = dict_input.get('tags', []) mails = dict_input.get('mails', []) - ## TODO: verify mail integrity + res = verify_mail_list(mails) + if res: + return res ## TODO: add dashboard key level = dict_input.get('level', 1) @@ -77,17 +110,20 @@ def parse_json_term_to_add(dict_input): except: level = 1 + # check if term already tracked in global + if level==1: + if is_term_tracked_in_global_level(term): + return ({"status": "error", "reason": "Term already tracked"}, 409) + term_uuid = add_tracked_term(term , term_type, user_id, level, tags, mails) - return ({'term': term, 'uuid': term_uuid}, 200) + return ({'term': term, 'type': term_type, 'uuid': term_uuid}, 200) def parse_tracked_term_to_add(term , term_type, nb_words=1): - - # todo verify regex format if term_type=='regex': - # TODO: verify regex integrity - pass + if not is_valid_regex(term): + return ({"status": "error", "reason": "Invalid regex"}, 400) elif term_type=='word' or term_type=='set': # force lowercase term = term.lower() @@ -97,7 +133,7 @@ def parse_tracked_term_to_add(term , term_type, nb_words=1): return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400) words = term.split() # not a word - if term_type=='word' and words: + if term_type=='word' and len(words)>1: term_type = 'set' # ouput format: term1,term2,term3;2 @@ -106,19 +142,21 @@ def parse_tracked_term_to_add(term , term_type, nb_words=1): nb_words = int(nb_words) except: nb_words = 1 + if nb_words==0: + nb_words = 1 words_set = set(words) words_set = sorted(words_set) + term = ",".join(words_set) term = "{};{}".format(term, nb_words) - print(term) - print(term_type) - - return ({"status": "success", "term": term, "type": term_type}, 200) 
+ if nb_words > len(words_set): + nb_words = len(words_set) else: return ({"status": "error", "reason": "Incorrect type"}, 400) + return ({"status": "success", "term": term, "type": term_type}, 200) def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0): @@ -154,9 +192,44 @@ def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0) return term_uuid +def delete_term(term_uuid): + term = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'tracked') + term_type = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'type') + term_level = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') + r_serv_term.srem('all:tracked_term_uuid:{}'.format(term), term_uuid) + r_serv_term.srem('all:tracked_term:{}'.format(term_type), term_uuid) + + + if level == 0: # user only + user_id = term_type = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'user_id') + r_serv_term.srem('user:tracked_term:{}'.format(user_id), term_uuid) + elif level == 1: # global + r_serv_term.srem('gobal:tracked_term', term_uuid) + + # delete metatadata + r_serv_term.delete('tracked_term:{}'.format(term_uuid)) + + # remove tags + r_serv_term.delete('tracked_term:tags:{}'.format(term_uuid)) + + # remove mails + r_serv_term.delete('tracked_term:mail:{}'.format(term_uuid)) + + # remove item set + r_serv_term.delete('tracked_term:item:{}'.format(term_uuid)) + def get_term_uuid_list(term): return list(r_serv_term.smembers('all:tracked_term_uuid:{}'.format(term))) +def get_term_tags(term_uuid): + return list(r_serv_term.smembers('tracked_term:tags:{}'.format(term_uuid))) + +def get_term_mails(term_uuid): + return list(r_serv_term.smembers('tracked_term:mail:{}'.format(term_uuid))) + +def add_tracked_item(term_uuid, item_id): + r_serv_term.sadd('tracked_term:item:{}'.format(term_uuid), item_id) + diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index ea0ea55c..09e05ddf 100644 --- a/bin/packages/config.cfg.sample +++ 
b/bin/packages/config.cfg.sample @@ -23,7 +23,7 @@ sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon ##### Notifications ###### [Notifications] -ail_domain = http://localhost:7000 +ail_domain = https://localhost:7000 sender = sender@example.com sender_host = smtp.example.com sender_port = 1337 diff --git a/doc/README.md b/doc/README.md index 52768cd0..31f13cc3 100644 --- a/doc/README.md +++ b/doc/README.md @@ -600,7 +600,7 @@ Add term tracker - `term` - term to add - *str - word(s)* - - default: `text` + - mandatory - `nb_words` - number of words in set - *int* diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index f951ef9f..864e7ed3 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -17,6 +17,7 @@ import Import_helper import Item import Paste import Tag +import Term from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response from flask_login import login_required @@ -55,8 +56,11 @@ def verify_token(token): else: return False +def get_user_from_token(token): + return r_serv_db.hget('user:tokens', token) + def verify_user_role(role, token): - user_id = r_serv_db.hget('user:tokens', token) + user_id = get_user_from_token(token) if user_id: if is_in_role(user_id, role): return True @@ -308,13 +312,17 @@ def get_all_tags(): return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # -# # # # # # # # # # # # # # TAGS # # # # # # # # # # # # # # # # # +# # # # # # # # # # # # # # TRACKER # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # -@restApi.route("api/v1/add/tracker/term", methods=['POST']) -#@token_required('analyst') +@restApi.route("api/v1/add/tracker/term", methods=['GET']) +@token_required('analyst') def 
add_tracker_term(): - data = request.get_json() - + #data = request.get_json() + data = {"term": "pi", 'type' : "word"} + user_token = get_auth_from_header() + user_id = get_user_from_token(user_token) + res = Term.parse_json_term_to_add(data, user_id) + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # # From d9bdfecef395e4b88f2c5df3bc59b8ac55285790 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 7 Aug 2019 12:19:42 +0200 Subject: [PATCH 03/12] fix: [Term Tracker module] chg module flow --- bin/TermTrackerMod.py | 2 +- bin/packages/modules.cfg | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py index fe60640e..08eb7247 100755 --- a/bin/TermTrackerMod.py +++ b/bin/TermTrackerMod.py @@ -53,7 +53,7 @@ if __name__ == "__main__": publisher.info("Script TermTrackerMod started") #config_section = 'TermTrackerMod' - config_section = 'Curve' + config_section = 'TermTrackerMod' p = Process(config_section) full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index f1fe5e3d..4526d978 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -34,6 +34,10 @@ publish = Redis_Words subscribe = Redis_Words publish = Redis_CurveManageTopSets,Redis_Tags +[TermTrackerMod] +subscribe = Redis_Global +publish = Redis_Tags + [RegexForTermsFrequency] subscribe = Redis_Global publish = Redis_Tags From 1008c7c4fed46286dc3a780e9712674c83472fff Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 9 Aug 2019 14:20:13 +0200 Subject: [PATCH 04/12] chg: [Term Tracker] refractor term tracker word/set/regex modules + remove old modules --- OVERVIEW.md | 37 ++++--- bin/Curve.py | 184 --------------------------------- bin/CurveManageTopSets.py | 
166 ----------------------------- bin/DbCleaner.py | 59 +++++++++++ bin/Dir.py | 48 --------- bin/LAUNCH.sh | 14 +-- bin/Lines.py | 85 --------------- bin/ModuleStats.py | 1 - bin/RegexForTermsFrequency.py | 157 ---------------------------- bin/RegexTracker.py | 96 +++++++++++++++++ bin/SetForTermsFrequency.py | 151 --------------------------- bin/TermTrackerMod.py | 56 +++++++--- bin/Tokenize.py | 71 ------------- bin/packages/Date.py | 12 ++- bin/packages/Item.py | 3 + bin/packages/Term.py | 67 ++++++++++-- bin/packages/config.cfg.sample | 5 +- bin/packages/lib_words.py | 52 ---------- bin/packages/modules.cfg | 21 +--- 19 files changed, 304 insertions(+), 981 deletions(-) delete mode 100755 bin/Curve.py delete mode 100755 bin/CurveManageTopSets.py create mode 100755 bin/DbCleaner.py delete mode 100755 bin/Dir.py delete mode 100755 bin/Lines.py delete mode 100755 bin/RegexForTermsFrequency.py create mode 100755 bin/RegexTracker.py delete mode 100755 bin/SetForTermsFrequency.py delete mode 100755 bin/Tokenize.py diff --git a/OVERVIEW.md b/OVERVIEW.md index 77339321..a3425155 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -138,12 +138,12 @@ Redis and ARDB overview | Set - Key | Value | | ------ | ------ | -| all:tracked_term_uuid:**tracked term** | **uuid - tracked term uuid** | +| all:tracked_term_uuid:**term type**:**tracked term** | **uuid - tracked term uuid** | ##### All Term Tracked items: | Set - Key | Value | | ------ | ------ | -| tracked_term:item:**uuid** | **item_id** | +| tracked_term:item:**uuid**:**date** | **item_id** | ##### All Term Tracked tags: | Set - Key | Value | @@ -155,6 +155,29 @@ Redis and ARDB overview | ------ | ------ | | tracked_term:mail:**uuid** | **mail** | +##### Refresh Tracked term: +| Key | Value | +| ------ | ------ | +| tracked_term:refresh:word | **last refreshed epoch** | +| tracked_term:refresh:set | - | +| tracked_term:refresh:regex | - | + +##### Zset Stat Tracked term: +| Key | Field | Value | +| ------ | ------ | ------ 
| +| tracked_term:stat:**uuid** | **date** | **nb_seen** | + +##### Stat token: +| Key | Field | Value | +| ------ | ------ | ------ | +| stat_token_total_by_day:**date** | **word** | **nb_seen** | +| | | | +| stat_token_per_item_by_day:**date** | **word** | **nb_seen** | + +| Set - Key | Value | +| ------ | ------ | +| stat_token_history | **date** | + ## DB2 - TermFreq: ##### Set: @@ -167,16 +190,6 @@ Redis and ARDB overview | TrackedRegexSet | **tracked_regex** | | | | | | | -| global:TrackedSetTermSet | **tracked_term** | -| global:TrackedSetSet | **tracked_set** | -| global:TrackedRegexSet | **tracked_regex** | -| | | -| | | -| user:**user_id**:TrackedSetTermSet | **tracked_term** | -| user:**user_id**:TrackedSetSet | **tracked_set** | -| user:**user_id**:TrackedRegexSet | **tracked_regex** | -| | | -| | | | tracked_**tracked_term** | **item_path** | | set_**tracked_set** | **item_path** | | regex_**tracked_regex** | **item_path** | diff --git a/bin/Curve.py b/bin/Curve.py deleted file mode 100755 index c7083c54..00000000 --- a/bin/Curve.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module. - -This modules update a .csv file used to draw curves representing selected -words and their occurency per day. - -..note:: The channel will have the name of the file created. - -..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put -the same Subscriber name in both of them. - - -This Module is also used for term frequency. - -/!\ Top set management is done in the module Curve_manage_top_set - - -Requirements ------------- - -*Need running Redis instances. (Redis) -*Categories files of words in /files/ need to be created -*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. 
- -""" -import redis -import time -from pubsublogger import publisher -from packages import lib_words -import os -import datetime -import calendar - -from Helper import Process - -# Email notifications -from NotificationHelper import * - -# Config Variables -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] - -TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" - -# create direct link in mail -full_paste_url = "/showsavedpaste/?paste=" - -def check_if_tracked_term(term, path): - if term in server_term.smembers(TrackedTermsSet_Name): - #add_paste to tracked_word_set - set_name = "tracked_" + term - server_term.sadd(set_name, path) - print(term, 'addded', set_name, '->', path) - p.populate_set_out("New Term added", 'CurveManageTopSets') - - # Send a notification only when the member is in the set - if term in server_term.smembers(TrackedTermsNotificationEnabled_Name): - - # create mail body - mail_body = ("AIL Framework,\n" - "New occurrence for term: " + term + "\n" - ''+full_paste_url + path) - - # Send to every associated email adress - for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + term): - sendEmailNotification(email, 'Term', mail_body) - - # tag paste - for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + term): - msg = '{};{}'.format(tag, path) - p.populate_set_out(msg, 'Tags') - - -def getValueOverRange(word, startDate, num_day): - to_return = 0 - for timestamp in range(startDate, startDate - num_day*oneDay, -oneDay): - value = server_term.hget(timestamp, word) - to_return 
+= int(value) if value is not None else 0 - return to_return - - - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Curve' - p = Process(config_section) - - # REDIS # - r_serv1 = redis.StrictRedis( - host=p.config.get("ARDB_Curve", "host"), - port=p.config.get("ARDB_Curve", "port"), - db=p.config.get("ARDB_Curve", "db"), - decode_responses=True) - - server_term = redis.StrictRedis( - host=p.config.get("ARDB_TermFreq", "host"), - port=p.config.get("ARDB_TermFreq", "port"), - db=p.config.get("ARDB_TermFreq", "db"), - decode_responses=True) - - # FUNCTIONS # - publisher.info("Script Curve started") - - # create direct link in mail - full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url - - # FILE CURVE SECTION # - csv_path = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "wordtrending_csv")) - wordfile_path = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "wordsfile")) - - message = p.get_from_set() - prec_filename = None - generate_new_graph = False - - # Term Frequency - top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] - top_termFreq_setName_week = ["TopTermFreq_set_week", 7] - top_termFreq_setName_month = ["TopTermFreq_set_month", 31] - - while True: - - if message is not None: - generate_new_graph = True - - filename, word, score = message.split() - temp = filename.split('/') - date = temp[-4] + temp[-3] + temp[-2] - timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) - curr_set = top_termFreq_setName_day[0] + str(timestamp) - - - low_word = word.lower() - #Old curve with words in file - r_serv1.hincrby(low_word, date, int(score)) - - # Update redis - #consider the num of occurence of this term - curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) - #1 term per paste - curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1))) - - # Add in set 
only if term is not in the blacklist - if low_word not in server_term.smembers(BlackListTermsSet_Name): - #consider the num of occurence of this term - server_term.zincrby(curr_set, low_word, float(score)) - #1 term per paste - server_term.zincrby("per_paste_" + curr_set, low_word, float(1)) - - #Add more info for tracked terms - check_if_tracked_term(low_word, filename) - - #send to RegexForTermsFrequency - to_send = "{} {} {}".format(filename, timestamp, word) - p.populate_set_out(to_send, 'RegexForTermsFrequency') - - else: - - if generate_new_graph: - generate_new_graph = False - print('Building graph') - today = datetime.date.today() - year = today.year - month = today.month - - lib_words.create_curve_with_word_file(r_serv1, csv_path, - wordfile_path, year, - month) - - publisher.debug("Script Curve is Idling") - print("sleeping") - time.sleep(10) - message = p.get_from_set() diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py deleted file mode 100755 index 4eaf9c3f..00000000 --- a/bin/CurveManageTopSets.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" - -This module manage top sets for terms frequency. 
-Every 'refresh_rate' update the weekly and monthly set - -""" - -import redis -import time -import datetime -import copy -from pubsublogger import publisher -from packages import lib_words -import datetime -import calendar -import os -import configparser - -# Config Variables -Refresh_rate = 60*5 #sec -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -num_day_month = 31 -num_day_week = 7 - -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] - - -def manage_top_set(): - startDate = datetime.datetime.now() - startDate = startDate.replace(hour=0, minute=0, second=0, microsecond=0) - startDate = calendar.timegm(startDate.timetuple()) - blacklist_size = int(server_term.scard(BlackListTermsSet_Name)) - - dico = {} - dico_per_paste = {} - - # Retreive top data (max_card + blacklist_size) from days sets - for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay): - curr_set = top_termFreq_setName_day[0] + str(timestamp) - array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) - array_top_day_per_paste = server_term.zrevrangebyscore("per_paste_" + curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) - - for word, value in array_top_day: - if word not in server_term.smembers(BlackListTermsSet_Name): - if word in dico.keys(): - dico[word] += value - else: - dico[word] = value - - for word, value in array_top_day_per_paste: - if word not in server_term.smembers(BlackListTermsSet_Name): - if word in dico_per_paste.keys(): - 
dico_per_paste[word] += value - else: - dico_per_paste[word] = value - - if timestamp == startDate - num_day_week*oneDay: - dico_week = copy.deepcopy(dico) - dico_week_per_paste = copy.deepcopy(dico_per_paste) - - # convert dico into sorted array - array_month = [] - for w, v in dico.items(): - array_month.append((w, v)) - array_month.sort(key=lambda tup: -tup[1]) - array_month = array_month[0:20] - - array_week = [] - for w, v in dico_week.items(): - array_week.append((w, v)) - array_week.sort(key=lambda tup: -tup[1]) - array_week = array_week[0:20] - - # convert dico_per_paste into sorted array - array_month_per_paste = [] - for w, v in dico_per_paste.items(): - array_month_per_paste.append((w, v)) - array_month_per_paste.sort(key=lambda tup: -tup[1]) - array_month_per_paste = array_month_per_paste[0:20] - - array_week_per_paste = [] - for w, v in dico_week_per_paste.items(): - array_week_per_paste.append((w, v)) - array_week_per_paste.sort(key=lambda tup: -tup[1]) - array_week_per_paste = array_week_per_paste[0:20] - - - # suppress every terms in top sets - for curr_set, curr_num_day in top_termFreq_set_array[1:3]: - for w in server_term.zrange(curr_set, 0, -1): - server_term.zrem(curr_set, w) - for w in server_term.zrange("per_paste_" + curr_set, 0, -1): - server_term.zrem("per_paste_" + curr_set, w) - - # Add top term from sorted array in their respective sorted sets - for elem in array_week: - server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0]) - for elem in array_week_per_paste: - server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0]) - - for elem in array_month: - server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0]) - for elem in array_month_per_paste: - server_term.zadd("per_paste_" + top_termFreq_setName_month[0], float(elem[1]), elem[0]) - - timestamp = int(time.mktime(datetime.datetime.now().timetuple())) - value = str(timestamp) + ", " + "-" - r_temp.set("MODULE_"+ 
"CurveManageTopSets" + "_" + str(os.getpid()), value) - print("refreshed module") - - - -if __name__ == '__main__': - # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) - # Port of the redis instance used by pubsublogger - publisher.port = 6380 - # Script is the default channel used for the modules. - publisher.channel = 'Script' - - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) - - - # For Module Manager - r_temp = redis.StrictRedis( - host=cfg.get('RedisPubSub', 'host'), - port=cfg.getint('RedisPubSub', 'port'), - db=cfg.getint('RedisPubSub', 'db'), - decode_responses=True) - - timestamp = int(time.mktime(datetime.datetime.now().timetuple())) - value = str(timestamp) + ", " + "-" - r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) - r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid())) - - server_term = redis.StrictRedis( - host=cfg.get("ARDB_TermFreq", "host"), - port=cfg.getint("ARDB_TermFreq", "port"), - db=cfg.getint("ARDB_TermFreq", "db"), - decode_responses=True) - - publisher.info("Script Curve_manage_top_set started") - - # Sent to the logging a description of the module - publisher.info("Manage the top sets with the data created by the module curve.") - - manage_top_set() - - while True: - # Get one message from the input queue (module only work if linked with a queue) - time.sleep(Refresh_rate) # sleep a long time then manage the set - manage_top_set() diff --git a/bin/DbCleaner.py b/bin/DbCleaner.py new file mode 100755 index 00000000..ed2bb752 --- /dev/null +++ b/bin/DbCleaner.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" +The TermTracker Module +=================== + +""" 
+import os +import sys +import time +import datetime + +from pubsublogger import publisher + +import NotificationHelper + +from packages import Date +from packages import Item +from packages import Term + +def clean_term_db_stat_token(): + all_stat_date = Term.get_all_token_stat_history() + + list_date_to_keep = Date.get_date_range(31) + for date in all_stat_date: + if date not in list_date_to_keep: + # remove history + Term.delete_token_statistics_by_date(date) + + print('Term Stats Cleaned') + + +if __name__ == "__main__": + + publisher.port = 6380 + publisher.channel = "Script" + publisher.info("DbCleaner started") + + config_section = 'TermTrackerMod' + + # low priority + time.sleep(180) + + daily_cleaner = True + current_date = datetime.datetime.now().strftime("%Y%m%d") + + while True: + + if daily_cleaner: + clean_term_db_stat_token() + daily_cleaner = False + else: + sys.exit(0) + time.sleep(600) + + new_date = datetime.datetime.now().strftime("%Y%m%d") + if new_date != current_date: + current_date = new_date + daily_cleaner = True diff --git a/bin/Dir.py b/bin/Dir.py deleted file mode 100755 index d76a7ad5..00000000 --- a/bin/Dir.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import argparse -import redis -from pubsublogger import publisher -from packages.lib_words import create_dirfile -import configparser - - -def main(): - """Main Function""" - - # CONFIG # - cfg = configparser.ConfigParser() - cfg.read('./packages/config.cfg') - - parser = argparse.ArgumentParser( - description='''This script is a part of the Analysis Information Leak - framework. 
It create a redis list called "listfile" which contain - the absolute filename of all the files from the directory given in - the argument "directory".''', - epilog='Example: ./Dir.py /home/2013/03/') - - parser.add_argument('directory', type=str, - help='The directory to run inside', action='store') - - parser.add_argument('-db', type=int, default=0, - help='The name of the Redis DB (default 0)', - choices=[0, 1, 2, 3, 4], action='store') - - parser.add_argument('-ow', help='trigger the overwritting mode', - action='store_true') - - args = parser.parse_args() - - r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) - - publisher.port = 6380 - publisher.channel = "Script" - - create_dirfile(r_serv, args.directory, args.ow) - -if __name__ == "__main__": - main() diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 98645165..4d6619c8 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -153,14 +153,10 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Duplicates" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Duplicates.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Lines" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Lines.py; read x" - sleep 0.1 screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DomClassifier.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "Categ" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Categ.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Tokenize" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tokenize.py; read x" - sleep 0.1 screen -S "Script_AIL" -X screen -t "CreditCards" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./CreditCards.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "BankAccount" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./BankAccount.py; read x" @@ -175,13 +171,9 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Credential" bash -c "cd 
${AIL_BIN}; ${ENV_PY} ./Credential.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Curve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Curve.py; read x" + screen -S "Script_AIL" -X screen -t "TermTrackerMod" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./TermTrackerMod.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "CurveManageTopSets" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./CurveManageTopSets.py; read x" - sleep 0.1 - screen -S "Script_AIL" -X screen -t "RegexForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./RegexForTermsFrequency.py; read x" - sleep 0.1 - screen -S "Script_AIL" -X screen -t "SetForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SetForTermsFrequency.py; read x" + screen -S "Script_AIL" -X screen -t "RegexTracker" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./RegexTracker.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "Indexer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Indexer.py; read x" sleep 0.1 @@ -213,6 +205,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x" sleep 0.1 + screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x" + sleep 0.1 screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x" diff --git a/bin/Lines.py b/bin/Lines.py deleted file mode 100755 index e4187dc7..00000000 --- a/bin/Lines.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -""" -The ZMQ_PubSub_Lines Module -============================ - -This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q -Module. 
- -It perform a sorting on the line's length and publish/forward them to -differents channels: - -*Channel 1 if max length(line) < max -*Channel 2 if max length(line) > max - -The collected informations about the processed pastes -(number of lines and maximum length line) are stored in Redis. - -..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put -the same Subscriber name in both of them. - -Requirements ------------- - -*Need running Redis instances. (LevelDB & Redis) -*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly. - -""" -import argparse -import time -from packages import Paste -from pubsublogger import publisher - -from Helper import Process - -if __name__ == '__main__': - publisher.port = 6380 - publisher.channel = 'Script' - - config_section = 'Lines' - p = Process(config_section) - - # SCRIPT PARSER # - parser = argparse.ArgumentParser( - description='This script is a part of the Analysis Information \ - Leak framework.') - - parser.add_argument( - '-max', type=int, default=500, - help='The limit between "short lines" and "long lines"', - action='store') - - args = parser.parse_args() - - # FUNCTIONS # - tmp_string = "Lines script Subscribed to channel {} and Start to publish \ - on channel Longlines, Shortlines" - publisher.info(tmp_string) - - while True: - try: - message = p.get_from_set() - print(message) - if message is not None: - PST = Paste.Paste(message) - else: - publisher.debug("Tokeniser is idling 10s") - time.sleep(10) - continue - - # FIXME do it in the paste class - lines_infos = PST.get_lines_info() - PST.save_attribute_redis("p_nb_lines", lines_infos[0]) - PST.save_attribute_redis("p_max_length_line", lines_infos[1]) - - # FIXME Not used. 
- PST.store.sadd("Pastes_Objects", PST.p_rel_path) - print(PST.p_rel_path) - if lines_infos[1] < args.max: - p.populate_set_out( PST.p_rel_path , 'LinesShort') - else: - p.populate_set_out( PST.p_rel_path , 'LinesLong') - except IOError: - print("CRC Checksum Error on : ", PST.p_rel_path) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 6743cdca..cfdb82f7 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -9,7 +9,6 @@ import time import datetime import redis import os -from packages import lib_words from packages.Date import Date from pubsublogger import publisher from Helper import Process diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py deleted file mode 100755 index cd8102c1..00000000 --- a/bin/RegexForTermsFrequency.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -This Module is used for term frequency. -It processes every paste coming from the global module and test the regexs -supplied in the term webpage. 
- -""" -import redis -import time -from pubsublogger import publisher -from packages import Paste -import calendar -import re -import signal -import time -from Helper import Process -# Email notifications -from NotificationHelper import * - - -class TimeoutException(Exception): - pass - - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - -# Config Variables -DICO_REFRESH_TIME = 60 # s - -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -TrackedRegexSet_Name = "TrackedRegexSet" - -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month] - -TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" - -# create direct link in mail -full_paste_url = "/showsavedpaste/?paste=" - - -def refresh_dicos(): - dico_regex = {} - dico_regexname_to_redis = {} - for regex_str in server_term.smembers(TrackedRegexSet_Name): - dico_regex[regex_str[1:-1]] = re.compile(regex_str[1:-1]) - dico_regexname_to_redis[regex_str[1:-1]] = regex_str - - return dico_regex, dico_regexname_to_redis - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'RegexForTermsFrequency' - p = Process(config_section) - max_execution_time = p.config.getint(config_section, "max_execution_time") - - # REDIS # - server_term = redis.StrictRedis( - host=p.config.get("ARDB_TermFreq", "host"), - port=p.config.get("ARDB_TermFreq", "port"), - db=p.config.get("ARDB_TermFreq", "db"), - decode_responses=True) - - # FUNCTIONS # - publisher.info("RegexForTermsFrequency script started") - - # create direct link in mail - full_paste_url = 
p.config.get("Notifications", "ail_domain") + full_paste_url - - # compile the regex - dico_refresh_cooldown = time.time() - dico_regex, dico_regexname_to_redis = refresh_dicos() - - message = p.get_from_set() - - # Regex Frequency - while True: - - if message is not None: - if time.time() - dico_refresh_cooldown > DICO_REFRESH_TIME: - dico_refresh_cooldown = time.time() - dico_regex, dico_regexname_to_redis = refresh_dicos() - print('dico got refreshed') - - filename = message - temp = filename.split('/') - timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) - - curr_set = top_termFreq_setName_day[0] + str(timestamp) - paste = Paste.Paste(filename) - content = paste.get_p_content() - - # iterate the word with the regex - for regex_str, compiled_regex in dico_regex.items(): - - signal.alarm(max_execution_time) - try: - matched = compiled_regex.search(content) - except TimeoutException: - print ("{0} processing timeout".format(paste.p_rel_path)) - continue - else: - signal.alarm(0) - - if matched is not None: # there is a match - print('regex matched {}'.format(regex_str)) - matched = matched.group(0) - regex_str_complete = "/" + regex_str + "/" - # Add in Regex track set only if term is not in the blacklist - if regex_str_complete not in server_term.smembers(BlackListTermsSet_Name): - # Send a notification only when the member is in the set - if regex_str_complete in server_term.smembers(TrackedTermsNotificationEnabled_Name): - - # create mail body - mail_body = ("AIL Framework,\n" - "New occurrence for regex: " + regex_str + "\n" - ''+full_paste_url + filename) - - # Send to every associated email adress - for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete): - sendEmailNotification(email, 'Term', mail_body) - - # tag paste - for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + regex_str_complete): - msg = '{};{}'.format(tag, filename) - p.populate_set_out(msg, 
'Tags') - - set_name = 'regex_' + dico_regexname_to_redis[regex_str] - new_to_the_set = server_term.sadd(set_name, filename) - new_to_the_set = True if new_to_the_set == 1 else False - - # consider the num of occurence of this term - regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1))) - # 1 term per paste - if new_to_the_set: - regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1))) - server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1)) - server_term.zincrby(curr_set, dico_regexname_to_redis[regex_str], float(1)) - else: - pass - - else: - publisher.debug("Script RegexForTermsFrequency is Idling") - print("sleeping") - time.sleep(5) - message = p.get_from_set() diff --git a/bin/RegexTracker.py b/bin/RegexTracker.py new file mode 100755 index 00000000..260db3c9 --- /dev/null +++ b/bin/RegexTracker.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" +This Module is used for regex tracking. +It processes every paste coming from the global module and test the regexs +supplied in the term webpage. 
+ +""" +import os +import re +import sys +import time +import signal + +from Helper import Process +from pubsublogger import publisher + +import NotificationHelper + +from packages import Item +from packages import Term + +full_item_url = "/showsavedpaste/?paste=" +mail_body_template = "AIL Framework,\nNew occurrence for term tracked regex: {}\nitem id: {}\nurl: {}{}" + +dict_regex_tracked = Term.get_regex_tracked_words_dict() +last_refresh = time.time() + +class TimeoutException(Exception): + pass +def timeout_handler(signum, frame): + raise TimeoutException +signal.signal(signal.SIGALRM, timeout_handler) + +def new_term_found(term, term_type, item_id, item_date): + uuid_list = Term.get_term_uuid_list(term, 'regex') + print('new tracked term found: {} in {}'.format(term, item_id)) + + for term_uuid in uuid_list: + Term.add_tracked_item(term_uuid, item_id, item_date) + + tags_to_add = Term.get_term_tags(term_uuid) + for tag in tags_to_add: + msg = '{};{}'.format(tag, item_id) + p.populate_set_out(msg, 'Tags') + + mail_to_notify = Term.get_term_mails(term_uuid) + if mail_to_notify: + mail_body = mail_body_template.format(term, item_id, full_item_url, item_id) + for mail in mail_to_notify: + NotificationHelper.sendEmailNotification(mail, 'Term Tracker', mail_body) + +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + publisher.info("Script RegexTracker started") + + config_section = 'RegexTracker' + p = Process(config_section) + max_execution_time = p.config.getint(config_section, "max_execution_time") + + ull_item_url = p.config.get("Notifications", "ail_domain") + full_item_url + + # Regex Frequency + while True: + + item_id = p.get_from_set() + + if item_id is not None: + + item_date = Item.get_item_date(item_id) + item_content = Item.get_item_content(item_id) + + for regex in dict_regex_tracked: + + signal.alarm(max_execution_time) + try: + matched = dict_regex_tracked[regex].search(item_content) + except TimeoutException: + print 
("{0} processing timeout".format(paste.p_rel_path)) + continue + else: + signal.alarm(0) + + if matched: + new_term_found(regex, 'regex', item_id, item_date) + + + else: + time.sleep(5) + + # refresh Tracked term + if last_refresh < Term.get_tracked_term_last_updated_by_type('regex'): + dict_regex_tracked = Term.get_regex_tracked_words_dict() + last_refresh = time.time() + print('Tracked set refreshed') diff --git a/bin/SetForTermsFrequency.py b/bin/SetForTermsFrequency.py deleted file mode 100755 index 19ed7210..00000000 --- a/bin/SetForTermsFrequency.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -This Module is used for term frequency. -It processes every paste coming from the global module and test the sets -supplied in the term webpage. - -""" -import redis -import time -from pubsublogger import publisher -from packages import lib_words -from packages import Paste -import os -import datetime -import calendar -import re -import ast -from Helper import Process - -# Email notifications -from NotificationHelper import * - -# Config Variables -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -TrackedRegexSet_Name = "TrackedRegexSet" -TrackedSetSet_Name = "TrackedSetSet" - -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] - -TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" - -# create direct link in mail -full_paste_url = "/showsavedpaste/?paste=" - -def add_quote_inside_tab(tab): - quoted_tab = "[" - for elem in tab[1:-1].split(','): - elem = elem.lstrip().strip() - quoted_tab += "\'{}\', ".format(elem) - quoted_tab = quoted_tab[:-2] 
#remove trailing , - quoted_tab += "]" - return str(quoted_tab) - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'SetForTermsFrequency' - p = Process(config_section) - - # REDIS # - server_term = redis.StrictRedis( - host=p.config.get("ARDB_TermFreq", "host"), - port=p.config.get("ARDB_TermFreq", "port"), - db=p.config.get("ARDB_TermFreq", "db"), - decode_responses=True) - - # FUNCTIONS # - publisher.info("RegexForTermsFrequency script started") - - # create direct link in mail - full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url - - #get the dico and matching percent - dico_percent = {} - dico_set_tab = {} - dico_setname_to_redis = {} - for set_str in server_term.smembers(TrackedSetSet_Name): - tab_set = set_str[1:-1] - tab_set = add_quote_inside_tab(tab_set) - perc_finder = re.compile("\[[0-9]{1,3}\]").search(tab_set) - if perc_finder is not None: - match_percent = perc_finder.group(0)[1:-1] - dico_percent[tab_set] = float(match_percent) - dico_set_tab[tab_set] = ast.literal_eval(tab_set) - dico_setname_to_redis[tab_set] = set_str - else: - continue - - message = p.get_from_set() - - while True: - - if message is not None: - filename = message - temp = filename.split('/') - timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) - content = Paste.Paste(filename).get_p_content() - - curr_set = top_termFreq_setName_day[0] + str(timestamp) - - #iterate over the words of the file - match_dico = {} - for word in content.split(): - for cur_set, array_set in dico_set_tab.items(): - for w_set in array_set[:-1]: #avoid the percent matching - if word == w_set: - try: - match_dico[str(array_set)] += 1 - except KeyError: - match_dico[str(array_set)] = 1 - - #compute matching % - for the_set, matchingNum in match_dico.items(): - eff_percent = float(matchingNum) / float((len(ast.literal_eval(the_set))-1)) * 100 #-1 bc if the percent matching - if eff_percent >= 
dico_percent[the_set]: - # Send a notification only when the member is in the set - if dico_setname_to_redis[str(the_set)] in server_term.smembers(TrackedTermsNotificationEnabled_Name): - - # create mail body - mail_body = ("AIL Framework,\n" - "New occurrence for term: " + dico_setname_to_redis[str(the_set)] + "\n" - ''+full_paste_url + filename) - - # Send to every associated email adress - for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + dico_setname_to_redis[str(the_set)]): - sendEmailNotification(email, 'Term', mail_body) - - # tag paste - for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + dico_setname_to_redis[str(the_set)]): - msg = '{};{}'.format(tag, filename) - p.populate_set_out(msg, 'Tags') - - print(the_set, "matched in", filename) - set_name = 'set_' + dico_setname_to_redis[the_set] - new_to_the_set = server_term.sadd(set_name, filename) - new_to_the_set = True if new_to_the_set == 1 else False - - #consider the num of occurence of this set - set_value = int(server_term.hincrby(timestamp, dico_setname_to_redis[the_set], int(1))) - - # FIXME - avoid using per paste as a set is checked over the entire paste - #1 term per paste - if new_to_the_set: - set_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_setname_to_redis[the_set], int(1))) - server_term.zincrby("per_paste_" + curr_set, dico_setname_to_redis[the_set], float(1)) - server_term.zincrby(curr_set, dico_setname_to_redis[the_set], float(1)) - - - else: - publisher.debug("Script RegexForTermsFrequency is Idling") - print("sleeping") - time.sleep(5) - message = p.get_from_set() diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py index 08eb7247..fca0439f 100755 --- a/bin/TermTrackerMod.py +++ b/bin/TermTrackerMod.py @@ -8,13 +8,14 @@ The TermTracker Module import os import sys import time +import signal from Helper import Process from pubsublogger import publisher import NotificationHelper -from packages import 
Paste +from packages import Item from packages import Term sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) @@ -26,13 +27,22 @@ mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\n # loads tracked words list_tracked_words = Term.get_tracked_words_list() +last_refresh_word = time.time() set_tracked_words_list = Term.get_set_tracked_words_list() +last_refresh_set = time.time() -def new_term_found(term, term_type, item_id): - uuid_list = Term.get_term_uuid_list(term) +class TimeoutException(Exception): + pass +def timeout_handler(signum, frame): + raise TimeoutException +signal.signal(signal.SIGALRM, timeout_handler) + +def new_term_found(term, term_type, item_id, item_date): + uuid_list = Term.get_term_uuid_list(term, term_type) + print('new tracked term found: {} in {}'.format(term, item_id)) for term_uuid in uuid_list: - Term.add_tracked_item(term_uuid, item_id) + Term.add_tracked_item(term_uuid, item_id, item_date) tags_to_add = Term.get_term_tags(term_uuid) for tag in tags_to_add: @@ -52,28 +62,38 @@ if __name__ == "__main__": publisher.channel = "Script" publisher.info("Script TermTrackerMod started") - #config_section = 'TermTrackerMod' config_section = 'TermTrackerMod' p = Process(config_section) + max_execution_time = p.config.getint(config_section, "max_execution_time") full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url while True: item_id = p.get_from_set() - item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz' - #item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz' - if message is not None: + if item_id is not None: - paste = Paste.Paste(item_id) + item_date = Item.get_item_date(item_id) + item_content = Item.get_item_content(item_id) - dict_words_freq = Term.get_text_word_frequency(paste.get_p_content()) + signal.alarm(max_execution_time) + try: + dict_words_freq = Term.get_text_word_frequency(item_content) + except TimeoutException: + print ("{0} 
processing timeout".format(paste.p_rel_path)) + continue + else: + signal.alarm(0) + + # create token statistics + for word in dict_words_freq: + Term.create_token_statistics(item_date, word, dict_words_freq[word]) # check solo words for word in list_tracked_words: if word in dict_words_freq: - new_term_found(word, 'word', item_id) + new_term_found(word, 'word', item_id, item_date) # check words set for elem in set_tracked_words_list: @@ -86,7 +106,19 @@ if __name__ == "__main__": if word in dict_words_freq: nb_uniq_word += 1 if nb_uniq_word >= nb_words_threshold: - new_term_found(word_set, 'set', item_id) + new_term_found(word_set, 'set', item_id, item_date) else: time.sleep(5) + + + # refresh Tracked term + if last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'): + list_tracked_words = Term.get_tracked_words_list() + last_refresh_word = time.time() + print('Tracked word refreshed') + + if last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'): + set_tracked_words_list = Term.get_set_tracked_words_list() + last_refresh_set = time.time() + print('Tracked set refreshed') diff --git a/bin/Tokenize.py b/bin/Tokenize.py deleted file mode 100755 index 4e13b9ff..00000000 --- a/bin/Tokenize.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -The Tokenize Module -=================== - -This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q -Module. - -It tokenize the content of the paste and publish the result in the following -format: - channel_name+' '+/path/of/the/paste.gz+' '+tokenized_word+' '+scoring - - ..seealso:: Paste method (_get_top_words) - -..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put -the same Subscriber name in both of them. - -Requirements ------------- - -*Need running Redis instances. (Redis) -*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. 
- -""" -import time -from packages import Paste -from pubsublogger import publisher - -from Helper import Process -import signal - -class TimeoutException(Exception): - pass - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Tokenize' - p = Process(config_section) - - # LOGGING # - publisher.info("Tokeniser started") - - while True: - message = p.get_from_set() - print(message) - if message is not None: - paste = Paste.Paste(message) - signal.alarm(5) - try: - for word, score in paste._get_top_words().items(): - if len(word) >= 4: - msg = '{} {} {}'.format(paste.p_rel_path, word, score) - p.populate_set_out(msg) - except TimeoutException: - p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_rel_path)) - continue - else: - signal.alarm(0) - else: - publisher.debug("Tokeniser is idling 10s") - time.sleep(10) - print("Sleeping") diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 85edb0be..ccf59c54 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -1,5 +1,7 @@ #!/usr/bin/python3 +import datetime + class Date(object): """docstring for Date""" def __init__(self, *args): @@ -34,7 +36,6 @@ class Date(object): self.day = day def substract_day(self, numDay): - import datetime computed_date = datetime.date(int(self.year), int(self.month), int(self.day)) - datetime.timedelta(numDay) comp_year = str(computed_date.year) comp_month = str(computed_date.month).zfill(2) @@ -50,3 +51,12 @@ def date_substract_day(date, num_day=1): new_date = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8])) - datetime.timedelta(num_day) new_date = str(new_date).replace('-', '') return new_date + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = 
[] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return list(reversed(date_list)) diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 2c10cb85..4dcdde85 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -2,10 +2,13 @@ # -*-coding:UTF-8 -* import os +import sys import gzip import redis +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) import Flask_config +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Date import Tag diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 312ed7bd..2f45c677 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -4,6 +4,7 @@ import os import re import sys +import time import uuid import redis import datetime @@ -72,14 +73,30 @@ def get_set_tracked_words_list(): all_set_list.append((ter_set, num_words, elem)) return all_set_list -def is_term_tracked_in_global_level(term): - res = r_serv_term.smembers('all:tracked_term_uuid:{}'.format(term)) +def get_regex_tracked_words_dict(): + regex_list = r_serv_term.smembers('all:tracked_term:regex') + dict_tracked_regex = {} + for regex in regex_list: + dict_tracked_regex[regex] = re.compile(regex) + return dict_tracked_regex + +def is_term_tracked_in_global_level(term, term_type): + res = r_serv_term.smembers('all:tracked_term_uuid:{}:{}'.format(term_type, term)) if res: for elem_uuid in res: if r_serv_term.hget('tracked_term:{}'.format(elem_uuid), 'level')=='1': return True return False +def is_term_tracked_in_user_level(term, term_type, user_id): + res = r_serv_term.smembers('user:tracked_term:{}'.format(user_id)) + if res: + for elem_uuid in res: + if r_serv_term.hget('tracked_term:{}'.format(elem_uuid), 'tracked')== term: + if r_serv_term.hget('tracked_term:{}'.format(elem_uuid), 'type')== term_type: + return True + return False + def parse_json_term_to_add(dict_input, user_id): term = dict_input.get('term', None) if not term: @@ -112,7 +129,10 @@ def 
parse_json_term_to_add(dict_input, user_id): # check if term already tracked in global if level==1: - if is_term_tracked_in_global_level(term): + if is_term_tracked_in_global_level(term, term_type): + return ({"status": "error", "reason": "Term already tracked"}, 409) + else: + if is_term_tracked_in_user_level(term, term_type, user_id): return ({"status": "error", "reason": "Term already tracked"}, 409) term_uuid = add_tracked_term(term , term_type, user_id, level, tags, mails) @@ -174,7 +194,7 @@ def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0) r_serv_term.sadd('all:tracked_term:{}'.format(term_type), term) # create term - uuid map - r_serv_term.sadd('all:tracked_term_uuid:{}'.format(term), term_uuid) + r_serv_term.sadd('all:tracked_term_uuid:{}:{}'.format(term_type, term), term_uuid) # add display level set if level == 0: # user only @@ -190,15 +210,22 @@ def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0) for mail in mails: r_serv_term.sadd('tracked_term:mail:{}'.format(term_uuid), mail) + # toggle refresh module tracker list/set + r_serv_term.set('tracked_term:refresh:{}'.format(term_type), time.time()) + return term_uuid def delete_term(term_uuid): term = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'tracked') term_type = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'type') term_level = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') - r_serv_term.srem('all:tracked_term_uuid:{}'.format(term), term_uuid) - r_serv_term.srem('all:tracked_term:{}'.format(term_type), term_uuid) + r_serv_term.srem('all:tracked_term_uuid:{}:{}'.format(term_type, term), term_uuid) + # Term not tracked by other users + if not r_serv_term.exists('all:tracked_term_uuid:{}:{}'.format(term_type, term)): + r_serv_term.srem('all:tracked_term:{}'.format(term_type), term) + # toggle refresh module tracker list/set + r_serv_term.set('tracked_term:refresh:{}'.format(term_type), time.time()) if level == 
0: # user only user_id = term_type = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'user_id') @@ -218,8 +245,8 @@ def delete_term(term_uuid): # remove item set r_serv_term.delete('tracked_term:item:{}'.format(term_uuid)) -def get_term_uuid_list(term): - return list(r_serv_term.smembers('all:tracked_term_uuid:{}'.format(term))) +def get_term_uuid_list(term, term_type): + return list(r_serv_term.smembers('all:tracked_term_uuid:{}:{}'.format(term_type, term))) def get_term_tags(term_uuid): return list(r_serv_term.smembers('tracked_term:tags:{}'.format(term_uuid))) @@ -227,10 +254,30 @@ def get_term_tags(term_uuid): def get_term_mails(term_uuid): return list(r_serv_term.smembers('tracked_term:mail:{}'.format(term_uuid))) -def add_tracked_item(term_uuid, item_id): - r_serv_term.sadd('tracked_term:item:{}'.format(term_uuid), item_id) +def add_tracked_item(term_uuid, item_id, item_date): + # track item + r_serv_term.sadd('tracked_term:item:{}:{}'.format(term_uuid, item_date), item_id) + # track nb item by date + r_serv_term.zincrby('tracked_term:stat:{}'.format(term_uuid), item_date, 1) +def create_token_statistics(item_date, word, nb): + r_serv_term.zincrby('stat_token_per_item_by_day:{}'.format(item_date), word, 1) + r_serv_term.zincrby('stat_token_total_by_day:{}'.format(item_date), word, nb) + r_serv_term.sadd('stat_token_history', item_date) +def delete_token_statistics_by_date(item_date): + r_serv_term.delete('stat_token_per_item_by_day:{}'.format(item_date)) + r_serv_term.delete('stat_token_total_by_day:{}'.format(item_date)) + r_serv_term.srem('stat_token_history', item_date) + +def get_all_token_stat_history(): + return r_serv_term.smembers('stat_token_history') + +def get_tracked_term_last_updated_by_type(term_type): + epoch_update = r_serv_term.get('tracked_term:refresh:{}'.format(term_type)) + if not epoch_update: + epoch_update = 0 + return float(epoch_update) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 
09e05ddf..52388ed5 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -107,7 +107,10 @@ operation_mode = 3 ttl_duplicate = 86400 default_unnamed_feed_name = unnamed_feeder -[RegexForTermsFrequency] +[TermTrackerMod] +max_execution_time = 120 + +[RegexTracker] max_execution_time = 60 ##### Redis ##### diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py index 54581403..e44a922c 100644 --- a/bin/packages/lib_words.py +++ b/bin/packages/lib_words.py @@ -11,62 +11,10 @@ from dateutil.rrule import rrule, DAILY import csv -def listdirectory(path): - """Path Traversing Function. - - :param path: -- The absolute pathname to a directory. - - This function is returning all the absolute path of the files contained in - the argument directory. - - """ - fichier = [] - for root, dirs, files in os.walk(path): - - for i in files: - - fichier.append(os.path.join(root, i)) - - return fichier - clean = lambda dirty: ''.join(filter(string.printable.__contains__, dirty)) """It filters out non-printable characters from the string it receives.""" -def create_dirfile(r_serv, directory, overwrite): - """Create a file of path. 
- - :param r_serv: -- connexion to redis database - :param directory: -- The folder where to launch the listing of the .gz files - - This function create a list in redis with inside the absolute path - of all the pastes needed to be proceeded by function using parallel - (like redis_words_ranking) - - """ - if overwrite: - r_serv.delete("filelist") - - for x in listdirectory(directory): - r_serv.lpush("filelist", x) - - publisher.info("The list was overwritten") - - else: - if r_serv.llen("filelist") == 0: - - for x in listdirectory(directory): - r_serv.lpush("filelist", x) - - publisher.info("New list created") - else: - - for x in listdirectory(directory): - r_serv.lpush("filelist", x) - - publisher.info("The list was updated with new elements") - - def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month): """Create a csv file used with dygraph. diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 4526d978..7c8e3138 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -19,36 +19,17 @@ subscribe = Redis_Global [Attributes] subscribe = Redis_Global -[Lines] -subscribe = Redis_Global -publish = Redis_LinesShort,Redis_LinesLong - [DomClassifier] subscribe = Redis_Global -[Tokenize] -subscribe = Redis_LinesShort -publish = Redis_Words - -[Curve] -subscribe = Redis_Words -publish = Redis_CurveManageTopSets,Redis_Tags - [TermTrackerMod] subscribe = Redis_Global publish = Redis_Tags -[RegexForTermsFrequency] +[RegexTracker] subscribe = Redis_Global publish = Redis_Tags -[SetForTermsFrequency] -subscribe = Redis_Global -publish = Redis_Tags - -[CurveManageTopSets] -subscribe = Redis_CurveManageTopSets - [Categ] subscribe = Redis_Global publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey From 4b0819a644b01dad5c35efda3bc0a2a11d1d35aa Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 9 Aug 2019 15:22:57 +0200 Subject: [PATCH 05/12] chg: [api] add 
endpoint: delete tracker term (regex/set/word) --- bin/packages/Term.py | 37 +++++++++++++- doc/README.md | 64 ++++++++++++++++++++---- var/www/modules/restApi/Flask_restApi.py | 15 ++++-- 3 files changed, 100 insertions(+), 16 deletions(-) diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 2f45c677..30911252 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -27,6 +27,21 @@ special_characters.add('\\s') tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', gaps=True, discard_empty=True) +def is_valid_uuid_v4(UUID): + UUID = UUID.replace('-', '') + try: + uuid_test = uuid.UUID(hex=UUID, version=4) + return uuid_test.hex == UUID + except: + return False + +# # TODO: use new package => duplicate fct +def is_in_role(user_id, role): + if r_serv_db.sismember('user_role:{}'.format(role), user_id): + return True + else: + return False + def is_valid_mail(email): result = email_regex.match(email) if result: @@ -215,10 +230,25 @@ def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0) return term_uuid +def parse_tracked_term_to_delete(dict_input, user_id): + term_uuid = dict_input.get('uuid', None) + if not is_valid_uuid_v4(term_uuid): + return ({"status": "error", "reason": "Invalid uuid"}, 400) + level = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') + if not level: + return ({"status": "error", "reason": "Unknown uuid"}, 404) + if level == 0: + if r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'user_id') != user_id: + if not is_in_role(user_id, 'admin'): + return ({"status": "error", "reason": "Unknown uuid"}, 404) + + delete_term(term_uuid) + return ({"uuid": term_uuid}, 200) + def delete_term(term_uuid): term = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'tracked') term_type = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'type') - term_level = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') + level = 
r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') r_serv_term.srem('all:tracked_term_uuid:{}:{}'.format(term_type, term), term_uuid) # Term not tracked by other users if not r_serv_term.exists('all:tracked_term_uuid:{}:{}'.format(term_type, term)): @@ -243,7 +273,10 @@ def delete_term(term_uuid): r_serv_term.delete('tracked_term:mail:{}'.format(term_uuid)) # remove item set - r_serv_term.delete('tracked_term:item:{}'.format(term_uuid)) + all_item_date = r_serv_term.zrange('tracked_term:stat:{}'.format(term_uuid), 0, -1) + for date in all_item_date: + r_serv_term.delete('tracked_term:item:{}:{}'.format(term_uuid, date)) + r_serv_term.delete('tracked_term:stat:{}'.format(term_uuid)) def get_term_uuid_list(term, term_type): return list(r_serv_term.smembers('all:tracked_term_uuid:{}:{}'.format(term_type, term))) diff --git a/doc/README.md b/doc/README.md index 31f13cc3..764b4ff5 100644 --- a/doc/README.md +++ b/doc/README.md @@ -629,17 +629,13 @@ Add term tracker #### Example ``` -curl https://127.0.0.1:7000/api/v1/import/item --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST +curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST ``` #### input.json Example ```json { - "type": "text", - "tags": [ - "infoleak:analyst-detection=\"private-key\"" - ], - "text": "text to import" + } ``` @@ -648,7 +644,7 @@ curl https://127.0.0.1:7000/api/v1/import/item --header "Authorization: iHc1_ChZ ```json { - "uuid": "0c3d7b34-936e-4f01-9cdf-2070184b6016" + } ``` @@ -656,10 +652,56 @@ curl https://127.0.0.1:7000/api/v1/import/item --header "Authorization: iHc1_ChZ **HTTP Status Code** : `400` ```json - {"status": "error", "reason": "Malformed JSON"} - {"status": "error", "reason": "No text supplied"} - {"status": "error", "reason": "Tags or Galaxy not enabled"} - 
{"status": "error", "reason": "Size exceeds default"} + +``` + + + + +### Delete term tracker: `api/v1/delete/tracker/term` + +#### Description +Delete term tracker + +**Method** : `DELETE` + +#### Parameters +- `uuid` + - tracked term uuid + - *uuid4* + - mandatory + +#### JSON response +- `uuid` + - deleted uuid + - *uuid4* + +#### Example +``` +curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST +``` + +#### input.json Example +```json + { + + } +``` + +#### Expected Success Response +**HTTP Status Code** : `200` + +```json + { + + } +``` + +#### Expected Fail Response +**HTTP Status Code** : `400` + +```json + ``` diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index 864e7ed3..3544f705 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -314,16 +314,25 @@ def get_all_tags(): # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # TRACKER # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # -@restApi.route("api/v1/add/tracker/term", methods=['GET']) +@restApi.route("api/v1/add/tracker/term", methods=['POST']) @token_required('analyst') def add_tracker_term(): - #data = request.get_json() - data = {"term": "pi", 'type' : "word"} + data = request.get_json() user_token = get_auth_from_header() user_id = get_user_from_token(user_token) res = Term.parse_json_term_to_add(data, user_id) return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] +@restApi.route("api/v1/delete/tracker/term", methods=['DELETE']) +@token_required('analyst') +def delete_tracker_term(): + data = request.get_json() + user_token = get_auth_from_header() + user_id = get_user_from_token(user_token) + res = 
Term.parse_tracked_term_to_delete(data, user_id) + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # From 80f95350747d60a859c3a4ea3ed677f736c816d4 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 9 Aug 2019 17:07:54 +0200 Subject: [PATCH 06/12] chg: [api] add endpoint: get tracked item_id by uuid and daterange --- bin/packages/Term.py | 62 ++++++++++++++++++---- doc/README.md | 67 ++++++++++++++++++++++++ var/www/modules/restApi/Flask_restApi.py | 8 +++ 3 files changed, 126 insertions(+), 11 deletions(-) diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 30911252..7a6655ee 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -42,6 +42,19 @@ def is_in_role(user_id, role): else: return False +def check_term_uuid_valid_access(term_uuid, user_id): + if not is_valid_uuid_v4(term_uuid): + return ({"status": "error", "reason": "Invalid uuid"}, 400) + level = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') + if not level: + return ({"status": "error", "reason": "Unknown uuid"}, 404) + if level == 0: + if r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'user_id') != user_id: + if not is_in_role(user_id, 'admin'): + return ({"status": "error", "reason": "Unknown uuid"}, 404) + return None + + def is_valid_mail(email): result = email_regex.match(email) if result: @@ -95,6 +108,13 @@ def get_regex_tracked_words_dict(): dict_tracked_regex[regex] = re.compile(regex) return dict_tracked_regex +def get_tracked_term_list_item(term_uuid, date_from, date_to): + all_item_id = [] + if date_from and date_to: + for date in r_serv_term.zrangebyscore('tracked_term:stat:{}'.format(term_uuid), int(date_from), int(date_to)): + all_item_id = all_item_id + 
list(r_serv_term.smembers('tracked_term:item:{}:{}'.format(term_uuid, date))) + return all_item_id + def is_term_tracked_in_global_level(term, term_type): res = r_serv_term.smembers('all:tracked_term_uuid:{}:{}'.format(term_type, term)) if res: @@ -231,16 +251,9 @@ def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0) return term_uuid def parse_tracked_term_to_delete(dict_input, user_id): - term_uuid = dict_input.get('uuid', None) - if not is_valid_uuid_v4(term_uuid): - return ({"status": "error", "reason": "Invalid uuid"}, 400) - level = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') - if not level: - return ({"status": "error", "reason": "Unknown uuid"}, 404) - if level == 0: - if r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'user_id') != user_id: - if not is_in_role(user_id, 'admin'): - return ({"status": "error", "reason": "Unknown uuid"}, 404) + res = check_term_uuid_valid_access(term_uuid, user_id) + if res: + return res delete_term(term_uuid) return ({"uuid": term_uuid}, 200) @@ -291,7 +304,7 @@ def add_tracked_item(term_uuid, item_id, item_date): # track item r_serv_term.sadd('tracked_term:item:{}:{}'.format(term_uuid, item_date), item_id) # track nb item by date - r_serv_term.zincrby('tracked_term:stat:{}'.format(term_uuid), item_date, 1) + r_serv_term.zadd('tracked_term:stat:{}'.format(term_uuid), item_date, int(item_date)) def create_token_statistics(item_date, word, nb): r_serv_term.zincrby('stat_token_per_item_by_day:{}'.format(item_date), word, 1) @@ -312,8 +325,35 @@ def get_tracked_term_last_updated_by_type(term_type): epoch_update = 0 return float(epoch_update) +def parse_get_tracker_term_item(dict_input, user_id): + term_uuid = dict_input.get('uuid', None) + res = check_term_uuid_valid_access(term_uuid, user_id) + if res: + return res + date_from = dict_input.get('date_from', None) + date_to = dict_input.get('date_to', None) + + if date_from is None: + date_from = 
r_serv_term.zrevrange('tracked_term:stat:{}'.format(term_uuid), 0, 0) + if date_from: + date_from = date_from[0] + + if date_to is None: + date_to = date_from + + if date_from > date_to: + date_from = date_to + + all_item_id = get_tracked_term_list_item(term_uuid, date_from, date_to) + + res_dict = {} + res_dict['uuid'] = term_uuid + res_dict['date_from'] = date_from + res_dict['date_to'] = date_to + res_dict['items'] = all_item_id + return (res_dict, 200) diff --git a/doc/README.md b/doc/README.md index 764b4ff5..91b486d0 100644 --- a/doc/README.md +++ b/doc/README.md @@ -708,6 +708,73 @@ curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc +### Delete term tracker: `api/v1/add/tracker/term/item` + +#### Description +Delete term tracker + +**Method** : `POST` + +#### Parameters +- `uuid` + - tracked term uuid + - *uuid4* + - mandatory +- `date_from` + - date from + - *str - YYMMDD* + - default: last tracked items date +- `date_to` + - date to + - *str - YYMMDD* + - default: `None` + +#### JSON response +- `uuid` + - term uuid + - *uuid4* +- `date_from` + - date from + - *str - YYMMDD* +- `date_to` + - date to + - *str - YYMMDD* +- `items` + - list of item id + - *list* + +#### Example +``` +curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST +``` + +#### input.json Example +```json + { + + } +``` + +#### Expected Success Response +**HTTP Status Code** : `200` + +```json + { + + } +``` + +#### Expected Fail Response +**HTTP Status Code** : `400` + +```json + +``` + + + + + diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index 3544f705..e57157fe 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -332,6 +332,14 @@ def delete_tracker_term(): res = Term.parse_tracked_term_to_delete(data, user_id) return 
Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] +@restApi.route("api/v1/get/tracker/term/item", methods=['POST']) +@token_required('analyst') +def get_tracker_term_item(): + data = request.get_json() + user_token = get_auth_from_header() + user_id = get_user_from_token(user_token) + res = Term.parse_get_tracker_term_item(data, user_id) + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # # From 7ed09bc92360e6791c1ba0ae66404cf36ad69188 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 14 Aug 2019 09:44:49 +0200 Subject: [PATCH 07/12] chg: [UI term tracker] refractor term management: trackers list + show trackers + add new trackers --- bin/packages/Term.py | 120 +++-- doc/README.md | 2 +- var/www/modules/terms/Flask_terms.py | 420 +++++------------- .../modules/terms/templates/Add_tracker.html | 153 +++++++ .../terms/templates/showTrackerTerm.html | 319 +++++++++++++ .../templates/tracker_term_management.html | 181 ++++++++ var/www/templates/nav_bar.html | 2 +- var/www/templates/tracker/menu_sidebar.html | 24 + 8 files changed, 865 insertions(+), 356 deletions(-) create mode 100644 var/www/modules/terms/templates/Add_tracker.html create mode 100644 var/www/modules/terms/templates/showTrackerTerm.html create mode 100644 var/www/modules/terms/templates/tracker_term_management.html create mode 100644 var/www/templates/tracker/menu_sidebar.html diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 7a6655ee..37fb661d 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -17,6 +17,8 @@ from textblob import TextBlob sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) import Flask_config +import Date + r_serv_term = Flask_config.r_serv_term email_regex = Flask_config.email_regex @@ -235,7 +237,7 @@ def 
add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0) if level == 0: # user only r_serv_term.sadd('user:tracked_term:{}'.format(user_id), term_uuid) elif level == 1: # global - r_serv_term.sadd('gobal:tracked_term', term_uuid) + r_serv_term.sadd('global:tracked_term', term_uuid) # create term tags list for tag in tags: @@ -274,7 +276,7 @@ def delete_term(term_uuid): user_id = term_type = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'user_id') r_serv_term.srem('user:tracked_term:{}'.format(user_id), term_uuid) elif level == 1: # global - r_serv_term.srem('gobal:tracked_term', term_uuid) + r_serv_term.srem('global:tracked_term', term_uuid) # delete metatadata r_serv_term.delete('tracked_term:{}'.format(term_uuid)) @@ -291,6 +293,20 @@ def delete_term(term_uuid): r_serv_term.delete('tracked_term:item:{}:{}'.format(term_uuid, date)) r_serv_term.delete('tracked_term:stat:{}'.format(term_uuid)) +def replace_tracked_term_tags(term_uuid, tags): + r_serv_term.delete('tracked_term:tags:{}'.format(term_uuid)) + for tag in tags: + r_serv_term.sadd('tracked_term:tags:{}'.format(term_uuid), tag) + +def replace_tracked_term_mails(term_uuid, mails): + res = verify_mail_list(mails) + if res: + return res + else: + r_serv_term.delete('tracked_term:mail:{}'.format(term_uuid)) + for mail in mails: + r_serv_term.sadd('tracked_term:mail:{}'.format(term_uuid), mail) + def get_term_uuid_list(term, term_type): return list(r_serv_term.smembers('all:tracked_term_uuid:{}:{}'.format(term_type, term))) @@ -336,7 +352,7 @@ def parse_get_tracker_term_item(dict_input, user_id): date_to = dict_input.get('date_to', None) if date_from is None: - date_from = r_serv_term.zrevrange('tracked_term:stat:{}'.format(term_uuid), 0, 0) + date_from = get_tracked_term_first_seen(term_uuid) if date_from: date_from = date_from[0] @@ -355,45 +371,83 @@ def parse_get_tracker_term_item(dict_input, user_id): res_dict['items'] = all_item_id return (res_dict, 200) +def 
get_tracked_term_first_seen(term_uuid): + res = r_serv_term.zrange('tracked_term:stat:{}'.format(term_uuid), 0, 0) + if res: + return res[0] + else: + return None +def get_tracked_term_last_seen(term_uuid): + res = r_serv_term.zrevrange('tracked_term:stat:{}'.format(term_uuid), 0, 0) + if res: + return res[0] + else: + return None +def get_term_metedata(term_uuid, user_id=False, level=False, tags=False, mails=False, sparkline=False): + dict_uuid = {} + dict_uuid['term'] = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'tracked') + dict_uuid['type'] = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'type') + dict_uuid['date'] = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'date') + dict_uuid['first_seen'] = get_tracked_term_first_seen(term_uuid) + dict_uuid['last_seen'] = get_tracked_term_last_seen(term_uuid) + if user_id: + dict_uuid['user_id'] = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'user_id') + if level: + dict_uuid['level'] = r_serv_term.hget('tracked_term:{}'.format(term_uuid), 'level') + if mails: + dict_uuid['mails'] = get_list_trackeed_term_mails(term_uuid) + if tags: + dict_uuid['tags'] = get_list_trackeed_term_tags(term_uuid) + if sparkline: + dict_uuid['sparkline'] = get_tracked_term_sparkline(term_uuid) + dict_uuid['uuid'] = term_uuid + return dict_uuid +def get_tracked_term_sparkline(term_uuid, num_day=6): + date_range_sparkline = Date.get_date_range(num_day) + sparklines_value = [] + for date_day in date_range_sparkline: + nb_seen_this_day = r_serv_term.zscore('tracked_term:stat:{}'.format(term_uuid), date_day) + if nb_seen_this_day is None: + nb_seen_this_day = 0 + sparklines_value.append(int(nb_seen_this_day)) + return sparklines_value +def get_list_trackeed_term_tags(term_uuid): + res = r_serv_term.smembers('tracked_term:tags:{}'.format(term_uuid)) + if res: + return list(res) + else: + return [] +def get_list_trackeed_term_mails(term_uuid): + res = r_serv_term.smembers('tracked_term:mail:{}'.format(term_uuid)) 
+ if res: + return list(res) + else: + return [] +def get_user_tracked_term_uuid(user_id): + return list(r_serv_term.smembers('user:tracked_term:{}'.format(user_id))) +def get_global_tracked_term_uuid(): + return list(r_serv_term.smembers('global:tracked_term')) +def get_all_user_tracked_terms(user_id): + all_user_term = [] + all_user_term_uuid = get_user_tracked_term_uuid(user_id) + for term_uuid in all_user_term_uuid: + all_user_term.append(get_term_metedata(term_uuid, tags=True, mails=True)) + return all_user_term +def get_all_global_tracked_terms(): + all_user_term = [] + all_user_term_uuid = get_global_tracked_term_uuid() - - - - - - - - - - - - - - - - - -def get_global_tracked_term(): - dict_tracked = {} - tracked_set = list(r_serv_term.smembers('global:TrackedSetSet')) - tracked_regex = list(r_serv_term.smembers('global:TrackedRegexSet')) - tracked_terms = list(r_serv_term.smembers('global:TrackedSetTermSet')) - return {'term': tracked_terms, 'set': tracked_terms, 'regex': tracked_regex} - -def get_user_tracked_term(user_id): - dict_tracked = {} - tracked_set = list(r_serv_term.smembers('user:{}:TrackedSetSet'.format(user_id))) - tracked_regex = list(r_serv_term.smembers('user:{}:TrackedRegexSet').format(user_id)) - tracked_terms = list(r_serv_term.smembers('user:{}:TrackedSetTermSet').format(user_id)) - return {'term': tracked_terms, 'set': tracked_terms, 'regex': tracked_regex} + for term_uuid in all_user_term_uuid: + all_user_term.append(get_term_metedata(term_uuid, user_id=True, tags=True, mails=True)) + return all_user_term diff --git a/doc/README.md b/doc/README.md index 91b486d0..03a1aa07 100644 --- a/doc/README.md +++ b/doc/README.md @@ -708,7 +708,7 @@ curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc -### Delete term tracker: `api/v1/add/tracker/term/item` +### Delete term tracker: `api/v1/delete/tracker/term/item` #### Description Delete term tracker diff --git a/var/www/modules/terms/Flask_terms.py 
b/var/www/modules/terms/Flask_terms.py index f3b8c7de..9ffcf3ee 100644 --- a/var/www/modules/terms/Flask_terms.py +++ b/var/www/modules/terms/Flask_terms.py @@ -6,20 +6,25 @@ note: The matching of credential against supplied credential is done using Levenshtein distance ''' +import json import redis import datetime import calendar import flask -from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect +from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect, Response from Role_Manager import login_admin, login_analyst -from flask_login import login_required +from flask_login import login_required, current_user import re -import Paste from pprint import pprint import Levenshtein +# --------------------------------------------------------------- + +import Paste +import Term + # ============ VARIABLES ============ import Flask_config @@ -146,337 +151,110 @@ def save_tag_to_auto_push(list_tag): # ============ ROUTES ============ -@terms.route("/terms_management/") +@terms.route("/tracker_term") +def tracked_term_menu(): + user_id = current_user.get_id() + user_term = Term.get_all_user_tracked_terms(user_id) + global_term = Term.get_all_global_tracked_terms() + return render_template("tracker_term_management.html", user_term=user_term, global_term=global_term, bootstrap_label=bootstrap_label) + + +@terms.route("/tracker/add", methods=['GET', 'POST']) @login_required @login_analyst -def terms_management(): - per_paste = request.args.get('per_paste') - if per_paste == "1" or per_paste is None: - per_paste_text = "per_paste_" - per_paste = 1 - else: - per_paste_text = "" - per_paste = 0 +def add_tracked_term_menu(): + if request.method == 'POST': + term = request.form.get("term") + term_type = request.form.get("tracker_type") + nb_words = request.form.get("nb_word", 1) + level = request.form.get("level", 1) + tags = request.form.get("tags", []) + mails = request.form.get("mails", []) - today = 
datetime.datetime.now() - today = today.replace(hour=0, minute=0, second=0, microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - - # Map tracking if notifications are enabled for a specific term - notificationEnabledDict = {} - - # Maps a specific term to the associated email addresses - notificationEMailTermMapping = {} - notificationTagsTermMapping = {} - - #Regex - trackReg_list = [] - trackReg_list_values = [] - trackReg_list_num_of_paste = [] - for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name): - - notificationEMailTermMapping[tracked_regex] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex) - notificationTagsTermMapping[tracked_regex] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_regex) - - if tracked_regex not in notificationEnabledDict: - notificationEnabledDict[tracked_regex] = False - - trackReg_list.append(tracked_regex) - value_range = Term_getValueOverRange(tracked_regex, today_timestamp, [1, 7, 31], per_paste=per_paste_text) - - term_date = r_serv_term.hget(TrackedRegexDate_Name, tracked_regex) - - set_paste_name = "regex_" + tracked_regex - trackReg_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(term_date) - trackReg_list_values.append(value_range) - - if tracked_regex in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - notificationEnabledDict[tracked_regex] = True - - #Set - trackSet_list = [] - trackSet_list_values = [] - trackSet_list_num_of_paste = [] - for tracked_set in r_serv_term.smembers(TrackedSetSet_Name): - tracked_set = tracked_set - - notificationEMailTermMapping[tracked_set] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set) - notificationTagsTermMapping[tracked_set] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_set) - - if 
tracked_set not in notificationEnabledDict: - notificationEnabledDict[tracked_set] = False - - trackSet_list.append(tracked_set) - value_range = Term_getValueOverRange(tracked_set, today_timestamp, [1, 7, 31], per_paste=per_paste_text) - - term_date = r_serv_term.hget(TrackedSetDate_Name, tracked_set) - - set_paste_name = "set_" + tracked_set - trackSet_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(term_date) - trackSet_list_values.append(value_range) - - if tracked_set in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - notificationEnabledDict[tracked_set] = True - - #Tracked terms - track_list = [] - track_list_values = [] - track_list_num_of_paste = [] - for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): - - notificationEMailTermMapping[tracked_term] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term) - notificationTagsTermMapping[tracked_term] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_term) - - if tracked_term not in notificationEnabledDict: - notificationEnabledDict[tracked_term] = False - - track_list.append(tracked_term) - value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31], per_paste=per_paste_text) - - term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) - - set_paste_name = "tracked_" + tracked_term - - track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) ) - - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(term_date) - track_list_values.append(value_range) - - if tracked_term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - notificationEnabledDict[tracked_term] = True - - #blacklist terms - black_list = [] - for blacked_term in 
r_serv_term.smembers(BlackListTermsSet_Name): - term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term) - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - black_list.append([blacked_term, term_date]) - - return render_template("terms_management.html", - black_list=black_list, track_list=track_list, trackReg_list=trackReg_list, trackSet_list=trackSet_list, - track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste, - trackReg_list_values=trackReg_list_values, trackReg_list_num_of_paste=trackReg_list_num_of_paste, - trackSet_list_values=trackSet_list_values, trackSet_list_num_of_paste=trackSet_list_num_of_paste, - per_paste=per_paste, notificationEnabledDict=notificationEnabledDict, bootstrap_label=bootstrap_label, - notificationEMailTermMapping=notificationEMailTermMapping, notificationTagsTermMapping=notificationTagsTermMapping) - - -@terms.route("/terms_management_query_paste/") -@login_required -@login_analyst -def terms_management_query_paste(): - term = request.args.get('term') - paste_info = [] - - # check if regex or not - if term.startswith('/') and term.endswith('/'): - set_paste_name = "regex_" + term - track_list_path = r_serv_term.smembers(set_paste_name) - elif term.startswith('\\') and term.endswith('\\'): - set_paste_name = "set_" + term - track_list_path = r_serv_term.smembers(set_paste_name) - else: - set_paste_name = "tracked_" + term - track_list_path = r_serv_term.smembers(set_paste_name) - - for path in track_list_path: - paste = Paste.Paste(path) - p_date = str(paste._get_p_date()) - p_date = p_date[0:4]+'/'+p_date[4:6]+'/'+p_date[6:8] - p_source = paste.p_source - p_size = paste.p_size - p_mime = paste.p_mime - p_lineinfo = paste.get_lines_info() - p_content = paste.get_p_content() - if p_content != 0: - p_content = p_content[0:400] - paste_info.append({"path": path, "date": p_date, "source": p_source, "size": p_size, "mime": p_mime, 
"lineinfo": p_lineinfo, "content": p_content}) - - return jsonify(paste_info) - - -@terms.route("/terms_management_query/") -@login_required -@login_analyst -def terms_management_query(): - TrackedTermsDate_Name = "TrackedTermDate" - BlackListTermsDate_Name = "BlackListTermDate" - term = request.args.get('term') - section = request.args.get('section') - - today = datetime.datetime.now() - today = today.replace(hour=0, minute=0, second=0, microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31]) - - if section == "followTerm": - term_date = r_serv_term.hget(TrackedTermsDate_Name, term) - elif section == "blacklistTerm": - term_date = r_serv_term.hget(BlackListTermsDate_Name, term) - - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(str(term_date)) - return jsonify(value_range) - - -@terms.route("/terms_management_action/", methods=['GET']) -@login_required -@login_analyst -def terms_management_action(): - today = datetime.datetime.now() - today = today.replace(microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - - - section = request.args.get('section') - action = request.args.get('action') - term = request.args.get('term') - notificationEmailsParam = request.args.get('emailAddresses') - input_tags = request.args.get('tags') - - if action is None or term is None or notificationEmailsParam is None: - return "None" - else: - if section == "followTerm": - if action == "add": - - # Make a list of all passed email addresses - notificationEmails = notificationEmailsParam.split() - - validNotificationEmails = [] - # check for valid email addresses - for email in notificationEmails: - # Really basic validation: - # has exactly one @ sign, and at least one . 
in the part after the @ - if re.match(r"[^@]+@[^@]+\.[^@]+", email): - validNotificationEmails.append(email) - - # create tags list - list_tags = input_tags.split() - - # check if regex/set or simple term - #regex - if term.startswith('/') and term.endswith('/'): - r_serv_term.sadd(TrackedRegexSet_Name, term) - r_serv_term.hset(TrackedRegexDate_Name, term, today_timestamp) - # add all valid emails to the set - for email in validNotificationEmails: - r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + term, email) - # enable notifications by default - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term) - # add tags list - for tag in list_tags: - r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + term, tag) - save_tag_to_auto_push(list_tags) - - #set - elif term.startswith('\\') and term.endswith('\\'): - tab_term = term[1:-1] - perc_finder = re.compile("\[[0-9]{1,3}\]").search(tab_term) - if perc_finder is not None: - match_percent = perc_finder.group(0)[1:-1] - set_to_add = term - else: - match_percent = DEFAULT_MATCH_PERCENT - set_to_add = "\\" + tab_term[:-1] + ", [{}]]\\".format(match_percent) - r_serv_term.sadd(TrackedSetSet_Name, set_to_add) - r_serv_term.hset(TrackedSetDate_Name, set_to_add, today_timestamp) - # add all valid emails to the set - for email in validNotificationEmails: - r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + set_to_add, email) - # enable notifications by default - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, set_to_add) - # add tags list - for tag in list_tags: - r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + set_to_add, tag) - save_tag_to_auto_push(list_tags) - - #simple term - else: - r_serv_term.sadd(TrackedTermsSet_Name, term.lower()) - r_serv_term.hset(TrackedTermsDate_Name, term.lower(), today_timestamp) - # add all valid emails to the set - for email in validNotificationEmails: - r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + term.lower(), email) - # 
enable notifications by default - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term.lower()) - # add tags list - for tag in list_tags: - r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + term.lower(), tag) - save_tag_to_auto_push(list_tags) - - elif action == "toggleEMailNotification": - # get the current state - if term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - # remove it - r_serv_term.srem(TrackedTermsNotificationEnabled_Name, term.lower()) - else: - # add it - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term.lower()) - - #del action - else: - if term.startswith('/') and term.endswith('/'): - r_serv_term.srem(TrackedRegexSet_Name, term) - r_serv_term.hdel(TrackedRegexDate_Name, term) - elif term.startswith('\\') and term.endswith('\\'): - r_serv_term.srem(TrackedSetSet_Name, term) - r_serv_term.hdel(TrackedSetDate_Name, term) - else: - r_serv_term.srem(TrackedTermsSet_Name, term.lower()) - r_serv_term.hdel(TrackedTermsDate_Name, term.lower()) - - # delete the associated notification emails too - r_serv_term.delete(TrackedTermsNotificationEmailsPrefix_Name + term) - # delete the associated tags set - r_serv_term.delete(TrackedTermsNotificationTagsPrefix_Name + term) - - elif section == "blacklistTerm": - if action == "add": - r_serv_term.sadd(BlackListTermsSet_Name, term.lower()) - r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp) - else: - r_serv_term.srem(BlackListTermsSet_Name, term.lower()) + if mails: + mails = mails.split() + if tags: + tags = tags.split() + input_dict = {"term": term, "type": term_type, "nb_words": nb_words, "tags": tags, "mails": mails} + user_id = current_user.get_id() + res = Term.parse_json_term_to_add(input_dict, user_id) + if res[1] == 200: + return redirect(url_for('terms.tracked_term_menu')) else: - return "None" + ## TODO: use modal + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + else: + return 
render_template("Add_tracker.html") - to_return = {} - to_return["section"] = section - to_return["action"] = action - to_return["term"] = term - return jsonify(to_return) - -@terms.route("/terms_management/delete_terms_tags", methods=['POST']) +@terms.route("/tracker/show_term_tracker") @login_required @login_analyst -def delete_terms_tags(): - term = request.form.get('term') - tags_to_delete = request.form.getlist('tags_to_delete') +def show_term_tracker(): + user_id = current_user.get_id() + term_uuid = request.args.get('uuid', None) + res = Term.check_term_uuid_valid_access(term_uuid, user_id) + if res: # invalid access + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] - if term is not None and tags_to_delete is not None: - for tag in tags_to_delete: - r_serv_term.srem(TrackedTermsNotificationTagsPrefix_Name + term, tag) - return redirect(url_for('terms.terms_management')) + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + + if date_from: + date_from = date_from.replace('-', '') + if date_to: + date_to = date_to.replace('-', '') + + term_metadata = Term.get_term_metedata(term_uuid, user_id=True, level=True, tags=True, mails=True, sparkline=True) + + if date_from: + res = Term.parse_get_tracker_term_item({'uuid': term_uuid, 'date_from': date_from, 'date_to': date_to}, user_id) + if res[1] !=200: + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + term_metadata['items'] = res[0]['items'] + term_metadata['date_from'] = res[0]['date_from'] + term_metadata['date_to'] = res[0]['date_to'] else: - return 'None args', 400 + term_metadata['items'] = [] + term_metadata['date_from'] = '' + term_metadata['date_to'] = '' -@terms.route("/terms_management/delete_terms_email", methods=['GET']) + return render_template("showTrackerTerm.html", term_metadata=term_metadata, bootstrap_label=bootstrap_label) + 
+@terms.route("/tracker/update_tracker_tags", methods=['POST']) @login_required @login_analyst -def delete_terms_email(): - term = request.args.get('term') - email = request.args.get('email') - - if term is not None and email is not None: - r_serv_term.srem(TrackedTermsNotificationEmailsPrefix_Name + term, email) - return redirect(url_for('terms.terms_management')) +def update_tracker_tags(): + user_id = current_user.get_id() + term_uuid = request.form.get('uuid') + res = Term.check_term_uuid_valid_access(term_uuid, user_id) + if res: # invalid access + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + tags = request.form.get('tags') + if tags: + tags = tags.split() else: - return 'None args', 400 + tags = [] + Term.replace_tracked_term_tags(term_uuid, tags) + return redirect(url_for('terms.show_term_tracker', uuid=term_uuid)) + +@terms.route("/tracker/update_tracker_mails", methods=['POST']) +@login_required +@login_analyst +def update_tracker_mails(): + user_id = current_user.get_id() + term_uuid = request.form.get('uuid') + res = Term.check_term_uuid_valid_access(term_uuid, user_id) + if res: # invalid access + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + mails = request.form.get('mails') + if mails: + mails = mails.split() + else: + mails = [] + res = Term.replace_tracked_term_mails(term_uuid, mails) + if res: # invalid mail + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + return redirect(url_for('terms.show_term_tracker', uuid=term_uuid)) @terms.route("/terms_plot_tool/") diff --git a/var/www/modules/terms/templates/Add_tracker.html b/var/www/modules/terms/templates/Add_tracker.html new file mode 100644 index 00000000..8c0832d9 --- /dev/null +++ b/var/www/modules/terms/templates/Add_tracker.html @@ -0,0 +1,153 @@ + + + + + AIL-Framework + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+
+ {% include 'tracker/menu_sidebar.html' %}
+
+ +
+
+
Create a new tracker
+
+
+

Enter a term and choose what kind of tracker you want.

+ +
+ +
+
+
+
+
+
+ +
+
+
+
+
+ +
+
+
+
+ + +
+
+
+ +
+ + + +

Terms to track (space separated)

+ +
+
+ +
+
+ +
+
+ + +
+ + +
+ + + + + +
+
+ + +
+ +
+
+ + + + + diff --git a/var/www/modules/terms/templates/showTrackerTerm.html b/var/www/modules/terms/templates/showTrackerTerm.html new file mode 100644 index 00000000..51d94699 --- /dev/null +++ b/var/www/modules/terms/templates/showTrackerTerm.html @@ -0,0 +1,319 @@ + + + + + + + AIL Framework - AIL + + + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'tracker/menu_sidebar.html' %} + +
+ +
+
+

{{ term_metadata['uuid'] }}

+
    +
  • +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    TypeTrackedDate addedLevelCreated byFirst seenLast seenTags Email
    {{ term_metadata['type'] }}{{ term_metadata['term'] }}{{ term_metadata['date'][0:4] }}/{{ term_metadata['date'][4:6] }}/{{ term_metadata['date'][6:8] }}{{ term_metadata['level'] }}{{ term_metadata['user_id'] }} + {% if term_metadata['first_seen'] %} + {{ term_metadata['first_seen'][0:4] }}/{{ term_metadata['first_seen'][4:6] }}/{{ term_metadata['first_seen'][6:8] }} + {% endif %} + + {% if term_metadata['last_seen'] %} + {{ term_metadata['last_seen'][0:4] }}/{{ term_metadata['last_seen'][4:6] }}/{{ term_metadata['last_seen'][6:8] }} + {% endif %} + + {% for tag in term_metadata['tags'] %} + + {{ tag }} + + {% endfor %} + + + {% for mail in term_metadata['mails'] %} + {{ mail }}
    + {% endfor %} +
    +
    +
    +
    +
    +
    +
  • +
+ +
+
+ +
All Tags added for this tracker, space separated:
+
+
+
+
+ +
+ + +
+ +
+ +
+
+ +
All E-Mails to Notify for this tracker, space separated:
+
+
+
+
+ +
+ + +
+ +
+ + + + +
+
+ + +
+
+ +
+
+
+
+ +
+
+
+
+
+ +
+
+
+ + + +
+
+ + {%if term_metadata['items']%} + + + + + + + + + {% for item in term_metadata['items'] %} + + + + {% endfor %} + + +
Item
+ +
{{ item }}
+
+
+ {% endif %} + +
+
+
+ + + + + + + + + + diff --git a/var/www/modules/terms/templates/tracker_term_management.html b/var/www/modules/terms/templates/tracker_term_management.html new file mode 100644 index 00000000..ae8547ad --- /dev/null +++ b/var/www/modules/terms/templates/tracker_term_management.html @@ -0,0 +1,181 @@ + + + + + + + + Terms Management + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'tracker/menu_sidebar.html' %} + +
+ +
+
+
Your Tracked Terms
+
+
+ + + + + + + + + + + + {% for dict_uuid in user_term %} + + + + + + + + {% endfor %} + +
TypeTracked TermFirst seenLast seenEmail notification
{{dict_uuid['type']}} + {{dict_uuid['term']}} +
+ {% for tag in dict_uuid['tags'] %} + + {{ tag }} + + {% endfor %} +
+
+ {% if dict_uuid['first_seen'] %} + {{dict_uuid['first_seen'][0:4]}}/{{dict_uuid['first_seen'][4:6]}}/{{dict_uuid['first_seen'][6:8]}} + {% endif %} + + {% if dict_uuid['last_seen'] %} + {{dict_uuid['last_seen'][0:4]}}/{{dict_uuid['last_seen'][4:6]}}/{{dict_uuid['last_seen'][6:8]}} + {% endif %} + + {% for mail in dict_uuid['mails'] %} + {{ mail }}
+ {% endfor %} +
+
+
+ +
+
+
Global Tracked Terms
+
+
+ + + + + + + + + + + + {% for dict_uuid in global_term %} + + + + + + + + {% endfor %} + +
TypeTracked TermFirst seenLast seenEmail notification
{{dict_uuid['type']}} + {{dict_uuid['term']}} +
+ {% for tag in dict_uuid['tags'] %} + + {{ tag }} + + {% endfor %} +
+
+ {% if dict_uuid['first_seen'] %} + {{dict_uuid['first_seen'][0:4]}}/{{dict_uuid['first_seen'][4:6]}}/{{dict_uuid['first_seen'][6:8]}} + {% endif %} + + {% if dict_uuid['last_seen'] %} + {{dict_uuid['last_seen'][0:4]}}/{{dict_uuid['last_seen'][4:6]}}/{{dict_uuid['last_seen'][6:8]}} + {% endif %} + + {% for mail in dict_uuid['mails'] %} + {{ mail }}
+ {% endfor %} +
+
+
+ +
+
+
+ + + + + diff --git a/var/www/templates/nav_bar.html b/var/www/templates/nav_bar.html index 7a813b42..6b4c572e 100644 --- a/var/www/templates/nav_bar.html +++ b/var/www/templates/nav_bar.html @@ -19,7 +19,7 @@ Browse Items