diff --git a/OVERVIEW.md b/OVERVIEW.md
index f4ee12ec..be19bbfd 100644
--- a/OVERVIEW.md
+++ b/OVERVIEW.md
@@ -109,8 +109,79 @@ Redis and ARDB overview
 | **uuid**:ltags | **tag** |
 | **uuid**:ltagsgalaxies | **tag** |
 
+## DB2 - Leak Hunter:
+
+##### Tracker metadata:
+| Hset - Key | Field | Value |
+| ------ | ------ | ------ |
+| tracker:**uuid** | tracked | **tracked word/set/regex** |
+| | type | **word/set/regex** |
+| | date | **date added** |
+| | user_id | **created by user_id** |
+| | dashboard | **0/1 Display alert on dashboard** |
+| | level | **0/1 Tracker visibility** |
+
+##### Tracker by user_id (visibility level: user only):
+| Set - Key | Value |
+| ------ | ------ |
+| user:tracker:**user_id** | **uuid - tracker uuid** |
+| user:tracker:**user_id**:**word/set/regex - tracker type** | **uuid - tracker uuid** |
+
+##### Global Tracker (visibility level: all users):
+| Set - Key | Value |
+| ------ | ------ |
+| global:tracker | **uuid - tracker uuid** |
+| global:tracker:**word/set/regex - tracker type** | **uuid - tracker uuid** |
+
+##### All Trackers by type:
+| Set - Key | Value |
+| ------ | ------ |
+| all:tracker:**word/set/regex - tracker type** | **tracked term** |
+
+| Set - Key | Value |
+| ------ | ------ |
+| all:tracker_uuid:**tracker type**:**tracked term** | **uuid - tracker uuid** |
+
+##### All Tracked items:
+| Set - Key | Value |
+| ------ | ------ |
+| tracker:item:**uuid**:**date** | **item_id** |
+
+##### All Tracked tags:
+| Set - Key | Value |
+| ------ | ------ |
+| tracker:tags:**uuid** | **tag** |
+
+##### All Tracked mail:
+| Set - Key | Value |
+| ------ | ------ |
+| tracker:mail:**uuid** | **mail** |
+
+##### Refresh Tracker:
+| Key | Value |
+| ------ | ------ |
+| tracker:refresh:word | **last refreshed epoch** |
+| tracker:refresh:set | - |
+| tracker:refresh:regex | - |
+
+##### Zset Stat Tracker:
+| Key | Field | Value |
+| ------ | ------ | ------ |
+| tracker:stat:**uuid** | **date** | **nb_seen** |
+
+##### Stat token:
+| Zset - Key | Member | Score |
+| ------ | ------ | ------ |
+| stat_token_total_by_day:**date** | **word** | **nb_seen** |
+| | | |
+| stat_token_per_item_by_day:**date** | **word** | **nb_seen** |
+
+| Set - Key | Value |
+| ------ | ------ |
+| stat_token_history | **date** |
+
 ## DB2 - TermFreq:
 
 ##### Set:
@@ -118,6 +189,7 @@
 | TrackedSetSet | **tracked_set** |
 | TrackedRegexSet | **tracked_regex** |
 | | |
+| | |
 | tracked_**tracked_term** | **item_path** |
 | set_**tracked_set** | **item_path** |
 | regex_**tracked_regex** | **item_path** |
diff --git a/bin/Curve.py b/bin/Curve.py
deleted file mode 100755
index c7083c54..00000000
--- a/bin/Curve.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-"""
-This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
-
-This modules update a .csv file used to draw curves representing selected
-words and their occurency per day.
-
-..note:: The channel will have the name of the file created.
-
-..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
-the same Subscriber name in both of them.
-
-
-This Module is also used for term frequency.
-
-/!\ Top set management is done in the module Curve_manage_top_set
-
-
-Requirements
-------------
-
-*Need running Redis instances. (Redis)
-*Categories files of words in /files/ need to be created
-*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
- -""" -import redis -import time -from pubsublogger import publisher -from packages import lib_words -import os -import datetime -import calendar - -from Helper import Process - -# Email notifications -from NotificationHelper import * - -# Config Variables -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] - -TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" - -# create direct link in mail -full_paste_url = "/showsavedpaste/?paste=" - -def check_if_tracked_term(term, path): - if term in server_term.smembers(TrackedTermsSet_Name): - #add_paste to tracked_word_set - set_name = "tracked_" + term - server_term.sadd(set_name, path) - print(term, 'addded', set_name, '->', path) - p.populate_set_out("New Term added", 'CurveManageTopSets') - - # Send a notification only when the member is in the set - if term in server_term.smembers(TrackedTermsNotificationEnabled_Name): - - # create mail body - mail_body = ("AIL Framework,\n" - "New occurrence for term: " + term + "\n" - ''+full_paste_url + path) - - # Send to every associated email adress - for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + term): - sendEmailNotification(email, 'Term', mail_body) - - # tag paste - for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + term): - msg = '{};{}'.format(tag, path) - p.populate_set_out(msg, 'Tags') - - -def getValueOverRange(word, startDate, num_day): - to_return = 0 - for timestamp in range(startDate, startDate - num_day*oneDay, -oneDay): - value = server_term.hget(timestamp, word) - to_return += int(value) if value is not None else 0 - return to_return - - - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Curve' - p = Process(config_section) - - # REDIS # - r_serv1 = redis.StrictRedis( - host=p.config.get("ARDB_Curve", "host"), - port=p.config.get("ARDB_Curve", "port"), - db=p.config.get("ARDB_Curve", "db"), - decode_responses=True) - - server_term = redis.StrictRedis( - host=p.config.get("ARDB_TermFreq", "host"), - port=p.config.get("ARDB_TermFreq", "port"), - db=p.config.get("ARDB_TermFreq", "db"), - decode_responses=True) - - # FUNCTIONS # - publisher.info("Script Curve started") - - # create direct link in mail - full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url - - # FILE CURVE SECTION # - csv_path = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "wordtrending_csv")) - wordfile_path = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "wordsfile")) - - message = p.get_from_set() - prec_filename = None - generate_new_graph = False - - # Term Frequency - top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] - top_termFreq_setName_week = ["TopTermFreq_set_week", 7] - top_termFreq_setName_month = ["TopTermFreq_set_month", 31] - - while True: - - if message is not None: - generate_new_graph = True - - filename, word, score = message.split() - temp = filename.split('/') - date = temp[-4] + temp[-3] + temp[-2] - timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) - 
curr_set = top_termFreq_setName_day[0] + str(timestamp) - - - low_word = word.lower() - #Old curve with words in file - r_serv1.hincrby(low_word, date, int(score)) - - # Update redis - #consider the num of occurence of this term - curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) - #1 term per paste - curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1))) - - # Add in set only if term is not in the blacklist - if low_word not in server_term.smembers(BlackListTermsSet_Name): - #consider the num of occurence of this term - server_term.zincrby(curr_set, low_word, float(score)) - #1 term per paste - server_term.zincrby("per_paste_" + curr_set, low_word, float(1)) - - #Add more info for tracked terms - check_if_tracked_term(low_word, filename) - - #send to RegexForTermsFrequency - to_send = "{} {} {}".format(filename, timestamp, word) - p.populate_set_out(to_send, 'RegexForTermsFrequency') - - else: - - if generate_new_graph: - generate_new_graph = False - print('Building graph') - today = datetime.date.today() - year = today.year - month = today.month - - lib_words.create_curve_with_word_file(r_serv1, csv_path, - wordfile_path, year, - month) - - publisher.debug("Script Curve is Idling") - print("sleeping") - time.sleep(10) - message = p.get_from_set() diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py deleted file mode 100755 index 4eaf9c3f..00000000 --- a/bin/CurveManageTopSets.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" - -This module manage top sets for terms frequency. -Every 'refresh_rate' update the weekly and monthly set - -""" - -import redis -import time -import datetime -import copy -from pubsublogger import publisher -from packages import lib_words -import datetime -import calendar -import os -import configparser - -# Config Variables -Refresh_rate = 60*5 #sec -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -num_day_month = 31 -num_day_week = 7 - -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] - - -def manage_top_set(): - startDate = datetime.datetime.now() - startDate = startDate.replace(hour=0, minute=0, second=0, microsecond=0) - startDate = calendar.timegm(startDate.timetuple()) - blacklist_size = int(server_term.scard(BlackListTermsSet_Name)) - - dico = {} - dico_per_paste = {} - - # Retreive top data (max_card + blacklist_size) from days sets - for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay): - curr_set = top_termFreq_setName_day[0] + str(timestamp) - array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) - array_top_day_per_paste = server_term.zrevrangebyscore("per_paste_" + curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) - - for word, value in array_top_day: - if word not in server_term.smembers(BlackListTermsSet_Name): - if word in dico.keys(): - dico[word] += value - else: - dico[word] = value - - for word, value in array_top_day_per_paste: - if word not in 
server_term.smembers(BlackListTermsSet_Name): - if word in dico_per_paste.keys(): - dico_per_paste[word] += value - else: - dico_per_paste[word] = value - - if timestamp == startDate - num_day_week*oneDay: - dico_week = copy.deepcopy(dico) - dico_week_per_paste = copy.deepcopy(dico_per_paste) - - # convert dico into sorted array - array_month = [] - for w, v in dico.items(): - array_month.append((w, v)) - array_month.sort(key=lambda tup: -tup[1]) - array_month = array_month[0:20] - - array_week = [] - for w, v in dico_week.items(): - array_week.append((w, v)) - array_week.sort(key=lambda tup: -tup[1]) - array_week = array_week[0:20] - - # convert dico_per_paste into sorted array - array_month_per_paste = [] - for w, v in dico_per_paste.items(): - array_month_per_paste.append((w, v)) - array_month_per_paste.sort(key=lambda tup: -tup[1]) - array_month_per_paste = array_month_per_paste[0:20] - - array_week_per_paste = [] - for w, v in dico_week_per_paste.items(): - array_week_per_paste.append((w, v)) - array_week_per_paste.sort(key=lambda tup: -tup[1]) - array_week_per_paste = array_week_per_paste[0:20] - - - # suppress every terms in top sets - for curr_set, curr_num_day in top_termFreq_set_array[1:3]: - for w in server_term.zrange(curr_set, 0, -1): - server_term.zrem(curr_set, w) - for w in server_term.zrange("per_paste_" + curr_set, 0, -1): - server_term.zrem("per_paste_" + curr_set, w) - - # Add top term from sorted array in their respective sorted sets - for elem in array_week: - server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0]) - for elem in array_week_per_paste: - server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0]) - - for elem in array_month: - server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0]) - for elem in array_month_per_paste: - server_term.zadd("per_paste_" + top_termFreq_setName_month[0], float(elem[1]), elem[0]) - - timestamp = int(time.mktime(datetime.datetime.now().timetuple())) - value = str(timestamp) + ", " + "-" - r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) - print("refreshed module") - - - -if __name__ == '__main__': - # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) - # Port of the redis instance used by pubsublogger - publisher.port = 6380 - # Script is the default channel used for the modules. - publisher.channel = 'Script' - - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ Or activate the virtualenv.')
-
-    cfg = configparser.ConfigParser()
-    cfg.read(configfile)
-
-
-    # For Module Manager
-    r_temp = redis.StrictRedis(
-        host=cfg.get('RedisPubSub', 'host'),
-        port=cfg.getint('RedisPubSub', 'port'),
-        db=cfg.getint('RedisPubSub', 'db'),
-        decode_responses=True)
-
-    timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
-    value = str(timestamp) + ", " + "-"
-    r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
-    r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid()))
-
-    server_term = redis.StrictRedis(
-        host=cfg.get("ARDB_TermFreq", "host"),
-        port=cfg.getint("ARDB_TermFreq", "port"),
-        db=cfg.getint("ARDB_TermFreq", "db"),
-        decode_responses=True)
-
-    publisher.info("Script Curve_manage_top_set started")
-
-    # Sent to the logging a description of the module
-    publisher.info("Manage the top sets with the data created by the module curve.")
-
-    manage_top_set()
-
-    while True:
-        # Get one message from the input queue (module only work if linked with a queue)
-        time.sleep(Refresh_rate) # sleep a long time then manage the set
-        manage_top_set()
diff --git a/bin/DbCleaner.py b/bin/DbCleaner.py
new file mode 100755
index 00000000..e0cf6512
--- /dev/null
+++ b/bin/DbCleaner.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+The DbCleaner Module
+====================
+
+Daily cleanup of the DB2 token statistics (keeps the last 31 days).
+"""
+import os
+import sys
+import time
+import datetime
+
+from pubsublogger import publisher
+
+import NotificationHelper
+
+from packages import Date
+from packages import Item
+from packages import Term
+
+def clean_term_db_stat_token():
+    all_stat_date = Term.get_all_token_stat_history()
+
+    list_date_to_keep = Date.get_date_range(31)
+    for date in all_stat_date:
+        if date not in list_date_to_keep:
+            # remove history
+            Term.delete_token_statistics_by_date(date)
+
+    print('Term Stats Cleaned')
+
+
+if __name__ == "__main__":
+
+    publisher.port = 6380
+    publisher.channel = "Script"
+    publisher.info("DbCleaner started")
+
+    # low priority
+    time.sleep(180)
+
+    daily_cleaner = True
+    current_date = datetime.datetime.now().strftime("%Y%m%d")
+
+    while True:
+
+        if daily_cleaner:
+            clean_term_db_stat_token()
+            daily_cleaner = False
+        else:
+            time.sleep(600)
+
+        new_date = datetime.datetime.now().strftime("%Y%m%d")
+        if new_date != current_date:
+            current_date = new_date
+            daily_cleaner = True
diff --git a/bin/Dir.py b/bin/Dir.py
deleted file mode 100755
index d76a7ad5..00000000
--- a/bin/Dir.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import argparse
-import redis
-from pubsublogger import publisher
-from packages.lib_words import create_dirfile
-import configparser
-
-
-def main():
-    """Main Function"""
-
-    # CONFIG #
-    cfg = configparser.ConfigParser()
-    cfg.read('./packages/config.cfg')
-
-    parser = argparse.ArgumentParser(
-        description='''This script is a part of the Analysis Information Leak
-        framework. 
It create a redis list called "listfile" which contain - the absolute filename of all the files from the directory given in - the argument "directory".''', - epilog='Example: ./Dir.py /home/2013/03/') - - parser.add_argument('directory', type=str, - help='The directory to run inside', action='store') - - parser.add_argument('-db', type=int, default=0, - help='The name of the Redis DB (default 0)', - choices=[0, 1, 2, 3, 4], action='store') - - parser.add_argument('-ow', help='trigger the overwritting mode', - action='store_true') - - args = parser.parse_args() - - r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) - - publisher.port = 6380 - publisher.channel = "Script" - - create_dirfile(r_serv, args.directory, args.ow) - -if __name__ == "__main__": - main() diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 98645165..d87ef21e 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -76,12 +76,15 @@ function helptext { Usage: ----- LAUNCH.sh - [-l | --launchAuto] - [-k | --killAll] - [-u | --update] - [-c | --configUpdate] - [-t | --thirdpartyUpdate] - [-h | --help] + [-l | --launchAuto] LAUNCH DB + Scripts + [-k | --killAll] Kill DB + Scripts + [-ks | --killscript] Scripts + [-u | --update] Update AIL + [-c | --crawler] LAUNCH Crawlers + [-f | --launchFeeder] LAUNCH Pystemon feeder + [-t | --thirdpartyUpdate] Update Web + [-m | --menu] Display Advanced Menu + [-h | --help] Help " } @@ -153,14 +156,10 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Duplicates" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Duplicates.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Lines" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Lines.py; read x" - sleep 0.1 screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DomClassifier.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "Categ" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Categ.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Tokenize" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tokenize.py; read x" - sleep 0.1 screen -S "Script_AIL" -X screen -t "CreditCards" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./CreditCards.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "BankAccount" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./BankAccount.py; read x" @@ -175,13 +174,9 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Credential" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Credential.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Curve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Curve.py; read x" + screen -S "Script_AIL" -X screen -t "TermTrackerMod" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./TermTrackerMod.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "CurveManageTopSets" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./CurveManageTopSets.py; read x" - sleep 0.1 - screen -S "Script_AIL" -X screen -t "RegexForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./RegexForTermsFrequency.py; read x" - sleep 0.1 - screen -S "Script_AIL" -X screen -t "SetForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SetForTermsFrequency.py; read x" + screen -S "Script_AIL" -X screen -t "RegexTracker" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./RegexTracker.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "Indexer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Indexer.py; read x" sleep 0.1 @@ -213,6 +208,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t 
"SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x" sleep 0.1 + screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x" + sleep 0.1 screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x" @@ -404,6 +401,18 @@ function launch_feeder { fi } +function killscript { + if [[ $islogged || $isqueued || $isscripted || $isflasked || $isfeeded || $iscrawler ]]; then + echo -e $GREEN"Killing Script"$DEFAULT + kill $islogged $isqueued $isscripted $isflasked $isfeeded $iscrawler + sleep 0.2 + echo -e $ROSE`screen -ls`$DEFAULT + echo -e $GREEN"\t* $islogged $isqueued $isscripted $isflasked $isfeeded $iscrawler killed."$DEFAULT + else + echo -e $RED"\t* No script to kill"$DEFAULT + fi +} + function killall { if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted || $isflasked || $isfeeded || $iscrawler ]]; then if [[ $isredis ]]; then @@ -463,76 +472,82 @@ function launch_all { launch_flask; } -#If no params, display the menu +function menu_display { + + options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Flask" "Killall" "Shutdown" "Update" "Update-config" "Update-thirdparty") + + menu() { + echo "What do you want to Launch?:" + for i in ${!options[@]}; do + printf "%3d%s) %s\n" $((i+1)) "${choices[i]:- }" "${options[i]}" + done + [[ "$msg" ]] && echo "$msg"; : + } + + prompt="Check an option (again to uncheck, ENTER when done): " + + while menu && read -rp "$prompt" numinput && [[ "$numinput" ]]; do + for num in $numinput; do + [[ "$num" != *[![:digit:]]* ]] && (( num > 0 && num <= ${#options[@]} )) || { + msg="Invalid option: $num"; break + } + ((num--)); msg="${options[num]} was ${choices[num]:+un}checked" + [[ "${choices[num]}" ]] && choices[num]="" || choices[num]="+" + done + done + + for i in ${!options[@]}; do + if [[ "${choices[i]}" ]]; then + case ${options[i]} in + Redis) + launch_redis; + ;; + Ardb) + launch_ardb; + ;; + Logs) + launch_logs; + ;; + Queues) + launch_queues; + ;; + Scripts) + launch_scripts; + ;; + Flask) + launch_flask; + ;; + Crawler) + launching_crawler; + ;; + Killall) + killall; + ;; + Shutdown) + shutdown; + ;; + Update) + update; + ;; + Update-config) + checking_configuration; + ;; + Update-thirdparty) + update_thirdparty; + ;; + esac + fi + done + + exit + +} + + +#If no params, display the help [[ $@ ]] || { helptext; - - options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Flask" "Killall" "Shutdown" "Update" "Update-config" "Update-thirdparty") - - menu() { - echo "What do you want to Launch?:" - for i in ${!options[@]}; do - printf "%3d%s) %s\n" $((i+1)) "${choices[i]:- }" "${options[i]}" - done - [[ "$msg" ]] && echo "$msg"; : - } - - prompt="Check an option (again to uncheck, ENTER when done): " - while menu && read -rp "$prompt" numinput && [[ "$numinput" ]]; do - for num in $numinput; do - [[ "$num" != *[![:digit:]]* ]] && (( num > 0 && num <= ${#options[@]} )) || { - msg="Invalid option: $num"; break - } - ((num--)); msg="${options[num]} was ${choices[num]:+un}checked" - [[ "${choices[num]}" ]] && choices[num]="" || choices[num]="+" - done - done - - for i in ${!options[@]}; do - if [[ "${choices[i]}" ]]; then - case ${options[i]} in - Redis) - launch_redis; - ;; - Ardb) - launch_ardb; - ;; - Logs) - launch_logs; - ;; - Queues) - launch_queues; - ;; - Scripts) - launch_scripts; - ;; - 
Flask)
-                launch_flask;
-                ;;
-            Crawler)
-                launching_crawler;
-                ;;
-            Killall)
-                killall;
-                ;;
-            Shutdown)
-                shutdown;
-                ;;
-            Update)
-                update;
-                ;;
-            Update-config)
-                checking_configuration;
-                ;;
-            Update-thirdparty)
-                update_thirdparty;
-                ;;
-            esac
-        fi
-    done
-
-    exit
 }
 
 #echo "$@"
@@ -553,6 +568,10 @@ while [ "$1" != "" ]; do
             ;;
         -k | --killAll )        killall;
             ;;
+        -ks | --killscript )    killscript;
+            ;;
+        -m | --menu )           menu_display;
+            ;;
         -u | --update )         update;
             ;;
         -t | --thirdpartyUpdate )       update_thirdparty;
@@ -565,7 +584,7 @@ while [ "$1" != "" ]; do
             exit
             ;;
         -kh | --khelp )         helptext;
             ;;
         * )                     helptext
             exit 1
diff --git a/bin/Lines.py b/bin/Lines.py
deleted file mode 100755
index e4187dc7..00000000
--- a/bin/Lines.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-"""
-The ZMQ_PubSub_Lines Module
-============================
-
-This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q
-Module.
-
-It perform a sorting on the line's length and publish/forward them to
-differents channels:
-
-*Channel 1 if max length(line) < max
-*Channel 2 if max length(line) > max
-
-The collected informations about the processed pastes
-(number of lines and maximum length line) are stored in Redis.
-
-..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
-the same Subscriber name in both of them.
-
-Requirements
-------------
-
-*Need running Redis instances. (LevelDB & Redis)
-*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
-
-"""
-import argparse
-import time
-from packages import Paste
-from pubsublogger import publisher
-
-from Helper import Process
-
-if __name__ == '__main__':
-    publisher.port = 6380
-    publisher.channel = 'Script'
-
-    config_section = 'Lines'
-    p = Process(config_section)
-
-    # SCRIPT PARSER #
-    parser = argparse.ArgumentParser(
-        description='This script is a part of the Analysis Information \
-        Leak framework.')
-
-    parser.add_argument(
-        '-max', type=int, default=500,
-        help='The limit between "short lines" and "long lines"',
-        action='store')
-
-    args = parser.parse_args()
-
-    # FUNCTIONS #
-    tmp_string = "Lines script Subscribed to channel {} and Start to publish \
-        on channel Longlines, Shortlines"
-    publisher.info(tmp_string)
-
-    while True:
-        try:
-            message = p.get_from_set()
-            print(message)
-            if message is not None:
-                PST = Paste.Paste(message)
-            else:
-                publisher.debug("Tokeniser is idling 10s")
-                time.sleep(10)
-                continue
-
-            # FIXME do it in the paste class
-            lines_infos = PST.get_lines_info()
-            PST.save_attribute_redis("p_nb_lines", lines_infos[0])
-            PST.save_attribute_redis("p_max_length_line", lines_infos[1])
-
-            # FIXME Not used.
- PST.store.sadd("Pastes_Objects", PST.p_rel_path) - print(PST.p_rel_path) - if lines_infos[1] < args.max: - p.populate_set_out( PST.p_rel_path , 'LinesShort') - else: - p.populate_set_out( PST.p_rel_path , 'LinesLong') - except IOError: - print("CRC Checksum Error on : ", PST.p_rel_path) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 6743cdca..cfdb82f7 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -9,7 +9,6 @@ import time import datetime import redis import os -from packages import lib_words from packages.Date import Date from pubsublogger import publisher from Helper import Process diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index 1bccd314..4007e56f 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -20,13 +20,6 @@ configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') publisher.port = 6380 publisher.channel = "Script" -# notifications enabled/disabled -TrackedTermsNotificationEnabled_Name = "TrackedNotifications" - -# associated notification email addresses for a specific term` -# Keys will be e.g. TrackedNotificationEmails -TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_" - def sendEmailNotification(recipient, alert_name, content): if not os.path.exists(configfile): diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py deleted file mode 100755 index cd8102c1..00000000 --- a/bin/RegexForTermsFrequency.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -This Module is used for term frequency. -It processes every paste coming from the global module and test the regexs -supplied in the term webpage. - -""" -import redis -import time -from pubsublogger import publisher -from packages import Paste -import calendar -import re -import signal -import time -from Helper import Process -# Email notifications -from NotificationHelper import * - - -class TimeoutException(Exception): - pass - - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - -# Config Variables -DICO_REFRESH_TIME = 60 # s - -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -TrackedRegexSet_Name = "TrackedRegexSet" - -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month] - -TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" - -# create direct link in mail -full_paste_url = "/showsavedpaste/?paste=" - - -def refresh_dicos(): - dico_regex = {} - dico_regexname_to_redis = {} - for regex_str in server_term.smembers(TrackedRegexSet_Name): - dico_regex[regex_str[1:-1]] = re.compile(regex_str[1:-1]) - dico_regexname_to_redis[regex_str[1:-1]] = regex_str - - return dico_regex, dico_regexname_to_redis - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'RegexForTermsFrequency' - p = Process(config_section) - max_execution_time = p.config.getint(config_section, "max_execution_time") - - # REDIS # - server_term = redis.StrictRedis( - host=p.config.get("ARDB_TermFreq", "host"), - port=p.config.get("ARDB_TermFreq", "port"), - db=p.config.get("ARDB_TermFreq", "db"), - 
decode_responses=True)
-
-    # FUNCTIONS #
-    publisher.info("RegexForTermsFrequency script started")
-
-    # create direct link in mail
-    full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url
-
-    # compile the regex
-    dico_refresh_cooldown = time.time()
-    dico_regex, dico_regexname_to_redis = refresh_dicos()
-
-    message = p.get_from_set()
-
-    # Regex Frequency
-    while True:
-
-        if message is not None:
-            if time.time() - dico_refresh_cooldown > DICO_REFRESH_TIME:
-                dico_refresh_cooldown = time.time()
-                dico_regex, dico_regexname_to_redis = refresh_dicos()
-                print('dico got refreshed')
-
-            filename = message
-            temp = filename.split('/')
-            timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
-
-            curr_set = top_termFreq_setName_day[0] + str(timestamp)
-            paste = Paste.Paste(filename)
-            content = paste.get_p_content()
-
-            # iterate the word with the regex
-            for regex_str, compiled_regex in dico_regex.items():
-
-                signal.alarm(max_execution_time)
-                try:
-                    matched = compiled_regex.search(content)
-                except TimeoutException:
-                    print ("{0} processing timeout".format(paste.p_rel_path))
-                    continue
-                else:
-                    signal.alarm(0)
-
-                if matched is not None: # there is a match
-                    print('regex matched {}'.format(regex_str))
-                    matched = matched.group(0)
-                    regex_str_complete = "/" + regex_str + "/"
-                    # Add in Regex track set only if term is not in the blacklist
-                    if regex_str_complete not in server_term.smembers(BlackListTermsSet_Name):
-                        # Send a notification only when the member is in the set
-                        if regex_str_complete in server_term.smembers(TrackedTermsNotificationEnabled_Name):
-
-                            # create mail body
-                            mail_body = ("AIL Framework,\n"
-                                         "New occurrence for regex: " + regex_str + "\n"
-                                         ''+full_paste_url + filename)
-
-                            # Send to every associated email adress
-                            for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete):
-                                sendEmailNotification(email, 'Term', mail_body)
-
-                            # tag paste
-                            for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + regex_str_complete):
-                                msg = '{};{}'.format(tag, filename)
-                                p.populate_set_out(msg, 'Tags')
-
-                        set_name = 'regex_' + dico_regexname_to_redis[regex_str]
-                        new_to_the_set = server_term.sadd(set_name, filename)
-                        new_to_the_set = True if new_to_the_set == 1 else False
-
-                        # consider the num of occurence of this term
-                        regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1)))
-                        # 1 term per paste
-                        if new_to_the_set:
-                            regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1)))
-                            server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1))
-                            server_term.zincrby(curr_set, dico_regexname_to_redis[regex_str], float(1))
-                else:
-                    pass
-
-        else:
-            publisher.debug("Script RegexForTermsFrequency is Idling")
-            print("sleeping")
-            time.sleep(5)
-        message = p.get_from_set()
diff --git a/bin/RegexTracker.py b/bin/RegexTracker.py
new file mode 100755
index 00000000..260db3c9
--- /dev/null
+++ b/bin/RegexTracker.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+This Module is used for regex tracking.
+It processes every paste coming from the global module and tests the regexes
+supplied in the term webpage.
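+
+A minimal sketch of the matching flow (illustration only, not executed;
+``dict_regex_tracked`` maps each regex string to its compiled pattern)::
+
+    for regex in dict_regex_tracked:
+        if dict_regex_tracked[regex].search(item_content):
+            new_term_found(regex, 'regex', item_id, item_date)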
+
+"""
+import os
+import re
+import sys
+import time
+import signal
+
+from Helper import Process
+from pubsublogger import publisher
+
+import NotificationHelper
+
+from packages import Item
+from packages import Term
+
+full_item_url = "/showsavedpaste/?paste="
+mail_body_template = "AIL Framework,\nNew occurrence for tracked regex: {}\nitem id: {}\nurl: {}{}"
+
+dict_regex_tracked = Term.get_regex_tracked_words_dict()
+last_refresh = time.time()
+
+class TimeoutException(Exception):
+    pass
+def timeout_handler(signum, frame):
+    raise TimeoutException
+signal.signal(signal.SIGALRM, timeout_handler)
+
+def new_term_found(term, term_type, item_id, item_date):
+    uuid_list = Term.get_term_uuid_list(term, 'regex')
+    print('new tracked term found: {} in {}'.format(term, item_id))
+
+    for term_uuid in uuid_list:
+        Term.add_tracked_item(term_uuid, item_id, item_date)
+
+        tags_to_add = Term.get_term_tags(term_uuid)
+        for tag in tags_to_add:
+            msg = '{};{}'.format(tag, item_id)
+            p.populate_set_out(msg, 'Tags')
+
+        mail_to_notify = Term.get_term_mails(term_uuid)
+        if mail_to_notify:
+            mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
+        for mail in mail_to_notify:
+            NotificationHelper.sendEmailNotification(mail, 'Term Tracker', mail_body)
+
+if __name__ == "__main__":
+    publisher.port = 6380
+    publisher.channel = "Script"
+    publisher.info("Script RegexTracker started")
+
+    config_section = 'RegexTracker'
+    p = Process(config_section)
+    max_execution_time = p.config.getint(config_section, "max_execution_time")
+
+    full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
+
+    # Regex tracking
+    while True:
+
+        item_id = p.get_from_set()
+
+        if item_id is not None:
+
+            item_date = Item.get_item_date(item_id)
+            item_content = Item.get_item_content(item_id)
+
+            for regex in dict_regex_tracked:
+
+                signal.alarm(max_execution_time)
+                try:
+                    matched = dict_regex_tracked[regex].search(item_content)
+                except TimeoutException:
+                    print ("{0} processing timeout".format(item_id))
+                    continue
+                else:
+                    signal.alarm(0)
+
+                if matched:
+                    new_term_found(regex, 'regex', item_id, item_date)
+
+
+        else:
+            time.sleep(5)
+
+        # refresh Tracked term
+        if last_refresh < Term.get_tracked_term_last_updated_by_type('regex'):
+            dict_regex_tracked = Term.get_regex_tracked_words_dict()
+            last_refresh = time.time()
+            print('Tracked set refreshed')
diff --git a/bin/SetForTermsFrequency.py b/bin/SetForTermsFrequency.py
deleted file mode 100755
index 19ed7210..00000000
--- a/bin/SetForTermsFrequency.py
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-"""
-This Module is used for term frequency.
-It processes every paste coming from the global module and test the sets
-supplied in the term webpage.
- -""" -import redis -import time -from pubsublogger import publisher -from packages import lib_words -from packages import Paste -import os -import datetime -import calendar -import re -import ast -from Helper import Process - -# Email notifications -from NotificationHelper import * - -# Config Variables -BlackListTermsSet_Name = "BlackListSetTermSet" -TrackedTermsSet_Name = "TrackedSetTermSet" -TrackedRegexSet_Name = "TrackedRegexSet" -TrackedSetSet_Name = "TrackedSetSet" - -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set -oneDay = 60*60*24 -top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] -top_termFreq_setName_week = ["TopTermFreq_set_week", 7] -top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] - -TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" - -# create direct link in mail -full_paste_url = "/showsavedpaste/?paste=" - -def add_quote_inside_tab(tab): - quoted_tab = "[" - for elem in tab[1:-1].split(','): - elem = elem.lstrip().strip() - quoted_tab += "\'{}\', ".format(elem) - quoted_tab = quoted_tab[:-2] #remove trailing , - quoted_tab += "]" - return str(quoted_tab) - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'SetForTermsFrequency' - p = Process(config_section) - - # REDIS # - server_term = redis.StrictRedis( - host=p.config.get("ARDB_TermFreq", "host"), - port=p.config.get("ARDB_TermFreq", "port"), - db=p.config.get("ARDB_TermFreq", "db"), - decode_responses=True) - - # FUNCTIONS # - publisher.info("RegexForTermsFrequency script started") - - # create direct link in mail - full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url - - #get the dico and matching percent - dico_percent = {} - dico_set_tab = {} - dico_setname_to_redis = {} - for set_str in server_term.smembers(TrackedSetSet_Name): - tab_set = set_str[1:-1] - tab_set = add_quote_inside_tab(tab_set) - perc_finder = re.compile("\[[0-9]{1,3}\]").search(tab_set) - if perc_finder is not None: - match_percent = perc_finder.group(0)[1:-1] - dico_percent[tab_set] = float(match_percent) - dico_set_tab[tab_set] = ast.literal_eval(tab_set) - dico_setname_to_redis[tab_set] = set_str - else: - continue - - message = p.get_from_set() - - while True: - - if message is not None: - filename = message - temp = filename.split('/') - timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) - content = Paste.Paste(filename).get_p_content() - - curr_set = top_termFreq_setName_day[0] + str(timestamp) - - #iterate over the words of the file - match_dico = {} - for word in content.split(): - for cur_set, array_set in dico_set_tab.items(): - for w_set in array_set[:-1]: #avoid the percent matching - if word == w_set: - try: - match_dico[str(array_set)] += 1 - except KeyError: - match_dico[str(array_set)] = 1 - - #compute matching % - for the_set, matchingNum in match_dico.items(): - eff_percent = float(matchingNum) / float((len(ast.literal_eval(the_set))-1)) * 100 #-1 bc if the percent matching - if eff_percent >= dico_percent[the_set]: - # Send a notification only when the member is in the set - if dico_setname_to_redis[str(the_set)] in server_term.smembers(TrackedTermsNotificationEnabled_Name): - - # create mail body - mail_body = ("AIL Framework,\n" - "New occurrence for term: " + dico_setname_to_redis[str(the_set)] + "\n" - ''+full_paste_url + filename) - - # 
Send to every associated email adress
-                    for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + dico_setname_to_redis[str(the_set)]):
-                        sendEmailNotification(email, 'Term', mail_body)
-
-                    # tag paste
-                    for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + dico_setname_to_redis[str(the_set)]):
-                        msg = '{};{}'.format(tag, filename)
-                        p.populate_set_out(msg, 'Tags')
-
-                    print(the_set, "matched in", filename)
-                    set_name = 'set_' + dico_setname_to_redis[the_set]
-                    new_to_the_set = server_term.sadd(set_name, filename)
-                    new_to_the_set = True if new_to_the_set == 1 else False
-
-                    #consider the num of occurence of this set
-                    set_value = int(server_term.hincrby(timestamp, dico_setname_to_redis[the_set], int(1)))
-
-                    # FIXME - avoid using per paste as a set is checked over the entire paste
-                    #1 term per paste
-                    if new_to_the_set:
-                        set_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_setname_to_redis[the_set], int(1)))
-                        server_term.zincrby("per_paste_" + curr_set, dico_setname_to_redis[the_set], float(1))
-                        server_term.zincrby(curr_set, dico_setname_to_redis[the_set], float(1))
-
-
-        else:
-            publisher.debug("Script RegexForTermsFrequency is Idling")
-            print("sleeping")
-            time.sleep(5)
-        message = p.get_from_set()
diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py
new file mode 100755
index 00000000..fca0439f
--- /dev/null
+++ b/bin/TermTrackerMod.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+The TermTracker Module
+======================
+
+"""
+import os
+import sys
+import time
+import signal
+
+from Helper import Process
+from pubsublogger import publisher
+
+import NotificationHelper
+
+from packages import Item
+from packages import Term
+
+sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
+import Flask_config
+
+full_item_url = "/showsavedpaste/?paste="
+
+mail_body_template = "AIL Framework,\nNew occurrence for tracked term: {}\nitem id: {}\nurl: {}{}"
+
+# loads tracked words
+list_tracked_words = Term.get_tracked_words_list()
+last_refresh_word = time.time()
+set_tracked_words_list = Term.get_set_tracked_words_list()
+last_refresh_set = time.time()
+
+class TimeoutException(Exception):
+    pass
+def timeout_handler(signum, frame):
+    raise TimeoutException
+signal.signal(signal.SIGALRM, timeout_handler)
+
+def new_term_found(term, term_type, item_id, item_date):
+    uuid_list = Term.get_term_uuid_list(term, term_type)
+    print('new tracked term found: {} in {}'.format(term, item_id))
+
+    for term_uuid in uuid_list:
+        Term.add_tracked_item(term_uuid, item_id, item_date)
+
+        tags_to_add = Term.get_term_tags(term_uuid)
+        for tag in tags_to_add:
+            msg = '{};{}'.format(tag, item_id)
+            p.populate_set_out(msg, 'Tags')
+
+        mail_to_notify = Term.get_term_mails(term_uuid)
+        if mail_to_notify:
+            mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
+        for mail in mail_to_notify:
+            NotificationHelper.sendEmailNotification(mail, 'Term Tracker', mail_body)
+
+
+if __name__ == "__main__":
+
+    publisher.port = 6380
+    publisher.channel = "Script"
+    publisher.info("Script TermTrackerMod started")
+
+    config_section = 'TermTrackerMod'
+    p = Process(config_section)
+    max_execution_time = p.config.getint(config_section, "max_execution_time")
+
+    full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
+
+    while True:
+
+        item_id = p.get_from_set()
+
+        if item_id is not None:
+
+            item_date = Item.get_item_date(item_id)
+            item_content = 
Item.get_item_content(item_id)
+
+            signal.alarm(max_execution_time)
+            try:
+                dict_words_freq = Term.get_text_word_frequency(item_content)
+            except TimeoutException:
+                print ("{0} processing timeout".format(item_id))
+                continue
+            else:
+                signal.alarm(0)
+
+            # create token statistics
+            for word in dict_words_freq:
+                Term.create_token_statistics(item_date, word, dict_words_freq[word])
+
+            # check solo words
+            for word in list_tracked_words:
+                if word in dict_words_freq:
+                    new_term_found(word, 'word', item_id, item_date)
+
+            # check word sets
+            for elem in set_tracked_words_list:
+                list_words = elem[0]
+                nb_words_threshold = elem[1]
+                word_set = elem[2]
+                nb_uniq_word = 0
+
+                for word in list_words:
+                    if word in dict_words_freq:
+                        nb_uniq_word += 1
+                if nb_uniq_word >= nb_words_threshold:
+                    new_term_found(word_set, 'set', item_id, item_date)
+
+        else:
+            time.sleep(5)
+
+
+        # refresh Tracked term
+        if last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
+            list_tracked_words = Term.get_tracked_words_list()
+            last_refresh_word = time.time()
+            print('Tracked word refreshed')
+
+        if last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
+            set_tracked_words_list = Term.get_set_tracked_words_list()
+            last_refresh_set = time.time()
+            print('Tracked set refreshed')
diff --git a/bin/Tokenize.py b/bin/Tokenize.py
deleted file mode 100755
index 4e13b9ff..00000000
--- a/bin/Tokenize.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-"""
-The Tokenize Module
-===================
-
-This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
-Module.
-
-It tokenize the content of the paste and publish the result in the following
-format:
-    channel_name+' '+/path/of/the/paste.gz+' '+tokenized_word+' '+scoring
-
-    ..seealso:: Paste method (_get_top_words)
-
-..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
-the same Subscriber name in both of them.
-
-Requirements
-------------
-
-*Need running Redis instances. (Redis)
-*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
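With Tokenize.py removed, per-item token statistics now come from Term.get_text_word_frequency() in the new bin/packages/Term.py below. A minimal standalone sketch of that tokenization, assuming textblob and nltk are installed; the tokenizer pattern is copied from the new Term.py:

```python
from collections import defaultdict

from nltk.tokenize import RegexpTokenizer
from textblob import TextBlob

# Same pattern as the new bin/packages/Term.py tokenizer:
# split on punctuation and whitespace, drop empty tokens.
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
                            gaps=True, discard_empty=True)

def word_frequency(text):
    # Lowercase first, as get_text_word_frequency() does.
    words = defaultdict(int)
    for token in TextBlob(text.lower(), tokenizer=tokenizer).tokens:
        words[token] += 1
    return words

print(dict(word_frequency('AIL: leak, leak... framework')))
# expected: {'ail': 1, 'leak': 2, 'framework': 1}
```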
- -""" -import time -from packages import Paste -from pubsublogger import publisher - -from Helper import Process -import signal - -class TimeoutException(Exception): - pass - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Tokenize' - p = Process(config_section) - - # LOGGING # - publisher.info("Tokeniser started") - - while True: - message = p.get_from_set() - print(message) - if message is not None: - paste = Paste.Paste(message) - signal.alarm(5) - try: - for word, score in paste._get_top_words().items(): - if len(word) >= 4: - msg = '{} {} {}'.format(paste.p_rel_path, word, score) - p.populate_set_out(msg) - except TimeoutException: - p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_rel_path)) - continue - else: - signal.alarm(0) - else: - publisher.debug("Tokeniser is idling 10s") - time.sleep(10) - print("Sleeping") diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 85edb0be..c9997bab 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -1,5 +1,7 @@ #!/usr/bin/python3 +import datetime + class Date(object): """docstring for Date""" def __init__(self, *args): @@ -34,7 +36,6 @@ class Date(object): self.day = day def substract_day(self, numDay): - import datetime computed_date = datetime.date(int(self.year), int(self.month), int(self.day)) - datetime.timedelta(numDay) comp_year = str(computed_date.year) comp_month = str(computed_date.month).zfill(2) @@ -50,3 +51,22 @@ def date_substract_day(date, num_day=1): new_date = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8])) - datetime.timedelta(num_day) new_date = str(new_date).replace('-', '') return new_date + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return list(reversed(date_list)) + +def substract_date(date_from, date_to): + date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) + date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) + delta = date_to - date_from # timedelta + l_date = [] + for i in range(delta.days + 1): + date = date_from + datetime.timedelta(i) + l_date.append( date.strftime('%Y%m%d') ) + return l_date diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 92c59a2f..264ef300 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -2,10 +2,13 @@ # -*-coding:UTF-8 -* import os +import sys import gzip import redis +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) import Flask_config +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Date import Tag @@ -19,6 +22,9 @@ def exist_item(item_id): else: return False +def get_item_id(full_path): + return full_path.replace(PASTES_FOLDER, '', 1) + def get_item_date(item_id): l_directory = item_id.split('/') return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2]) @@ -131,3 +137,13 @@ def get_item_pgp_name(item_id): def get_item_pgp_mail(item_id): return _get_item_correlation('pgpdump', 'mail', item_id) + + +### +### GET Internal Module DESC +### +def get_item_list_desc(list_item_id): + desc_list = [] + for item_id in list_item_id: + desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': 
Tag.get_item_tags(item_id)} )
+    return desc_list
diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py
index dd1e858c..70d7e72e 100755
--- a/bin/packages/Tag.py
+++ b/bin/packages/Tag.py
@@ -82,7 +82,7 @@ def get_item_tags(item_id):
     if tags:
         return list(tags)
     else:
-        return '[]'
+        return []
 
 # TEMPLATE + API QUERY
 def add_items_tag(tags=[], galaxy_tags=[], item_id=None):
diff --git a/bin/packages/Term.py b/bin/packages/Term.py
new file mode 100755
index 00000000..b4fcd832
--- /dev/null
+++ b/bin/packages/Term.py
@@ -0,0 +1,490 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import re
+import sys
+import time
+import uuid
+import redis
+import datetime
+
+from collections import defaultdict
+
+from nltk.tokenize import RegexpTokenizer
+from textblob import TextBlob
+
+sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
+import Flask_config
+
+import Date
+import Item
+
+r_serv_term = Flask_config.r_serv_term
+r_serv_db = Flask_config.r_serv_db # used by is_in_role()
+email_regex = Flask_config.email_regex
+
+special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
+special_characters.add('\\s')
+
+# NLTK tokenizer
+tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
+                            gaps=True, discard_empty=True)
+
+def is_valid_uuid_v4(UUID):
+    UUID = UUID.replace('-', '')
+    try:
+        uuid_test = uuid.UUID(hex=UUID, version=4)
+        return uuid_test.hex == UUID
+    except:
+        return False
+
+# # TODO: use new package => duplicate fct
+def is_in_role(user_id, role):
+    if r_serv_db.sismember('user_role:{}'.format(role), user_id):
+        return True
+    else:
+        return False
+
+def check_term_uuid_valid_access(term_uuid, user_id):
+    if not is_valid_uuid_v4(term_uuid):
+        return ({"status": "error", "reason": "Invalid uuid"}, 400)
+    level = r_serv_term.hget('tracker:{}'.format(term_uuid), 'level')
+    if not level:
+        return ({"status": "error", "reason": "Unknown uuid"}, 404)
+    if level == '0': # level is stored as a string
+        if r_serv_term.hget('tracker:{}'.format(term_uuid), 'user_id') != user_id:
+            if not is_in_role(user_id, 'admin'):
+                return ({"status": "error", "reason": "Unknown uuid"}, 404)
+    return None
+
+
+def is_valid_mail(email):
+    result = email_regex.match(email)
+    if result:
+        return True
+    else:
+        return False
+
+def verify_mail_list(mail_list):
+    for mail in mail_list:
+        if not is_valid_mail(mail):
+            return ({'status': 'error', 'reason': 'Invalid email', 'value': mail}, 400)
+    return None
+
+def is_valid_regex(term_regex):
+    try:
+        re.compile(term_regex)
+        return True
+    except:
+        return False
+
+def get_text_word_frequency(item_content, filtering=True):
+    item_content = item_content.lower()
+    words_dict = defaultdict(int)
+
+    if filtering:
+        blob = TextBlob(item_content , tokenizer=tokenizer)
+    else:
+        blob = TextBlob(item_content)
+    for word in blob.tokens:
+        words_dict[word] += 1
+    return words_dict
+
+# # TODO: create all tracked words
+def get_tracked_words_list():
+    return list(r_serv_term.smembers('all:tracker:word'))
+
+def get_set_tracked_words_list():
+    set_list = r_serv_term.smembers('all:tracker:set')
+    all_set_list = []
+    for elem in set_list:
+        res = elem.split(';')
+        num_words = int(res[1])
+        ter_set = res[0].split(',')
+        all_set_list.append((ter_set, num_words, elem))
+    return all_set_list
+
+def get_regex_tracked_words_dict():
+    regex_list = r_serv_term.smembers('all:tracker:regex')
+    dict_tracked_regex = {}
+    for regex in regex_list:
+        dict_tracked_regex[regex] = re.compile(regex)
+    return dict_tracked_regex
+
+def get_tracked_term_list_item(term_uuid, date_from, date_to):
+    all_item_id = 
[]
+    if date_from and date_to:
+        for date in r_serv_term.zrangebyscore('tracker:stat:{}'.format(term_uuid), int(date_from), int(date_to)):
+            all_item_id = all_item_id + list(r_serv_term.smembers('tracker:item:{}:{}'.format(term_uuid, date)))
+    return all_item_id
+
+def is_term_tracked_in_global_level(term, term_type):
+    res = r_serv_term.smembers('all:tracker_uuid:{}:{}'.format(term_type, term))
+    if res:
+        for elem_uuid in res:
+            if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'level')=='1':
+                return True
+    return False
+
+def is_term_tracked_in_user_level(term, term_type, user_id):
+    res = r_serv_term.smembers('user:tracker:{}'.format(user_id))
+    if res:
+        for elem_uuid in res:
+            if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'tracked')== term:
+                if r_serv_term.hget('tracker:{}'.format(elem_uuid), 'type')== term_type:
+                    return True
+    return False
+
+def parse_json_term_to_add(dict_input, user_id):
+    term = dict_input.get('term', None)
+    if not term:
+        return ({"status": "error", "reason": "Term not provided"}, 400)
+    term_type = dict_input.get('type', None)
+    if not term_type:
+        return ({"status": "error", "reason": "Term type not provided"}, 400)
+    nb_words = dict_input.get('nb_words', 1)
+
+    res = parse_tracked_term_to_add(term , term_type, nb_words=nb_words)
+    if res[1]!=200:
+        return res
+    term = res[0]['term']
+    term_type = res[0]['type']
+
+    tags = dict_input.get('tags', [])
+    mails = dict_input.get('mails', [])
+    res = verify_mail_list(mails)
+    if res:
+        return res
+
+    ## TODO: add dashboard key
+    level = dict_input.get('level', 1)
+    try:
+        level = int(level)
+        if level not in (0, 1):
+            level = 1
+    except:
+        level = 1
+
+    # check if term already tracked in global
+    if level==1:
+        if is_term_tracked_in_global_level(term, term_type):
+            return ({"status": "error", "reason": "Term already tracked"}, 409)
+    else:
+        if is_term_tracked_in_user_level(term, term_type, user_id):
+            return ({"status": "error", "reason": "Term already tracked"}, 409)
+
+    term_uuid = add_tracked_term(term , term_type, user_id, level, tags, mails)
+
+    return ({'term': term, 'type': term_type, 'uuid': term_uuid}, 200)
+
+
+def parse_tracked_term_to_add(term , term_type, nb_words=1):
+    if term_type=='regex':
+        if not is_valid_regex(term):
+            return ({"status": "error", "reason": "Invalid regex"}, 400)
+    elif term_type=='word' or term_type=='set':
+        # force lowercase
+        term = term.lower()
+        word_set = set(term)
+        set_inter = word_set.intersection(special_characters)
+        if set_inter:
+            return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400)
+        words = term.split()
+        # not a word
+        if term_type=='word' and len(words)>1:
+            term_type = 'set'
+
+        # output format: term1,term2,term3;2
+        if term_type=='set':
+            try:
+                nb_words = int(nb_words)
+            except:
+                nb_words = 1
+            if nb_words==0:
+                nb_words = 1
+
+            words_set = set(words)
+            words_set = sorted(words_set)
+
+            # clamp the threshold before encoding it in the term
+            if nb_words > len(words_set):
+                nb_words = len(words_set)
+
+            term = ",".join(words_set)
+            term = "{};{}".format(term, nb_words)
+
+    else:
+        return ({"status": "error", "reason": "Incorrect type"}, 400)
+    return ({"status": "success", "term": term, "type": term_type}, 200)
+
+def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0):
+
+    term_uuid = str(uuid.uuid4())
+
+    # create metadata
+    r_serv_term.hset('tracker:{}'.format(term_uuid), 'tracked',term)
+    r_serv_term.hset('tracker:{}'.format(term_uuid), 'type', term_type)
+    
r_serv_term.hset('tracker:{}'.format(term_uuid), 'date', datetime.date.today().strftime("%Y%m%d"))
+    r_serv_term.hset('tracker:{}'.format(term_uuid), 'user_id', user_id)
+    r_serv_term.hset('tracker:{}'.format(term_uuid), 'level', level)
+    r_serv_term.hset('tracker:{}'.format(term_uuid), 'dashboard', dashboard)
+
+    # create all term set
+    r_serv_term.sadd('all:tracker:{}'.format(term_type), term)
+
+    # create term - uuid map
+    r_serv_term.sadd('all:tracker_uuid:{}:{}'.format(term_type, term), term_uuid)
+
+    # add display level set
+    if level == 0: # user only
+        r_serv_term.sadd('user:tracker:{}'.format(user_id), term_uuid)
+        r_serv_term.sadd('user:tracker:{}:{}'.format(user_id, term_type), term_uuid)
+    elif level == 1: # global
+        r_serv_term.sadd('global:tracker', term_uuid)
+        r_serv_term.sadd('global:tracker:{}'.format(term_type), term_uuid)
+
+    # create term tags list
+    for tag in tags:
+        r_serv_term.sadd('tracker:tags:{}'.format(term_uuid), tag)
+
+    # create term tags mail notification list
+    for mail in mails:
+        r_serv_term.sadd('tracker:mail:{}'.format(term_uuid), mail)
+
+    # toggle refresh module tracker list/set
+    r_serv_term.set('tracker:refresh:{}'.format(term_type), time.time())
+
+    return term_uuid
+
+def parse_tracked_term_to_delete(dict_input, user_id):
+    term_uuid = dict_input.get("uuid", None)
+    res = check_term_uuid_valid_access(term_uuid, user_id)
+    if res:
+        return res
+
+    delete_term(term_uuid)
+    return ({"uuid": term_uuid}, 200)
+
+def delete_term(term_uuid):
+    term = r_serv_term.hget('tracker:{}'.format(term_uuid), 'tracked')
+    term_type = r_serv_term.hget('tracker:{}'.format(term_uuid), 'type')
+    level = r_serv_term.hget('tracker:{}'.format(term_uuid), 'level')
+    r_serv_term.srem('all:tracker_uuid:{}:{}'.format(term_type, term), term_uuid)
+    # Term not tracked by other users
+    if not r_serv_term.exists('all:tracker_uuid:{}:{}'.format(term_type, term)):
+        r_serv_term.srem('all:tracker:{}'.format(term_type), term)
+
+        # toggle refresh module tracker list/set
+        r_serv_term.set('tracker:refresh:{}'.format(term_type), time.time())
+
+    if level == '0': # user only
+        user_id = r_serv_term.hget('tracker:{}'.format(term_uuid), 'user_id')
+        r_serv_term.srem('user:tracker:{}'.format(user_id), term_uuid)
+        r_serv_term.srem('user:tracker:{}:{}'.format(user_id, term_type), term_uuid)
+    elif level == '1': # global
+        r_serv_term.srem('global:tracker', term_uuid)
+        r_serv_term.srem('global:tracker:{}'.format(term_type), term_uuid)
+
+    # delete metadata
+    r_serv_term.delete('tracker:{}'.format(term_uuid))
+
+    # remove tags
+    r_serv_term.delete('tracker:tags:{}'.format(term_uuid))
+
+    # remove mails
+    r_serv_term.delete('tracker:mail:{}'.format(term_uuid))
+
+    # remove item set
+    all_item_date = r_serv_term.zrange('tracker:stat:{}'.format(term_uuid), 0, -1)
+    for date in all_item_date:
+        r_serv_term.delete('tracker:item:{}:{}'.format(term_uuid, date))
+    r_serv_term.delete('tracker:stat:{}'.format(term_uuid))
+
+def replace_tracked_term_tags(term_uuid, tags):
+    r_serv_term.delete('tracker:tags:{}'.format(term_uuid))
+    for tag in tags:
+        r_serv_term.sadd('tracker:tags:{}'.format(term_uuid), tag)
+
+def replace_tracked_term_mails(term_uuid, mails):
+    res = verify_mail_list(mails)
+    if res:
+        return res
+    else:
+        r_serv_term.delete('tracker:mail:{}'.format(term_uuid))
+        for mail in mails:
+            r_serv_term.sadd('tracker:mail:{}'.format(term_uuid), mail)
+
+def get_term_uuid_list(term, term_type):
+    return list(r_serv_term.smembers('all:tracker_uuid:{}:{}'.format(term_type, term)))
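+
+# Illustration of the set-term storage format (example values, not used by
+# the code): a set-type term is stored as 'word1,word2,...;threshold', e.g.
+#
+#   parse_tracked_term_to_add('AIL framework leak', 'set', nb_words=2)
+#   -> ({'status': 'success', 'term': 'ail,framework,leak;2', 'type': 'set'}, 200)
+#
+# and get_set_tracked_words_list() yields (['ail', 'framework', 'leak'], 2, 'ail,framework,leak;2').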
+
+def get_term_tags(term_uuid):
+    return list(r_serv_term.smembers('tracker:tags:{}'.format(term_uuid)))
+
+def get_term_mails(term_uuid):
+    return list(r_serv_term.smembers('tracker:mail:{}'.format(term_uuid)))
+
+def add_tracked_item(term_uuid, item_id, item_date):
+    # track item
+    r_serv_term.sadd('tracker:item:{}:{}'.format(term_uuid, item_date), item_id)
+    # track nb item by date
+    r_serv_term.zadd('tracker:stat:{}'.format(term_uuid), item_date, int(item_date))
+
+def create_token_statistics(item_date, word, nb):
+    r_serv_term.zincrby('stat_token_per_item_by_day:{}'.format(item_date), word, 1)
+    r_serv_term.zincrby('stat_token_total_by_day:{}'.format(item_date), word, nb)
+    r_serv_term.sadd('stat_token_history', item_date)
+
+def delete_token_statistics_by_date(item_date):
+    r_serv_term.delete('stat_token_per_item_by_day:{}'.format(item_date))
+    r_serv_term.delete('stat_token_total_by_day:{}'.format(item_date))
+    r_serv_term.srem('stat_token_history', item_date)
+
+def get_all_token_stat_history():
+    return r_serv_term.smembers('stat_token_history')
+
+def get_tracked_term_last_updated_by_type(term_type):
+    epoch_update = r_serv_term.get('tracker:refresh:{}'.format(term_type))
+    if not epoch_update:
+        epoch_update = 0
+    return float(epoch_update)
+
+def parse_get_tracker_term_item(dict_input, user_id):
+    term_uuid = dict_input.get('uuid', None)
+    res = check_term_uuid_valid_access(term_uuid, user_id)
+    if res:
+        return res
+
+    date_from = dict_input.get('date_from', None)
+    date_to = dict_input.get('date_to', None)
+
+    if date_from is None:
+        # default to the first tracked item date (a YYYYMMDD string)
+        date_from = get_tracked_term_first_seen(term_uuid)
+
+    if date_to is None:
+        date_to = date_from
+
+    if date_from > date_to:
+        date_from = date_to
+
+    all_item_id = get_tracked_term_list_item(term_uuid, date_from, date_to)
+    all_item_id = Item.get_item_list_desc(all_item_id)
+
+    res_dict = {}
+    res_dict['uuid'] = term_uuid
+    res_dict['date_from'] = date_from
+    res_dict['date_to'] = date_to
+    res_dict['items'] = all_item_id
+    return (res_dict, 200)
+
+def get_tracked_term_first_seen(term_uuid):
+    res = r_serv_term.zrange('tracker:stat:{}'.format(term_uuid), 0, 0)
+    if res:
+        return res[0]
+    else:
+        return None
+
+def get_tracked_term_last_seen(term_uuid):
+    res = r_serv_term.zrevrange('tracker:stat:{}'.format(term_uuid), 0, 0)
+    if res:
+        return res[0]
+    else:
+        return None
+
+def get_term_metedata(term_uuid, user_id=False, level=False, tags=False, mails=False, sparkline=False):
+    dict_uuid = {}
+    dict_uuid['term'] = r_serv_term.hget('tracker:{}'.format(term_uuid), 'tracked')
+    dict_uuid['type'] = r_serv_term.hget('tracker:{}'.format(term_uuid), 'type')
+    dict_uuid['date'] = r_serv_term.hget('tracker:{}'.format(term_uuid), 'date')
+    dict_uuid['first_seen'] = get_tracked_term_first_seen(term_uuid)
+    dict_uuid['last_seen'] = get_tracked_term_last_seen(term_uuid)
+    if user_id:
+        dict_uuid['user_id'] = r_serv_term.hget('tracker:{}'.format(term_uuid), 'user_id')
+    if level:
+        dict_uuid['level'] = r_serv_term.hget('tracker:{}'.format(term_uuid), 'level')
+    if mails:
+        dict_uuid['mails'] = get_list_trackeed_term_mails(term_uuid)
+    if tags:
+        dict_uuid['tags'] = get_list_trackeed_term_tags(term_uuid)
+    if sparkline:
+        dict_uuid['sparkline'] = get_tracked_term_sparkline(term_uuid)
+    dict_uuid['uuid'] = term_uuid
+    return dict_uuid
+
+def get_tracked_term_sparkline(tracker_uuid, num_day=6):
+    date_range_sparkline = Date.get_date_range(num_day)
+    sparklines_value = []
+    for date_day in date_range_sparkline:
+        # scard returns 0 when the day set does not exist
+        nb_seen_this_day = r_serv_term.scard('tracker:item:{}:{}'.format(tracker_uuid, date_day))
+        sparklines_value.append(int(nb_seen_this_day))
+    return sparklines_value
+
+def get_list_tracked_term_stats_by_day(list_tracker_uuid, num_day=31, date_from=None, date_to=None):
+    if date_from and date_to:
+        date_range = Date.substract_date(date_from, date_to)
+    else:
+        date_range = Date.get_date_range(num_day)
+    list_tracker_stats = []
+    for tracker_uuid in list_tracker_uuid:
+        dict_tracker_data = []
+        tracker = r_serv_term.hget('tracker:{}'.format(tracker_uuid), 'tracked')
+        for date_day in date_range:
+            # scard returns 0 when no item was matched on that day
+            nb_seen_this_day = r_serv_term.scard('tracker:item:{}:{}'.format(tracker_uuid, date_day))
+            dict_tracker_data.append({"date": date_day, "value": int(nb_seen_this_day)})
+        list_tracker_stats.append({"name": tracker, "Data": dict_tracker_data})
+    return list_tracker_stats
+
+def get_list_trackeed_term_tags(term_uuid):
+    res = r_serv_term.smembers('tracker:tags:{}'.format(term_uuid))
+    if res:
+        return list(res)
+    else:
+        return []
+
+def get_list_trackeed_term_mails(term_uuid):
+    res = r_serv_term.smembers('tracker:mail:{}'.format(term_uuid))
+    if res:
+        return list(res)
+    else:
+        return []
+
+def get_user_tracked_term_uuid(user_id, filter_type=None):
+    if filter_type:
+        return list(r_serv_term.smembers('user:tracker:{}:{}'.format(user_id, filter_type)))
+    else:
+        return list(r_serv_term.smembers('user:tracker:{}'.format(user_id)))
+
+def get_global_tracked_term_uuid(filter_type=None):
+    if filter_type:
+        return list(r_serv_term.smembers('global:tracker:{}'.format(filter_type)))
+    else:
+        return list(r_serv_term.smembers('global:tracker'))
+
+def get_all_user_tracked_terms(user_id, filter_type=None):
+    all_user_term = []
+    all_user_term_uuid = get_user_tracked_term_uuid(user_id, filter_type=filter_type)
+
+    for term_uuid in all_user_term_uuid:
+        all_user_term.append(get_term_metedata(term_uuid, tags=True, mails=True, sparkline=True))
+    return all_user_term
+
+def get_all_global_tracked_terms(filter_type=None):
+    all_user_term = []
+    all_user_term_uuid = get_global_tracked_term_uuid(filter_type=filter_type)
+
+    for term_uuid in all_user_term_uuid:
+        all_user_term.append(get_term_metedata(term_uuid, user_id=True, tags=True, mails=True, sparkline=True))
+    return all_user_term
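
A minimal usage sketch of the new helpers above (hypothetical smoke test, not part of the diff; assumes a running ARDB instance, `AIL_BIN` set in the environment, and placeholder user/item ids):

```python
#!/usr/bin/env python3
# Hypothetical smoke test for the new tracker helpers in packages/Term.py.
import os
import sys

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Term

# create a global word tracker (same call Update.py uses during the migration)
res = Term.parse_json_term_to_add({"term": "ransomware", "type": "word",
                                   "tags": [], "mails": [], "level": 1},
                                  'admin@admin.test')
if res[1] == 200:
    term_uuid = res[0]['uuid']
    # record one matched item, then read back metadata and the 6-day sparkline
    Term.add_tracked_item(term_uuid, 'submitted/2019/05/20/example.gz', '20190520')
    print(Term.get_term_metedata(term_uuid, level=True, tags=True, sparkline=True))
```
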
diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample
index b6f26231..c59e5c6b 100644
--- a/bin/packages/config.cfg.sample
+++ b/bin/packages/config.cfg.sample
@@ -23,7 +23,7 @@ sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon
 
 ##### Notifications ######
 [Notifications]
-ail_domain = http://localhost:7000
+ail_domain = https://localhost:7000
 sender = sender@example.com
 sender_host = smtp.example.com
 sender_port = 1337
@@ -107,7 +107,10 @@ operation_mode = 3
 ttl_duplicate = 86400
 default_unnamed_feed_name = unnamed_feeder
 
-[RegexForTermsFrequency]
+[TermTrackerMod]
+max_execution_time = 120
+
+[RegexTracker]
 max_execution_time = 60
 
 ##### Redis #####
@@ -177,6 +180,11 @@ host = localhost
 port = 6382
 db = 3
 
+[ARDB_Tracker]
+host = localhost
+port = 6382
+db = 3
+
 [ARDB_Hashs]
 host = localhost
 db = 1
diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py
index 54581403..e44a922c 100644
--- a/bin/packages/lib_words.py
+++ b/bin/packages/lib_words.py
@@ -11,62 +11,10 @@ from dateutil.rrule import rrule, DAILY
 
 import csv
 
-def listdirectory(path):
-    """Path Traversing Function.
-
-    :param path: -- The absolute pathname to a directory.
-
-    This function is returning all the absolute path of the files contained in
-    the argument directory.
-
-    """
-    fichier = []
-    for root, dirs, files in os.walk(path):
-
-        for i in files:
-
-            fichier.append(os.path.join(root, i))
-
-    return fichier
-
 clean = lambda dirty: ''.join(filter(string.printable.__contains__, dirty))
 """It filters out non-printable characters from the string it receives."""
 
-def create_dirfile(r_serv, directory, overwrite):
-    """Create a file of path.
-
-    :param r_serv: -- connexion to redis database
-    :param directory: -- The folder where to launch the listing of the .gz files
-
-    This function create a list in redis with inside the absolute path
-    of all the pastes needed to be proceeded by function using parallel
-    (like redis_words_ranking)
-
-    """
-    if overwrite:
-        r_serv.delete("filelist")
-
-        for x in listdirectory(directory):
-            r_serv.lpush("filelist", x)
-
-        publisher.info("The list was overwritten")
-
-    else:
-        if r_serv.llen("filelist") == 0:
-
-            for x in listdirectory(directory):
-                r_serv.lpush("filelist", x)
-
-            publisher.info("New list created")
-        else:
-
-            for x in listdirectory(directory):
-                r_serv.lpush("filelist", x)
-
-            publisher.info("The list was updated with new elements")
-
 def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month):
     """Create a csv file used with dygraph.
diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg
index f1fe5e3d..7c8e3138 100644
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@@ -19,32 +19,17 @@ subscribe = Redis_Global
 [Attributes]
 subscribe = Redis_Global
 
-[Lines]
-subscribe = Redis_Global
-publish = Redis_LinesShort,Redis_LinesLong
-
 [DomClassifier]
 subscribe = Redis_Global
 
-[Tokenize]
-subscribe = Redis_LinesShort
-publish = Redis_Words
-
-[Curve]
-subscribe = Redis_Words
-publish = Redis_CurveManageTopSets,Redis_Tags
-
-[RegexForTermsFrequency]
+[TermTrackerMod]
 subscribe = Redis_Global
 publish = Redis_Tags
 
-[SetForTermsFrequency]
+[RegexTracker]
 subscribe = Redis_Global
 publish = Redis_Tags
 
-[CurveManageTopSets]
-subscribe = Redis_CurveManageTopSets
-
 [Categ]
 subscribe = Redis_Global
 publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
diff --git a/doc/README.md b/doc/README.md
index dc116920..8750c475 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -630,9 +630,6 @@ curl https://127.0.0.1:7000/api/v1/get/tag/metadata --header "Authorization: iHc
 
 
 
-
-
-
 ## Cryptocurrency
 
 
@@ -743,6 +740,202 @@ curl https://127.0.0.1:7000/api/v1/get/cryptocurrency/bitcoin/item --header "Aut
+
+
+
+
+## Tracker
+
+
+
+### Add term tracker: `api/v1/add/tracker/term`
+
+#### Description
+Add a term tracker.
+
+**Method** : `POST`
+
+#### Parameters
+- `term`
+  - term to add
+  - *str - word(s)*
+  - mandatory
+- `nb_words`
+  - number of words in set
+  - *int*
+  - default: `1`
+- `type`
+  - term type
+  - *str*
+  - mandatory, one of: `word`, `set`, `regex`
+- `tags`
+  - list of tags
+  - *list*
+  - default: `[]`
+- `mails`
+  - list of mails to notify
+  - *list*
+  - default: `[]`
+- `level`
+  - tracker visibility
+  - *int - 0: user only, 1: all users*
+  - default: `1`
+
+#### JSON response
+- `uuid`
+  - tracker uuid
+  - *uuid4*
+
+#### Example
+```
+curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
+```
+
+#### input.json Example
+```json
+  {
+    "term": "test",
+    "type": "word",
+    "tags": ["custom-tag"],
+    "mails": ["user@example.com"],
+    "level": 1
+  }
+```
+
+#### Expected Success Response
+**HTTP Status Code** : `200`
+
+```json
+  {
+    "uuid": "239bb6dc-9c48-4731-bb10-b92bc27ee8ab"
+  }
+```
+
+#### Expected Fail Response
+**HTTP Status Code** : `400`
+
+```json
+
+```
+
+
+
+
+### Delete term tracker: `api/v1/delete/tracker/term`
+
+#### Description
+Delete a term tracker.
+
+**Method** : `DELETE`
+
+#### Parameters
+- `uuid`
+  - tracked term uuid
+  - *uuid4*
+  - mandatory
+
+#### JSON response
+- `uuid`
+  - deleted uuid
+  - *uuid4*
+
+#### Example
+```
+curl https://127.0.0.1:7000/api/v1/delete/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X DELETE
+```
+
+#### input.json Example
+```json
+  {
+    "uuid": "239bb6dc-9c48-4731-bb10-b92bc27ee8ab"
+  }
+```
+
+#### Expected Success Response
+**HTTP Status Code** : `200`
+
+```json
+  {
+    "uuid": "239bb6dc-9c48-4731-bb10-b92bc27ee8ab"
+  }
+```
+
+#### Expected Fail Response
+**HTTP Status Code** : `400`
+
+```json
+
+```
+
+
+
+
+
+### Get tracker items: `api/v1/get/tracker/term/item`
+
+#### Description
+Get the items matched by a term tracker over a date range.
+
+**Method** : `POST`
+
+#### Parameters
+- `uuid`
+  - tracked term uuid
+  - *uuid4*
+  - mandatory
+- `date_from`
+  - date from
+  - *str - YYYYMMDD*
+  - default: first tracked item date
+- `date_to`
+  - date to
+  - *str - YYYYMMDD*
+  - default: `date_from`
+
+#### JSON response
+- `uuid`
+  - term uuid
+  - *uuid4*
+- `date_from`
+  - date from
+  - *str - YYYYMMDD*
+- `date_to`
+  - date to
+  - *str - YYYYMMDD*
+- `items`
+  - list of item id
+  - *list*
+
+#### Example
+```
+curl https://127.0.0.1:7000/api/v1/get/tracker/term/item --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
+```
+
+#### input.json Example
+```json
+  {
+    "uuid": "239bb6dc-9c48-4731-bb10-b92bc27ee8ab",
+    "date_from": "20190520",
+    "date_to": "20190522"
+  }
+```
+
+#### Expected Success Response
+**HTTP Status Code** : `200`
+
+```json
+  {
+    "uuid": "239bb6dc-9c48-4731-bb10-b92bc27ee8ab",
+    "date_from": "20190520",
+    "date_to": "20190522",
+    "items": []
+  }
+```
+
+#### Expected Fail Response
+**HTTP Status Code** : `400`
+
+```json
+
+```
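
For a quick end-to-end check of the three endpoints above, a hypothetical Python client sketch (token, dates, and term are placeholders; `verify=False` assumes AIL's default self-signed certificate):

```python
# Hypothetical client for the new tracker API -- not part of the diff.
import requests

AIL_URL = 'https://127.0.0.1:7000'
HEADERS = {'Authorization': 'iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj'}

# add a word tracker
r = requests.post('{}/api/v1/add/tracker/term'.format(AIL_URL), headers=HEADERS,
                  json={'term': 'test', 'type': 'word', 'level': 1}, verify=False)
term_uuid = r.json()['uuid']

# fetch the items it matched over a date range
r = requests.post('{}/api/v1/get/tracker/term/item'.format(AIL_URL), headers=HEADERS,
                  json={'uuid': term_uuid, 'date_from': '20190520', 'date_to': '20190522'},
                  verify=False)
print(r.json()['items'])

# delete the tracker
requests.delete('{}/api/v1/delete/tracker/term'.format(AIL_URL), headers=HEADERS,
                json={'uuid': term_uuid}, verify=False)
```
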
+
+
+
+
 ## Import management
 
diff --git a/update/default_update/Update.sh b/update/default_update/Update.sh
index 5809fb38..189ae846 100755
--- a/update/default_update/Update.sh
+++ b/update/default_update/Update.sh
@@ -20,8 +20,8 @@ export PATH=$AIL_FLASK:$PATH
 GREEN="\\033[1;32m"
 DEFAULT="\\033[0;39m"
 
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -k
+echo -e $GREEN"Shutting down AIL Script ..."$DEFAULT
+bash ${AIL_BIN}/LAUNCH.sh -ks
 wait
 
 echo ""
@@ -37,8 +37,8 @@ echo ""
 echo ""
 echo ""
 
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -k
+echo -e $GREEN"Killing Script ..."$DEFAULT
+bash ${AIL_BIN}/LAUNCH.sh -ks
 wait
 
 echo ""
diff --git a/update/v2.2/Update.py b/update/v2.2/Update.py
new file mode 100755
index 00000000..6a762f6c
--- /dev/null
+++ b/update/v2.2/Update.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+import time
+import redis
+import datetime
+import configparser
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
+import Item
+import Term
+
+
+def rreplace(s, old, new, occurrence):
+    li = s.rsplit(old, occurrence)
+    return new.join(li)
+
+
+if __name__ == '__main__':
+
+    start_deb = time.time()
+
+    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+    if not os.path.exists(configfile):
+        raise Exception('Unable to find the configuration file. \
+                        Did you set environment variables? \
+                        Or activate the virtualenv.')
+
+    cfg = configparser.ConfigParser()
+    cfg.read(configfile)
+
+    # main ARDB db, used below to store the ail version and update date
+    r_serv = redis.StrictRedis(
+        host=cfg.get("ARDB_DB", "host"),
+        port=cfg.getint("ARDB_DB", "port"),
+        db=cfg.getint("ARDB_DB", "db"),
+        decode_responses=True)
+
+    r_serv_term_stats = redis.StrictRedis(
+        host=cfg.get("ARDB_Trending", "host"),
+        port=cfg.getint("ARDB_Trending", "port"),
+        db=cfg.getint("ARDB_Trending", "db"),
+        decode_responses=True)
+
+    r_serv_termfreq = redis.StrictRedis(
+        host=cfg.get("ARDB_TermFreq", "host"),
+        port=cfg.getint("ARDB_TermFreq", "port"),
+        db=cfg.getint("ARDB_TermFreq", "db"),
+        decode_responses=True)
+
+    r_serv_term_stats.flushdb()
+
+    #convert all regex:
+    all_regex = r_serv_termfreq.smembers('TrackedRegexSet')
+    for regex in all_regex:
+        tags = list( r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(regex)) )
+        mails = list( r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(regex)) )
+
+        new_term = regex[1:-1]
+        res = Term.parse_json_term_to_add({"term": new_term, "type": 'regex', "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
+        if res[1] == 200:
+            term_uuid = res[0]['uuid']
+            list_items = r_serv_termfreq.smembers('regex_{}'.format(regex))
+            for paste_item in list_items:
+                item_id = Item.get_item_id(paste_item)
+                item_date = Item.get_item_date(item_id)
+                Term.add_tracked_item(term_uuid, item_id, item_date)
+
+        # Invalid Tracker => remove it
+        else:
+            print('Invalid Regex Removed: {}'.format(regex))
+            print(res[0])
+            # allow reprocess
+            r_serv_termfreq.srem('TrackedRegexSet', regex)
+
+    all_tokens = r_serv_termfreq.smembers('TrackedSetTermSet')
+    for token in all_tokens:
+        tags = list( r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(token)) )
+        mails = list( r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(token)) )
+
+        res = Term.parse_json_term_to_add({"term": token, "type": 'word', "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
+        if res[1] == 200:
+            term_uuid = res[0]['uuid']
+            list_items = r_serv_termfreq.smembers('tracked_{}'.format(token))
+            for paste_item in list_items:
+                item_id = Item.get_item_id(paste_item)
+                item_date = Item.get_item_date(item_id)
+                Term.add_tracked_item(term_uuid, item_id, item_date)
+        # Invalid Tracker => remove it
+        else:
+            print('Invalid Token Removed: {}'.format(token))
+            print(res[0])
+            # allow reprocess
+            r_serv_termfreq.srem('TrackedSetTermSet', token)
+
+    all_set = r_serv_termfreq.smembers('TrackedSetSet')
+    for curr_set in all_set:
+        tags = list( r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(curr_set)) )
+        mails = list( r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(curr_set)) )
+
+        to_remove = ',{}'.format(curr_set.split(',')[-1])
+        new_set = rreplace(curr_set, to_remove, '', 1)
+        new_set = new_set[2:]
+        new_set = new_set.replace(',', '')
+
+        res = Term.parse_json_term_to_add({"term": new_set, "type": 'set', "nb_words": 1, "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
+        if res[1] == 200:
+            term_uuid = res[0]['uuid']
+            list_items = r_serv_termfreq.smembers('tracked_{}'.format(curr_set))
+            for paste_item in list_items:
+                item_id = Item.get_item_id(paste_item)
+                item_date = Item.get_item_date(item_id)
+                Term.add_tracked_item(term_uuid, item_id, item_date)
+        # Invalid Tracker => remove it
+        else:
+            print('Invalid Set Removed: {}'.format(curr_set))
+            print(res[0])
+            # allow reprocess
+            r_serv_termfreq.srem('TrackedSetSet', curr_set)
+
+    r_serv_termfreq.flushdb()
+
+    #Set current ail version
+    r_serv.set('ail:version', 'v2.2')
+
+    #Set the update date
+    r_serv.hset('ail:update_date', 'v2.2', datetime.datetime.now().strftime("%Y%m%d"))
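
The old-set-syntax conversion above is dense; an illustrative trace (hypothetical input value, runnable standalone) shows the intended transformation:

```python
# Trace of the set conversion in update/v2.2/Update.py; the sample value is
# hypothetical, following the old '\[word1, word2, [percent]]\' notation.
def rreplace(s, old, new, occurrence):
    li = s.rsplit(old, occurrence)
    return new.join(li)

curr_set = '\\[apikey, secret, [50]]\\'
to_remove = ',{}'.format(curr_set.split(',')[-1])  # ', [50]]\' -- match-percent suffix
new_set = rreplace(curr_set, to_remove, '', 1)     # '\[apikey, secret'
new_set = new_set[2:]                              # 'apikey, secret'
new_set = new_set.replace(',', '')                 # 'apikey secret'
print(new_set)  # space-separated words for the new 'set' tracker type
```
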
diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py
index 0e3852e7..49f2919c 100644
--- a/var/www/modules/Flask_config.py
+++ b/var/www/modules/Flask_config.py
@@ -61,9 +61,9 @@ r_serv_sentiment = redis.StrictRedis(
     decode_responses=True)
 
 r_serv_term = redis.StrictRedis(
-    host=cfg.get("ARDB_TermFreq", "host"),
-    port=cfg.getint("ARDB_TermFreq", "port"),
-    db=cfg.getint("ARDB_TermFreq", "db"),
+    host=cfg.get("ARDB_Tracker", "host"),
+    port=cfg.getint("ARDB_Tracker", "port"),
+    db=cfg.getint("ARDB_Tracker", "db"),
     decode_responses=True)
 
 r_serv_cred = redis.StrictRedis(
diff --git a/var/www/modules/hashDecoded/templates/DaysCorrelation.html b/var/www/modules/hashDecoded/templates/DaysCorrelation.html
index 2051cef0..1796dc66 100644
--- a/var/www/modules/hashDecoded/templates/DaysCorrelation.html
+++ b/var/www/modules/hashDecoded/templates/DaysCorrelation.html
@@ -20,6 +20,7 @@
+
diff --git a/var/www/modules/hunter/templates/showTracker.html b/var/www/modules/hunter/templates/showTracker.html
new file mode 100644
+
+
+
+
+
+    {% include 'nav_bar.html' %}
+
+
+
+ + {% include 'hunter/menu_sidebar.html' %} + +
+ +
+
+

{{ tracker_metadata['uuid'] }}

+
    +
  • +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    TypeTrackerDate addedLevelCreated byFirst seenLast seenTags Email
    {{ tracker_metadata['type'] }}{{ tracker_metadata['term'] }}{{ tracker_metadata['date'][0:4] }}/{{ tracker_metadata['date'][4:6] }}/{{ tracker_metadata['date'][6:8] }}{{ tracker_metadata['level'] }}{{ tracker_metadata['user_id'] }} + {% if tracker_metadata['first_seen'] %} + {{ tracker_metadata['first_seen'][0:4] }}/{{ tracker_metadata['first_seen'][4:6] }}/{{ tracker_metadata['first_seen'][6:8] }} + {% endif %} + + {% if tracker_metadata['last_seen'] %} + {{ tracker_metadata['last_seen'][0:4] }}/{{ tracker_metadata['last_seen'][4:6] }}/{{ tracker_metadata['last_seen'][6:8] }} + {% endif %} + + {% for tag in tracker_metadata['tags'] %} + + {{ tag }} + + {% endfor %} + + + {% for mail in tracker_metadata['mails'] %} + {{ mail }}
    + {% endfor %} +
    +
    +
    +
    +
    +
    +
  • +
+ +
+
+ +
All Tags added for this tracker, space separated:
+
+
+
+
+ +
+ + +
+ +
+ +
+
+ +
All E-Mails to Notify for this tracker, space separated:
+
+
+
+
+ +
+ + +
+ +
+ + + + +
+
+ +
+ +
+
+ +
+
+
+
+ +
+
+
+
+
+ +
+
+
+ + + +
+
+ + {%if tracker_metadata['items']%} +
+ + + + + + + + + + {% for item in tracker_metadata['items'] %} + + + + + {% endfor %} + + +
DateItem Id
+ {{item['date'][0:4]}}/{{item['date'][4:6]}}/{{item['date'][6:8]}} + + +
{{ item['id'] }}
+
+
+ {% for tag in item['tags'] %} + + {{ tag }} + + {% endfor %} +
+
+
+ {% endif %} + +
+
+
+ + + + + + + diff --git a/var/www/modules/hunter/templates/trackersManagement.html b/var/www/modules/hunter/templates/trackersManagement.html new file mode 100644 index 00000000..b11cc94c --- /dev/null +++ b/var/www/modules/hunter/templates/trackersManagement.html @@ -0,0 +1,206 @@ + + + + + + + + Tracker Management + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'hunter/menu_sidebar.html' %} + +
+ +
+
+
Your {{filter_type}} Trackers
+
+
+ + + + + + + + + + + + + {% for dict_uuid in user_term %} + + + + + + + + + {% endfor %} + +
TypeTrackerFirst seenLast seenEmail notificationsparkline
{{dict_uuid['type']}} + {{dict_uuid['term']}} +
+ {% for tag in dict_uuid['tags'] %} + + {{ tag }} + + {% endfor %} +
+
+ {% if dict_uuid['first_seen'] %} + {{dict_uuid['first_seen'][0:4]}}/{{dict_uuid['first_seen'][4:6]}}/{{dict_uuid['first_seen'][6:8]}} + {% endif %} + + {% if dict_uuid['last_seen'] %} + {{dict_uuid['last_seen'][0:4]}}/{{dict_uuid['last_seen'][4:6]}}/{{dict_uuid['last_seen'][6:8]}} + {% endif %} + + {% for mail in dict_uuid['mails'] %} + {{ mail }}
+ {% endfor %} +
+
+
+ +
+
+
Global {{filter_type}} Trackers
+
+
+ + + + + + + + + + + + + {% for dict_uuid in global_term %} + + + + + + + + + {% endfor %} + +
TypeTrackerFirst seenLast seenEmail notificationsparkline
{{dict_uuid['type']}} + {{dict_uuid['term']}} +
+ {% for tag in dict_uuid['tags'] %} + + {{ tag }} + + {% endfor %} +
+
+ {% if dict_uuid['first_seen'] %} + {{dict_uuid['first_seen'][0:4]}}/{{dict_uuid['first_seen'][4:6]}}/{{dict_uuid['first_seen'][6:8]}} + {% endif %} + + {% if dict_uuid['last_seen'] %} + {{dict_uuid['last_seen'][0:4]}}/{{dict_uuid['last_seen'][4:6]}}/{{dict_uuid['last_seen'][6:8]}} + {% endif %} + + {% for mail in dict_uuid['mails'] %} + {{ mail }}
+ {% endfor %} +
+
+
+ + + + + + Create New Tracker + + +
+
+
+ + + + + diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index f942e432..01b08a44 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -19,6 +19,7 @@ import Pgp import Item import Paste import Tag +import Term from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response from flask_login import login_required @@ -57,8 +58,11 @@ def verify_token(token): else: return False +def get_user_from_token(token): + return r_serv_db.hget('user:tokens', token) + def verify_user_role(role, token): - user_id = r_serv_db.hget('user:tokens', token) + user_id = get_user_from_token(token) if user_id: if is_in_role(user_id, role): return True @@ -310,6 +314,37 @@ def get_all_tags(): res = {'tags': Tag.get_all_tags()} return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200 +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +# # # # # # # # # # # # # # TRACKER # # # # # # # # # # # # # # # # # +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +@restApi.route("api/v1/add/tracker/term", methods=['POST']) +@token_required('analyst') +def add_tracker_term(): + data = request.get_json() + user_token = get_auth_from_header() + user_id = get_user_from_token(user_token) + res = Term.parse_json_term_to_add(data, user_id) + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + +@restApi.route("api/v1/delete/tracker/term", methods=['DELETE']) +@token_required('analyst') +def delete_tracker_term(): + data = request.get_json() + user_token = get_auth_from_header() + user_id = get_user_from_token(user_token) + res = Term.parse_tracked_term_to_delete(data, user_id) + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + +@restApi.route("api/v1/get/tracker/term/item", methods=['POST']) +@token_required('analyst') +def get_tracker_term_item(): + data = request.get_json() + user_token = get_auth_from_header() + user_id = get_user_from_token(user_token) + res = Term.parse_get_tracker_term_item(data, user_id) + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # CRYPTOCURRENCY # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -420,7 +455,6 @@ def get_item_cryptocurrency_bitcoin(): return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] ''' - # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # diff --git a/var/www/modules/terms/Flask_terms.py b/var/www/modules/terms/Flask_terms.py index f3b8c7de..c594839a 100644 --- a/var/www/modules/terms/Flask_terms.py +++ b/var/www/modules/terms/Flask_terms.py @@ -6,20 +6,25 @@ note: The matching of credential against supplied credential is done using Levenshtein distance ''' +import json import redis import datetime import calendar import flask -from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect +from flask import Flask, render_template, jsonify, request, Blueprint, url_for, redirect, Response from 
Role_Manager import login_admin, login_analyst -from flask_login import login_required +from flask_login import login_required, current_user import re -import Paste from pprint import pprint import Levenshtein +# --------------------------------------------------------------- + +import Paste +import Term + # ============ VARIABLES ============ import Flask_config @@ -146,338 +151,6 @@ def save_tag_to_auto_push(list_tag): # ============ ROUTES ============ -@terms.route("/terms_management/") -@login_required -@login_analyst -def terms_management(): - per_paste = request.args.get('per_paste') - if per_paste == "1" or per_paste is None: - per_paste_text = "per_paste_" - per_paste = 1 - else: - per_paste_text = "" - per_paste = 0 - - today = datetime.datetime.now() - today = today.replace(hour=0, minute=0, second=0, microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - - # Map tracking if notifications are enabled for a specific term - notificationEnabledDict = {} - - # Maps a specific term to the associated email addresses - notificationEMailTermMapping = {} - notificationTagsTermMapping = {} - - #Regex - trackReg_list = [] - trackReg_list_values = [] - trackReg_list_num_of_paste = [] - for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name): - - notificationEMailTermMapping[tracked_regex] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex) - notificationTagsTermMapping[tracked_regex] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_regex) - - if tracked_regex not in notificationEnabledDict: - notificationEnabledDict[tracked_regex] = False - - trackReg_list.append(tracked_regex) - value_range = Term_getValueOverRange(tracked_regex, today_timestamp, [1, 7, 31], per_paste=per_paste_text) - - term_date = r_serv_term.hget(TrackedRegexDate_Name, tracked_regex) - - set_paste_name = "regex_" + tracked_regex - trackReg_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(term_date) - trackReg_list_values.append(value_range) - - if tracked_regex in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - notificationEnabledDict[tracked_regex] = True - - #Set - trackSet_list = [] - trackSet_list_values = [] - trackSet_list_num_of_paste = [] - for tracked_set in r_serv_term.smembers(TrackedSetSet_Name): - tracked_set = tracked_set - - notificationEMailTermMapping[tracked_set] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set) - notificationTagsTermMapping[tracked_set] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_set) - - if tracked_set not in notificationEnabledDict: - notificationEnabledDict[tracked_set] = False - - trackSet_list.append(tracked_set) - value_range = Term_getValueOverRange(tracked_set, today_timestamp, [1, 7, 31], per_paste=per_paste_text) - - term_date = r_serv_term.hget(TrackedSetDate_Name, tracked_set) - - set_paste_name = "set_" + tracked_set - trackSet_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(term_date) - trackSet_list_values.append(value_range) - - if tracked_set in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - notificationEnabledDict[tracked_set] = True - - #Tracked terms - track_list = [] - track_list_values = [] - 
track_list_num_of_paste = [] - for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): - - notificationEMailTermMapping[tracked_term] = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term) - notificationTagsTermMapping[tracked_term] = r_serv_term.smembers(TrackedTermsNotificationTagsPrefix_Name + tracked_term) - - if tracked_term not in notificationEnabledDict: - notificationEnabledDict[tracked_term] = False - - track_list.append(tracked_term) - value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31], per_paste=per_paste_text) - - term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) - - set_paste_name = "tracked_" + tracked_term - - track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) ) - - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(term_date) - track_list_values.append(value_range) - - if tracked_term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - notificationEnabledDict[tracked_term] = True - - #blacklist terms - black_list = [] - for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name): - term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term) - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - black_list.append([blacked_term, term_date]) - - return render_template("terms_management.html", - black_list=black_list, track_list=track_list, trackReg_list=trackReg_list, trackSet_list=trackSet_list, - track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste, - trackReg_list_values=trackReg_list_values, trackReg_list_num_of_paste=trackReg_list_num_of_paste, - trackSet_list_values=trackSet_list_values, trackSet_list_num_of_paste=trackSet_list_num_of_paste, - per_paste=per_paste, notificationEnabledDict=notificationEnabledDict, bootstrap_label=bootstrap_label, - notificationEMailTermMapping=notificationEMailTermMapping, notificationTagsTermMapping=notificationTagsTermMapping) - - -@terms.route("/terms_management_query_paste/") -@login_required -@login_analyst -def terms_management_query_paste(): - term = request.args.get('term') - paste_info = [] - - # check if regex or not - if term.startswith('/') and term.endswith('/'): - set_paste_name = "regex_" + term - track_list_path = r_serv_term.smembers(set_paste_name) - elif term.startswith('\\') and term.endswith('\\'): - set_paste_name = "set_" + term - track_list_path = r_serv_term.smembers(set_paste_name) - else: - set_paste_name = "tracked_" + term - track_list_path = r_serv_term.smembers(set_paste_name) - - for path in track_list_path: - paste = Paste.Paste(path) - p_date = str(paste._get_p_date()) - p_date = p_date[0:4]+'/'+p_date[4:6]+'/'+p_date[6:8] - p_source = paste.p_source - p_size = paste.p_size - p_mime = paste.p_mime - p_lineinfo = paste.get_lines_info() - p_content = paste.get_p_content() - if p_content != 0: - p_content = p_content[0:400] - paste_info.append({"path": path, "date": p_date, "source": p_source, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) - - return jsonify(paste_info) - - -@terms.route("/terms_management_query/") -@login_required -@login_analyst -def terms_management_query(): - TrackedTermsDate_Name = "TrackedTermDate" - BlackListTermsDate_Name = "BlackListTermDate" - term = request.args.get('term') - section = request.args.get('section') - - today = datetime.datetime.now() - today = 
today.replace(hour=0, minute=0, second=0, microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31]) - - if section == "followTerm": - term_date = r_serv_term.hget(TrackedTermsDate_Name, term) - elif section == "blacklistTerm": - term_date = r_serv_term.hget(BlackListTermsDate_Name, term) - - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(str(term_date)) - return jsonify(value_range) - - -@terms.route("/terms_management_action/", methods=['GET']) -@login_required -@login_analyst -def terms_management_action(): - today = datetime.datetime.now() - today = today.replace(microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - - - section = request.args.get('section') - action = request.args.get('action') - term = request.args.get('term') - notificationEmailsParam = request.args.get('emailAddresses') - input_tags = request.args.get('tags') - - if action is None or term is None or notificationEmailsParam is None: - return "None" - else: - if section == "followTerm": - if action == "add": - - # Make a list of all passed email addresses - notificationEmails = notificationEmailsParam.split() - - validNotificationEmails = [] - # check for valid email addresses - for email in notificationEmails: - # Really basic validation: - # has exactly one @ sign, and at least one . in the part after the @ - if re.match(r"[^@]+@[^@]+\.[^@]+", email): - validNotificationEmails.append(email) - - # create tags list - list_tags = input_tags.split() - - # check if regex/set or simple term - #regex - if term.startswith('/') and term.endswith('/'): - r_serv_term.sadd(TrackedRegexSet_Name, term) - r_serv_term.hset(TrackedRegexDate_Name, term, today_timestamp) - # add all valid emails to the set - for email in validNotificationEmails: - r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + term, email) - # enable notifications by default - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term) - # add tags list - for tag in list_tags: - r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + term, tag) - save_tag_to_auto_push(list_tags) - - #set - elif term.startswith('\\') and term.endswith('\\'): - tab_term = term[1:-1] - perc_finder = re.compile("\[[0-9]{1,3}\]").search(tab_term) - if perc_finder is not None: - match_percent = perc_finder.group(0)[1:-1] - set_to_add = term - else: - match_percent = DEFAULT_MATCH_PERCENT - set_to_add = "\\" + tab_term[:-1] + ", [{}]]\\".format(match_percent) - r_serv_term.sadd(TrackedSetSet_Name, set_to_add) - r_serv_term.hset(TrackedSetDate_Name, set_to_add, today_timestamp) - # add all valid emails to the set - for email in validNotificationEmails: - r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + set_to_add, email) - # enable notifications by default - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, set_to_add) - # add tags list - for tag in list_tags: - r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + set_to_add, tag) - save_tag_to_auto_push(list_tags) - - #simple term - else: - r_serv_term.sadd(TrackedTermsSet_Name, term.lower()) - r_serv_term.hset(TrackedTermsDate_Name, term.lower(), today_timestamp) - # add all valid emails to the set - for email in validNotificationEmails: - r_serv_term.sadd(TrackedTermsNotificationEmailsPrefix_Name + term.lower(), email) - # enable notifications by default - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, 
term.lower()) - # add tags list - for tag in list_tags: - r_serv_term.sadd(TrackedTermsNotificationTagsPrefix_Name + term.lower(), tag) - save_tag_to_auto_push(list_tags) - - elif action == "toggleEMailNotification": - # get the current state - if term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): - # remove it - r_serv_term.srem(TrackedTermsNotificationEnabled_Name, term.lower()) - else: - # add it - r_serv_term.sadd(TrackedTermsNotificationEnabled_Name, term.lower()) - - #del action - else: - if term.startswith('/') and term.endswith('/'): - r_serv_term.srem(TrackedRegexSet_Name, term) - r_serv_term.hdel(TrackedRegexDate_Name, term) - elif term.startswith('\\') and term.endswith('\\'): - r_serv_term.srem(TrackedSetSet_Name, term) - r_serv_term.hdel(TrackedSetDate_Name, term) - else: - r_serv_term.srem(TrackedTermsSet_Name, term.lower()) - r_serv_term.hdel(TrackedTermsDate_Name, term.lower()) - - # delete the associated notification emails too - r_serv_term.delete(TrackedTermsNotificationEmailsPrefix_Name + term) - # delete the associated tags set - r_serv_term.delete(TrackedTermsNotificationTagsPrefix_Name + term) - - elif section == "blacklistTerm": - if action == "add": - r_serv_term.sadd(BlackListTermsSet_Name, term.lower()) - r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp) - else: - r_serv_term.srem(BlackListTermsSet_Name, term.lower()) - else: - return "None" - - to_return = {} - to_return["section"] = section - to_return["action"] = action - to_return["term"] = term - return jsonify(to_return) - -@terms.route("/terms_management/delete_terms_tags", methods=['POST']) -@login_required -@login_analyst -def delete_terms_tags(): - term = request.form.get('term') - tags_to_delete = request.form.getlist('tags_to_delete') - - if term is not None and tags_to_delete is not None: - for tag in tags_to_delete: - r_serv_term.srem(TrackedTermsNotificationTagsPrefix_Name + term, tag) - return redirect(url_for('terms.terms_management')) - else: - return 'None args', 400 - -@terms.route("/terms_management/delete_terms_email", methods=['GET']) -@login_required -@login_analyst -def delete_terms_email(): - term = request.args.get('term') - email = request.args.get('email') - - if term is not None and email is not None: - r_serv_term.srem(TrackedTermsNotificationEmailsPrefix_Name + term, email) - return redirect(url_for('terms.terms_management')) - else: - return 'None args', 400 - @terms.route("/terms_plot_tool/") @login_required diff --git a/var/www/modules/terms/templates/header_terms.html b/var/www/modules/terms/templates/header_terms.html index a19290a5..90e0f6e6 100644 --- a/var/www/modules/terms/templates/header_terms.html +++ b/var/www/modules/terms/templates/header_terms.html @@ -1,7 +1,6 @@ -
  • Terms frequency +
  • Terms frequency