diff --git a/README.md b/README.md index 4dcaf8af..0dd7a325 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ LICENSE Copyright (C) 2014-2016 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique) Copyright (c) 2014-2016 Raphaël Vinot Copyright (c) 2014-2016 Alexandre Dulaunoy + Copyright (c) 2016 Sami Mokaddem This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by diff --git a/bin/Curve.py b/bin/Curve.py index cd641e54..f804f2bd 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -33,7 +33,7 @@ import calendar from Helper import Process # Config Variables -top_term_freq_max_set_cardinality = 50 # Max cardinality of the terms frequences set +top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set def getValueOverRange(word, startDate, num_day): @@ -76,29 +76,49 @@ if __name__ == "__main__": message = p.get_from_set() prec_filename = None generate_new_graph = False - iii = 0 + + # Term Frequency + top_termFreq_setName_day = ["TopTermFreq_set_day", 1] + top_termFreq_setName_week = ["TopTermFreq_set_week", 7] + top_termFreq_setName_month = ["TopTermFreq_set_month", 31] + top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] + while True: + if message is not None: generate_new_graph = True filename, word, score = message.split() temp = filename.split('/') date = temp[-4] + temp[-3] + temp[-2] + timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) + # If set size is greater then the one authorized + # suppress smaller elements + for curr_set, curr_num_day in top_termFreq_set_array: + diffCard = server_term.scard(curr_set) - top_term_freq_max_set_cardinality + if diffCard > 0: + top_termFreq = server_term.smembers(curr_set) + sorted_top_termFreq_set = [] + for word in top_termFreq: + word_value = 
getValueOverRange(word, timestamp, curr_num_day) + sorted_top_termFreq_set.append((word, word_value)) + + sorted_top_termFreq_set.sort(key=lambda tup: tup[1]) + for i in range(0, diffCard): + print 'set oversized, dropping', sorted_top_termFreq_set[i][0] + server_term.srem(curr_set, sorted_top_termFreq_set[i][0]) + + + #timer = time.clock() low_word = word.lower() + #print 'wordlower', time.clock() - timer r_serv1.hincrby(low_word, date, int(score)) - # Term Frequency - top_termFreq_setName_day = ["TopTermFreq_set_day", 1] - top_termFreq_setName_week = ["TopTermFreq_set_week", 7] - top_termFreq_setName_month = ["TopTermFreq_set_month", 31] - top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] - timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) # Update redis curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) -# print '+----------------------------------------------------------------' # Manage Top set for curr_set, curr_num_day in top_termFreq_set_array: @@ -108,27 +128,23 @@ if __name__ == "__main__": continue else: + #timer = time.clock() + curr_word_value = getValueOverRange(low_word, timestamp, curr_num_day) + #print 'curr_range', time.clock() - timer top_termFreq = server_term.smembers(curr_set) sorted_top_termFreq_set = [] + #timer = time.clock() for word in top_termFreq: word_value = getValueOverRange(word, timestamp, curr_num_day) sorted_top_termFreq_set.append((word, word_value)) sorted_top_termFreq_set.sort(key=lambda tup: tup[1]) -# if curr_num_day == 1: -# print sorted_top_termFreq_set - curr_word_value = getValueOverRange(low_word, timestamp, curr_num_day) + #print 'whole_range', time.clock() - timer if curr_word_value > int(sorted_top_termFreq_set[0][1]): print str(curr_num_day)+':', low_word, curr_word_value, '\t', sorted_top_termFreq_set[0][0], sorted_top_termFreq_set[0][1], '\t', curr_word_value > sorted_top_termFreq_set[0][1] - #print 
sorted_top_termFreq_set server_term.srem(curr_set, sorted_top_termFreq_set[0][0]) server_term.sadd(curr_set, low_word) - if iii == 2: - iii-=1 - else: - iii+=1 - else: if generate_new_graph: diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index d4a457cd..62700d1f 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -136,7 +136,7 @@ function launching_scripts { sleep 0.1 screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' + #screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' sleep 0.1 @@ -157,6 +157,13 @@ function launching_scripts { screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' sleep 0.1 screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' + + sleep 0.1 + screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' } #If no params, display the help diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 2a50be6b..7ce34bdb 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -55,6 +55,11 @@ r_serv_sentiment = redis.StrictRedis( port=cfg.getint("Redis_Level_DB_Sentiment", "port"), db=cfg.getint("Redis_Level_DB_Sentiment", "db")) +r_serv_term = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_TermFreq", "host"), + port=cfg.getint("Redis_Level_DB_TermFreq", "port"), + db=cfg.getint("Redis_Level_DB_TermFreq", "db")) + app = Flask(__name__, static_url_path='/static/') @@ -552,19 +557,71 @@ def sentiment_analysis_plot_tool_getdata(): return jsonify(to_return) +@app.route("/terms_management/") +def terms_management(): + TrackedTermsSet_Name = "TrackedSetTermSet" + 
BlackListTermsSet_Name = "BlackListSetTermSet" + + track_list = [] + track_list_values = [] + for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): + track_list.append(tracked_term) + + black_list = [] + black_list_values = [] + for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name): + black_list.append(blacked_term) + + return render_template("terms_management.html", black_list=black_list, track_list=track_list) + + +@app.route("/terms_management_action/", methods=['GET']) +def terms_management_action(): + TrackedTermsSet_Name = "TrackedSetTermSet" + BlackListTermsSet_Name = "BlackListSetTermSet" + + section = request.args.get('section') + action = request.args.get('action') + term = request.args.get('term') + if action is None or term is None: + return "None" + else: + if section == "followTerm": + if action == "add": + r_serv_term.sadd(TrackedTermsSet_Name, term) + else: + r_serv_term.srem(TrackedTermsSet_Name, term) + elif section == "blacklistTerm": + if action == "add": + r_serv_term.sadd(BlackListTermsSet_Name, term) + else: + r_serv_term.srem(BlackListTermsSet_Name, term) + else: + return "None" + + to_return = {} + to_return["section"] = section + to_return["action"] = action + to_return["term"] = term + return jsonify(to_return) + + + +@app.route("/terms_plot_tool/") +def terms_plot_tool(): + return render_template("terms_plot_tool.html") + + + @app.route("/test/") #completely shows the paste in a new tab def test(): - server = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_TermFreq", "host"), - port=cfg.getint("Redis_Level_DB_TermFreq", "port"), - db=cfg.getint("Redis_Level_DB_TermFreq", "db")) - + server = r_serv_term array1 = [] for w in server.smembers('TopTermFreq_set_day'): - val = server.hget('1471478400', w) + val = server.hget('1471564800', w) val = val if val is not None else 0 - val2 = server.hget('1471392000', w) + val2 = server.hget('1471478400', w) val2 = val2 if val2 is not None else 0 array1.append((w, (int(val), 
int(val2)))) @@ -572,7 +629,7 @@ def test(): # for w in server.smembers('TopTermFreq_set_week'): # array2.append((w, int(server.hget('1471478400', w)))) - array1.sort(key=lambda tup: tup[1][0]+tup[1][1]) + array1.sort(key=lambda tup: tup[1][0]) stri = "

day

" for e in array1: stri += "

"+ e[0] + "\t" + str(e[1]) +"

" diff --git a/var/www/templates/header.html b/var/www/templates/header.html index 0ffce67a..07e1bdb7 100644 --- a/var/www/templates/header.html +++ b/var/www/templates/header.html @@ -7,8 +7,15 @@
  • Sentiment Analysis +
  • +
  • Terms frequency + +
  • diff --git a/var/www/templates/terms_management.html b/var/www/templates/terms_management.html new file mode 100644 index 00000000..7738f37d --- /dev/null +++ b/var/www/templates/terms_management.html @@ -0,0 +1,247 @@ + + + + + + + + Analysis Information Leak framework Dashboard + + + + + + + + + + + + + + + + + + + +
    + +
    + +
    +
    +
    +

    Terms management interface

    +
    + +
    + +
    + + +
    +
    +
    +
    +
    +
    + Manage tracked terms +
    +
    + +
    + + + +
    + + + + + + + + + + + + + + {% set i = 0 %} + {% for term in track_list %} + + + + + + + + + {% set i = i + 1 %} + {% endfor %} + +
    Term name | Date | Day occurrence | Week occurrence | Month occurrence | Action
    {{ term }}{{ i }}1555459

    + + +

    + +
    +
    + +
    + +
    +
    + + +
    +
    +
    +
    +
    + Manage blacklisted terms +
    +
    + +
    + + + +
    + + + + + + + + + + + {% set i = 0 %} + {% for term in black_list %} + + + + + + {% set i = i + 1 %} + {% endfor %} + +
    Term name | Date | Action
    {{ term }}{{ i }}

    + +

    + +
    +
    + +
    + +
    +
    + + +
    + +
    + + + + + + + + + diff --git a/var/www/templates/terms_plot_tool.html b/var/www/templates/terms_plot_tool.html new file mode 100644 index 00000000..73121d57 --- /dev/null +++ b/var/www/templates/terms_plot_tool.html @@ -0,0 +1,150 @@ + + + + + + + + Analysis Information Leak framework Dashboard + + + + + + + + + + + + + + + + + + + + +
    + +
    + +
    +
    +
    +

    Terms plot tool

    +
    + +
    + +
    + + +
    +
    +
    +
    +
    +
    + Select options +
    +
    +
    + +
    + +
    +
    +
      +
    +
    +
    +
    +
    +
      +
    +
    +
    +
    + +
    +
    + Date: +
    +
    + +
    + + +
    +
    + +
    +
    + +
    + +
    +
    + + +
    +
    +
    +
    +
    +
    + Graph +
    +
    +
    +
    + +
    +
    + +
    + +
    +
    + + + +
    + +
    + + + + + + + +