diff --git a/bin/Curve.py b/bin/Curve.py
index 0a533a35..9337bc5b 100755
--- a/bin/Curve.py
+++ b/bin/Curve.py
@@ -1,9 +1,6 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-The ZMQ_Sub_Curve Module
-============================
-
 This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
 
 This modules update a .csv file used to draw curves representing selected
@@ -15,11 +12,9 @@ words and their occurency per day.
 
 the same Subscriber name in both of them.
 
+This Module is also used for term frequency.
 
-
-zrank for each day
-week -> top zrank for each day
-
+/!\ Top set management is done in the module Curve_manage_top_sets
 
 
 Requirements
@@ -117,48 +112,21 @@ if __name__ == "__main__":
             low_word = word.lower()
-            #Old curve
+            #Old curve with words in file
             r_serv1.hincrby(low_word, date, int(score))
 
-            # Update redis
             curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
+
+            # Add in set only if term is not in the blacklist
             if low_word not in server_term.smembers(BlackListTermsSet_Name):
                 server_term.zincrby(curr_set, low_word, float(score))
 
             #Add more info for tracked terms
             check_if_tracked_term(low_word, filename)
 
-            # Manage Top set is done in module Curve_manage_top_sets
-
-            '''
-            if server_term.scard(curr_set) < top_term_freq_max_set_cardinality:
-                server_term.sadd(curr_set, low_word)
-            elif server_term.sismember(curr_set, low_word):
-                continue
-
-            else:
-
-
-                #timer = time.clock()
-                curr_word_value = getValueOverRange(low_word, timestamp, curr_num_day)
-                #print 'curr_range', time.clock() - timer
-                top_termFreq = server_term.smembers(curr_set)
-                sorted_top_termFreq_set = []
-                #timer = time.clock()
-                for word in top_termFreq:
-                    word_value = getValueOverRange(word, timestamp, curr_num_day)
-                    sorted_top_termFreq_set.append((word, word_value))
-
-                sorted_top_termFreq_set.sort(key=lambda tup: tup[1])
-                #print 'whole_range', time.clock() - timer
-
-                if curr_word_value > int(sorted_top_termFreq_set[0][1]):
-                    print str(curr_num_day)+':', low_word, curr_word_value, '\t', sorted_top_termFreq_set[0][0], sorted_top_termFreq_set[0][1], '\t', curr_word_value > sorted_top_termFreq_set[0][1]
-                    server_term.srem(curr_set, sorted_top_termFreq_set[0][0])
-                    server_term.sadd(curr_set, low_word)
-            '''
 
         else:
+            if generate_new_graph:
                 generate_new_graph = False
                 print 'Building graph'
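
In the new per-paste path above, Curve.py only increments two structures and leaves all top-set maintenance to Curve_manage_top_sets.py: a per-day hash (keyed by the day's timestamp, one field per term) and the daily ranking, which non-blacklisted terms enter through zincrby. A minimal sketch of that pattern, outside the module itself, is shown below; the connection settings and the TopTermFreq_set_day set name are assumptions, the zincrby argument order follows the redis-py 2.x style used in the diff, and sismember replaces the diff's smembers membership test (equivalent, but it avoids fetching the whole set).

```python
import redis

# Assumed connection settings; the real module reads them from its config file.
server_term = redis.StrictRedis(host='localhost', port=6382, db=0)

BlackListTermsSet_Name = "BlackListSetTermSet"
curr_set = "TopTermFreq_set_day"  # assumed name of the daily ranking


def update_term(timestamp, low_word, score):
    # Per-day counter: hash keyed by the day timestamp, one field per term.
    curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))

    # Feed the daily ranking only if the term is not blacklisted.
    if not server_term.sismember(BlackListTermsSet_Name, low_word):
        server_term.zincrby(curr_set, low_word, float(score))  # redis-py 2.x: (name, member, amount)
    return curr_word_value
```

Keeping the hot path down to a couple of cheap Redis operations is exactly what the removed inline ranking code was traded for.
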
diff --git a/bin/Curve_manage_top_sets.py b/bin/Curve_manage_top_sets.py
index 92d63b33..34c1c238 100755
--- a/bin/Curve_manage_top_sets.py
+++ b/bin/Curve_manage_top_sets.py
@@ -2,10 +2,8 @@
 # -*-coding:UTF-8 -*
 """
-
-
-zrank for each day
-week -> top zrank for each day
+This module manages the top sets for term frequency.
+Every 'refresh_rate' seconds, the weekly and monthly sets are updated.
 
 
 Requirements
@@ -22,13 +20,13 @@
 import time
 import copy
 from pubsublogger import publisher
 from packages import lib_words
-import os
 import datetime
 import calendar
 from Helper import Process
 
 # Config Variables
+Refresh_rate = 60*5 #sec
 BlackListTermsSet_Name = "BlackListSetTermSet"
 TrackedTermsSet_Name = "TrackedSetTermSet"
 top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
@@ -91,7 +89,6 @@ def manage_top_set():
 
 
 
-
 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
     # Port of the redis instance used by pubsublogger
@@ -107,7 +104,6 @@ if __name__ == '__main__':
                                   port=p.config.get("Redis_Level_DB_TermFreq", "port"),
                                   db=p.config.get("Redis_Level_DB_TermFreq", "db"))
 
-    # FUNCTIONS #
     publisher.info("Script Curve_manage_top_set started")
 
     # Sent to the logging a description of the module
@@ -116,15 +112,12 @@ if __name__ == '__main__':
     manage_top_set()
 
     while True:
-        # Get one message from the input queue
+        # Get one message from the input queue (module only works if linked with a queue)
         message = p.get_from_set()
         if message is None:
             publisher.debug("{} queue is empty, waiting".format(config_section))
             print 'sleeping'
-            time.sleep(60) # sleep a long time then manage the set
+            time.sleep(Refresh_rate) # sleep a long time then manage the set
             manage_top_set()
             continue
 
-        # Do something with the message from the queue
-        #manage_top_set()
-
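
manage_top_set() itself is unchanged by this patch and its body is not shown here, so the following is only an illustration of the kind of periodic rebuild the new docstring describes: every Refresh_rate seconds, sum the per-day hashes (keyed by day timestamp, as written by Curve.py) over the last N days and keep only the top_term_freq_max_set_cardinality best terms. The function name rebuild_top_set and the use of a sorted set for the destination are assumptions; the TopTermFreq_set_week name appears elsewhere in the code base.

```python
import calendar
import datetime

oneDay = 60 * 60 * 24
top_term_freq_max_set_cardinality = 20


def rebuild_top_set(server_term, dest_set, num_day):
    # Hypothetical rebuild: sum every term's per-day counters over the last num_day days
    # and keep only the highest totals in dest_set (modelled here as a sorted set).
    today = datetime.date.today()
    today_timestamp = calendar.timegm((today.year, today.month, today.day, 0, 0, 0))

    totals = {}
    for timestamp in range(today_timestamp, today_timestamp - num_day * oneDay, -oneDay):
        for term, value in server_term.hgetall(timestamp).iteritems():
            totals[term] = totals.get(term, 0) + int(value)

    best = sorted(totals.items(), key=lambda kv: kv[1], reverse=True)
    best = best[:top_term_freq_max_set_cardinality]

    pipe = server_term.pipeline()
    pipe.delete(dest_set)
    for term, total in best:
        pipe.zadd(dest_set, float(total), term)  # redis-py 2.x: score first, then member
    pipe.execute()

# e.g. rebuild_top_set(server_term, "TopTermFreq_set_week", 7)
```
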
diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py
index b85c2ae5..973d15ae 100755
--- a/bin/ModuleStats.py
+++ b/bin/ModuleStats.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    Template for new modules
+    This module computes statistics for some modules and providers
+
 """
 
 import time
@@ -56,29 +57,6 @@ def compute_most_posted(server, message):
             server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
             print redis_progression_name_set
 
-#        if keyword in server.smembers(redis_progression_name_set): # if it is already in the set
-#            return
-#
-#        if (server.scard(redis_progression_name_set) < max_set_cardinality):
-#            server.sadd(redis_progression_name_set, keyword)
-
-#        else: #not in the set
-#            #Check value for all members
-#            member_set = []
-#            for keyw in server.smembers(redis_progression_name_set):
-#                keyw_value = server.hget(paste_date, module+'-'+keyw)
-#                if keyw_value is not None:
-#                    member_set.append((keyw, int(keyw_value)))
-#                else: #No data for this set for today
-#                    member_set.append((keyw, int(0)))
-#            member_set.sort(key=lambda tup: tup[1])
-#            if len(member_set) > 0:
-#                if member_set[0][1] < keyword_total_sum:
-#                    #remove min from set and add the new one
-#                    print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
-#                    server.srem(redis_progression_name_set, member_set[0][0])
-#                    server.sadd(redis_progression_name_set, keyword)
-
 def compute_provider_info(server, path):
     redis_all_provider = 'all_provider_set'
@@ -100,22 +78,6 @@ def compute_provider_info(server, path):
         new_avg = float(sum_size) / float(num_paste)
     server.hset(paste_provider +'_avg', paste_date, new_avg)
 
-    '''
-    prev_num_paste = server.hget(paste_provider+'_num', paste_date)
-    if prev_num_paste is not None:
-        ok = server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1)
-        prev_sum_size = server.hget(paste_provider+'_size', paste_date)
-
-        if prev_sum_size is not None:
-            ok = server.hset(paste_provider+'_size', paste_date, float(prev_sum_size)+paste_size)
-            new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1)
-        else:
-            ok = server.hset(paste_provider+'_size', paste_date, paste_size)
-
-    else:
-        ok = server.hset(paste_provider+'_num', paste_date, 1)
-        prev_num_paste = 0
-    '''
     #
     # Compute Most Posted
     #
@@ -136,28 +98,6 @@ def compute_provider_info(server, path):
             server.zrem(redis_avg_size_name_set, member_set[0][0])
             server.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
 
-    '''
-    if paste_provider not in server.smembers(redis_avg_size_name_set): # if it is already in the set
-        if (server.scard(redis_avg_size_name_set) < max_set_cardinality):
-            server.sadd(redis_avg_size_name_set, paste_provider)
-
-        else: #set full capacity
-            #Check value for all members
-            member_set = []
-            for provider in server.smembers(redis_avg_size_name_set):
-                curr_avg = 0.0
-                curr_size = server.hget(provider+'_size', paste_date)
-                curr_num = server.hget(provider+'_num', paste_date)
-                if (curr_size is not None) and (curr_num is not None):
-                    curr_avg = float(curr_size) / float(curr_num)
-                member_set.append((provider, curr_avg))
-            member_set.sort(key=lambda tup: tup[1])
-            if member_set[0][1] < new_avg:
-                #remove min from set and add the new one
-                print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
-                server.srem(redis_avg_size_name_set, member_set[0][0])
-                server.sadd(redis_avg_size_name_set, paste_provider)
-    '''
 
     # Num
     # if set not full or provider already present
@@ -172,27 +112,6 @@ def compute_provider_info(server, path):
             server.zrem(member_set[0][0])
             server.zadd(redis_providers_name_set, float(num_paste), paste_provider)
 
-    '''
-    if paste_provider not in server.smembers(redis_providers_name_set): # if it is already in the set
-        if (server.scard(redis_providers_name_set) < max_set_cardinality):
-            server.sadd(redis_providers_name_set, paste_provider)
-
-        else: #set full capacity
-            #Check value for all members
-            member_set = []
-            for provider in server.smembers(redis_providers_name_set):
-                curr_num = 0
-                curr_num = server.hget(provider+'_num', paste_date)
-                if curr_num is not None:
-                    member_set.append((provider, int(curr_num)))
-            member_set.sort(key=lambda tup: tup[1])
-            if len(member_set) > 0:
-                if member_set[0][1] < int(prev_num_paste)+1:
-                    #remove min from set and add the new one
-                    print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
-                    server.srem(redis_providers_name_set, member_set[0][0])
-                    server.sadd(redis_providers_name_set, paste_provider)
-    '''
 
 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
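
The commented-out blocks deleted above all implemented the same idea by hand: keep a bounded "top N" per day with a plain set plus a linear scan for the minimum. The surviving code does it with a sorted set (zcard / zrange / zrem / zadd, in the redis-py 2.x score-first argument order). Written as a standalone helper, the pattern looks roughly like the sketch below; the update_top_zset name is ours, and the max_set_cardinality value is assumed rather than read from the module.

```python
max_set_cardinality = 8  # assumed; ModuleStats.py defines its own value


def update_top_zset(server, zset_name, member, score):
    # Room left, or the member is already ranked: just (re)write its score.
    if server.zcard(zset_name) < max_set_cardinality or server.zscore(zset_name, member) is not None:
        server.zadd(zset_name, float(score), member)  # redis-py 2.x: score first, then member
        return

    # Set is full: evict the current minimum only if the newcomer beats it.
    lowest = server.zrange(zset_name, 0, 0, withscores=True)  # [(member, score)]
    if lowest and float(score) > lowest[0][1]:
        server.zrem(zset_name, lowest[0][0])
        server.zadd(zset_name, float(score), member)
```
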
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index 83e2e033..97b1a9bb 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -173,6 +173,7 @@ def showpaste(content_range):
     if content_range != 0:
         p_content = p_content[0:content_range]
 
+
     return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list)
 
 def getPastebyType(server, module_name):
@@ -205,20 +206,6 @@ def get_top_relevant_data(server, module_name):
             member_set.insert(0, ("passed_days", days))
             return member_set
 
-#            member_set = []
-#            for keyw in server.smembers(redis_progression_name_set):
-#                redis_progression_name = module_name+'-'+keyw
-#                keyw_value = server.hget(date ,redis_progression_name)
-#                keyw_value = keyw_value if keyw_value is not None else 0
-#                member_set.append((keyw, int(keyw_value)))
-#            member_set.sort(key=lambda tup: tup[1], reverse=True)
-#            if member_set[0][1] == 0: #No data for this date
-#                days += 1
-#                continue
-#            else:
-#                member_set.insert(0, ("passed_days", days))
-#                return member_set
-
 def Term_getValueOverRange(word, startDate, num_day):
     passed_days = 0
@@ -357,34 +344,6 @@ def providersChart():
 
     return jsonify(member_set)
 
-'''
-    # Iterate over element in top_x_set and retreive their value
-    member_set = []
-    for keyw in r_serv_charts.smembers(redis_provider_name_set):
-        redis_provider_name_size = keyw+'_'+'size'
-        redis_provider_name_num = keyw+'_'+'num'
-        keyw_value_size = r_serv_charts.hget(redis_provider_name_size, get_date_range(0)[0])
-        keyw_value_size = keyw_value_size if keyw_value_size is not None else 0.0
-        keyw_value_num = r_serv_charts.hget(redis_provider_name_num, get_date_range(0)[0])
-
-        if keyw_value_num is not None:
-            keyw_value_num = int(keyw_value_num)
-        else:
-            if module_name == "size":
-                keyw_value_num = 10000000000
-            else:
-                keyw_value_num = 0
-        if module_name == "size":
-            member_set.append((keyw, float(keyw_value_size)/float(keyw_value_num)))
-        else:
-            member_set.append((keyw, float(keyw_value_num)))
-
-    member_set.sort(key=lambda tup: tup[1], reverse=True)
-    if len(member_set) == 0:
-        member_set.append(("No relevant data", float(100)))
-    return jsonify(member_set)
-'''
-
 
 @app.route("/search", methods=['POST'])
 def search():
@@ -771,50 +730,6 @@ def terms_plot_top_data():
 
     return jsonify(to_return)
 
-    '''
-    to_return = []
-    for term in r_serv_term.smembers(the_set):
-        value_range = []
-        tot_sum = 0
-        for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
-            value = r_serv_term.hget(timestamp, term)
-            curr_value_range = int(value) if value is not None else 0
-            tot_sum += curr_value_range
-            value_range.append([timestamp, curr_value_range])
-
-        to_return.append([term, value_range, tot_sum])
-
-    return jsonify(to_return)
-    '''
-
-
-@app.route("/test/") #completely shows the paste in a new tab
-def test():
-
-    server = r_serv_term
-    array1 = []
-    for w in server.smembers('TopTermFreq_set_day'):
-        val = server.hget('1471564800', w)
-        val = val if val is not None else 0
-        val2 = server.hget('1471478400', w)
-        val2 = val2 if val2 is not None else 0
-        array1.append((w, (int(val), int(val2))))
-
-#    array2 = []
-#    for w in server.smembers('TopTermFreq_set_week'):
-#        array2.append((w, int(server.hget('1471478400', w))))
-
-    array1.sort(key=lambda tup: tup[1][0])
-    stri = ""+ e[0] + "\t" + str(e[1]) +""
-#    stri += ""+ e[0] + "\t" + str(e[1]) +""
-
-    print stri
-    return stri
-
 
 @app.route("/showsavedpaste/") #completely shows the paste in a new tab
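
The inline loop deleted from terms_plot_top_data() above is the clearest picture this patch gives of how per-term history is read back: walk the per-day hashes (keyed by day timestamp) backwards and collect one value per day, treating missing fields as zero. Pulled out as a standalone helper it would look roughly like the sketch below; the get_term_history name is ours, not the application's, and the Redis handle is passed in rather than taken from the module-level r_serv_term connection Flask_server.py uses.

```python
oneDay = 60 * 60 * 24


def get_term_history(server, term, today_timestamp, num_day):
    # Walk backwards one day at a time; missing hash fields count as zero.
    value_range = []
    tot_sum = 0
    for timestamp in range(today_timestamp, today_timestamp - num_day * oneDay, -oneDay):
        value = server.hget(timestamp, term)
        curr_value = int(value) if value is not None else 0
        tot_sum += curr_value
        value_range.append([timestamp, curr_value])
    return [term, value_range, tot_sum]
```
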
"+ e[0] + "\t" + str(e[1]) +"
" -# stri += ""+ e[0] + "\t" + str(e[1]) +"
" - - print stri - return stri - @app.route("/showsavedpaste/") #completely shows the paste in a new tab diff --git a/var/www/static/js/FlexGauge.js b/var/www/static/js/FlexGauge.js index e7db3bd9..3aa2bf7a 100644 --- a/var/www/static/js/FlexGauge.js +++ b/var/www/static/js/FlexGauge.js @@ -3,6 +3,8 @@ * Version: 1.0 * Author: Jeff Millies * Author URI: + * + * Slight modification for better display in Sentiment webpages */ (function ($) { var FlexGauge = function (o) { diff --git a/var/www/templates/important_paste_by_module.html b/var/www/templates/important_paste_by_module.html index fdb732ba..d4e6b5dd 100644 --- a/var/www/templates/important_paste_by_module.html +++ b/var/www/templates/important_paste_by_module.html @@ -115,7 +115,6 @@ $(document).ready(function(){ $('#myTable').on( 'draw.dt', function () { // On click, get html content from url and update the corresponding modal $("[data-toggle='modal']").unbind('click.openmodal').on("click.openmodal", function (event) { - console.log('hi'); event.preventDefault(); var modal=$(this); var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num');