Merge branch 'website-improvement' into production

This commit is contained in:
Mokaddem 2016-12-08 10:05:55 +01:00
commit d3ed1bbf40
4 changed files with 77 additions and 29 deletions

View file

@ -45,13 +45,14 @@ def manage_top_set():
startDate = datetime.datetime.now()
startDate = startDate.replace(hour=0, minute=0, second=0, microsecond=0)
startDate = calendar.timegm(startDate.timetuple())
blacklist_size = int(server_term.scard(BlackListTermsSet_Name))
dico = {}
# Retreive top data (2*max_card) from days sets
# Retrieve top data (max_card + blacklist_size) from days sets
for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
curr_set = top_termFreq_setName_day[0] + str(timestamp)
array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2)
array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
for word, value in array_top_day:
if word not in server_term.smembers(BlackListTermsSet_Name):

View file

@ -38,35 +38,55 @@ def get_date_range(num_day):
date_list.append(date.substract_day(i))
return date_list
# Compute the progression for one keyword
def compute_progression_word(keyword):
    """Compute the progression score of one keyword.

    Reads the counter stored for ``keyword`` with ``server.hget(keyword, date)``
    for every date returned by ``get_date_range(num_day)``. A missing counter
    counts as 0.

    NOTE(review): ``server`` and ``num_day`` are not parameters of this
    function, so they must exist as module-level names when it runs --
    confirm, or pass them in explicitly.

    Returns:
        A ``(keyword_increase, keyword_total_sum)`` tuple.
        ``keyword_increase`` is the sum of the ratios ``value[i] / value[i-1]``
        over the counters taken in reversed ``date_range`` order;
        ``keyword_total_sum`` is the sum of all the counters.
    """
    # One counter per date, in date_range order.
    value_list = []
    for date in get_date_range(num_day):
        curr_value = server.hget(keyword, date)
        value_list.append(int(curr_value) if curr_value is not None else 0)
    keyword_total_sum = sum(value_list)

    # The progression is based on the ratio: value[i] / value[i-1],
    # walking the counters in reversed date_range order.
    keyword_increase = 0
    value_list_reversed = value_list[::-1]
    for previous, current in zip(value_list_reversed, value_list_reversed[1:]):
        # Avoid zero division: a previous value of 0 is treated as 1.
        keyword_increase += current / (previous or 1)

    return (keyword_increase, keyword_total_sum)
'''
Recompute the top_progression zset:
- compute the progression of the current field
- recompute the progression of each of the first 2*max_set_cardinality fields in the top_progression zset
'''
def compute_progression(server, field_name, num_day, url_parsed):
    """Refresh the ``z_top_progression_<field_name>`` sorted set.

    Does nothing when ``url_parsed[field_name]`` is ``None``. Otherwise:

    1. computes the progression of the current keyword;
    2. re-computes the progression of the first ``2*max_set_cardinality``
       members of the sorted set, removing each one and re-adding it only
       if it still passes both thresholds;
    3. adds the current keyword if it passes both thresholds.

    Args:
        server: Redis connection holding the sorted set.
        field_name: key of ``url_parsed`` to track; also the suffix of the
            sorted-set name.
        num_day: not used directly here.
        url_parsed: mapping from which the keyword is read.

    NOTE(review): ``compute_progression_word`` takes only the keyword, so it
    does not receive the ``server`` / ``num_day`` given to this function --
    confirm it reads the intended module-level values.
    """
    redis_progression_name_set = "z_top_progression_"+field_name

    keyword = url_parsed[field_name]
    if keyword is None:
        return

    #compute the progression of the current word
    keyword_increase, keyword_total_sum = compute_progression_word(keyword)

    #re-compute the progression of 2*max_set_cardinality
    current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
    # Iterate over the entries just fetched; the stored score is ignored
    # because it is recomputed from scratch.
    for word, _ in current_top:
        word_inc, word_tot_sum = compute_progression_word(word)
        # Always drop the stale entry; it is re-added only if still eligible.
        server.zrem(redis_progression_name_set, word)
        if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
            server.zadd(redis_progression_name_set, float(word_inc), word)

    # filter before adding
    if (keyword_total_sum > threshold_total_sum) and (keyword_increase > threshold_increase):
        server.zadd(redis_progression_name_set, float(keyword_increase), keyword)
if __name__ == '__main__':

View file

@ -772,6 +772,7 @@ def terms_plot_tool_data():
value = r_serv_term.hget(timestamp, term)
curr_value_range = int(value) if value is not None else 0
value_range.append([timestamp, curr_value_range])
value_range.insert(0,term)
return jsonify(value_range)

View file

@ -72,7 +72,7 @@
</div>
<div class="panel-body">
<div aria-disabled="false" class="slider sliderRange sliderBlue ui-slider ui-slider-horizontal ui-widget ui-widget-content ui-corner-all" style="margin-bottom: 5px;"></div>
<strong>Date:</strong> <input type="text" id="amount" readonly style="border:0; color:#f6931f; font-weight:bold;">
<strong>Date:</strong> <input type="text" id="amount" readonly style="border:0; color:#f6931f; font-weight:bold;"> <button id="plot-month" onclick="replot();" class="btn btn-info" style="float: right;">Replot</button>
<div class="form-group input-group" style="margin-top: 30px;">
<span class="input-group-addon"><span class="glyphicon glyphicon-stats"></span></span>
@ -168,6 +168,7 @@
<script>
var plot;
var graph_data = [];
var plotted_terms = [];
var graph_options = {
series: {
lines: {
@ -198,6 +199,7 @@ function plotData() {
$('#plot-btn-add').show("fast");
var curthis = $(this);
var term = $('#TermInput').val();
plotted_terms = [term]
var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000;
var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000;
@ -205,8 +207,8 @@ function plotData() {
graph_data = [];
var to_plot = [];
var curr_data = [];
for(i=0; i<data.length; i++) {
curr_data.push([data[i][0]*1000, data[i][1]]);
for(i=1; i<data.length; i++) {
curr_data.push([data[i][0]*1000, data[i][1]]);
}
to_plot.push({ data: curr_data, label: term});
graph_data.push({ data: curr_data, label: term});
@ -234,22 +236,46 @@ function plotData() {
// Fetch the values of the term typed in #TermInput for the selected slider
// range and add them to the existing graph as one more series.
function addData() {
    var term = $('#TermInput').val();
    // Remember the term so that replot() can request it again later.
    plotted_terms.push(term)
    // Slider values are converted from milliseconds to seconds for the request.
    var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000;
    var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000;

    $.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term }, function(data, status){
        var curr_data = [];
        // data[0] is not a data point, so the [timestamp, value] pairs are read from index 1.
        // Timestamps are multiplied by 1000 before being handed to the plot.
        for(var i=1; i<data.length; i++) {
            curr_data.push([data[i][0]*1000, data[i][1]]);
        }
        graph_data.push({ data: curr_data, label: term});
        plot = $.plot($("#graph"), graph_data, graph_options);
        $("#TermInput").val("");
    })
}
// Request every term in plotted_terms again for the currently selected slider
// range and redraw the whole graph once all the responses have arrived.
function replot() {
    // The range is identical for every term, so the slider is read only once.
    var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000;
    var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000;
    var new_graph_data = [];
    var promises = [];

    $.each(plotted_terms, function(index, term) {
        promises.push($.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term }, function(data, status){
            var curr_data = [];
            // data[0] is used as the series label; the [timestamp, value] pairs start at index 1.
            for(var i=1; i<data.length; i++) {
                curr_data.push([data[i][0]*1000, data[i][1]]);
            }
            // Store by position so the series keep the order of plotted_terms,
            // whatever order the responses arrive in.
            new_graph_data[index] = { data: curr_data, label: data[0]};
        }));
    });

    // Swap in the new series only when every request succeeded, so a failed
    // request leaves the current graph data untouched.
    $.when.apply($, promises).done( function () {
        graph_data = new_graph_data;
        plot = $.plot($("#graph"), graph_data, graph_options);
        $("#TermInput").val("");
    });
}
</script>