Webstats should correctly update top_progression_zset (not fully tested because there is not enough data yet; will be tested later)

This commit is contained in:
Mokaddem 2016-12-08 10:05:07 +01:00
parent 570324060e
commit 73d4f9e082

View file

@ -38,14 +38,9 @@ def get_date_range(num_day):
date_list.append(date.substract_day(i)) date_list.append(date.substract_day(i))
return date_list return date_list
def compute_progression(server, field_name, num_day, url_parsed): # Compute the progression for one keyword
redis_progression_name = 'top_progression_'+field_name def compute_progression_word(keyword):
redis_progression_name_set = 'top_progression_'+field_name+'_set'
keyword = url_parsed[field_name]
if keyword is not None:
date_range = get_date_range(num_day) date_range = get_date_range(num_day)
# check if this keyword is eligible for progression # check if this keyword is eligible for progression
keyword_total_sum = 0 keyword_total_sum = 0
value_list = [] value_list = []
@ -63,10 +58,35 @@ def compute_progression(server, field_name, num_day, url_parsed):
divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1 divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1
keyword_increase += value_list_reversed[i] / divisor keyword_increase += value_list_reversed[i] / divisor
# filter return (keyword_increase, keyword_total_sum)
if (keyword_total_sum > threshold_total_sum) and (keyword_increase > threshold_increase):
'''
recompute the set top_progression zset
- Compute the current field progression
- re-compute the current progression for each first 2*max_set_cardinality fields in the top_progression_zset
'''
def compute_progression(server, field_name, num_day, url_parsed):
    """Refresh the top-progression sorted set for ``field_name``.

    Steps:
      1. Compute the progression of the keyword found in
         ``url_parsed[field_name]``.
      2. Re-compute the progression of the first ``2*max_set_cardinality``
         members (highest score first) of the sorted set
         ``z_top_progression_<field_name>``; each one is removed and only
         re-added if it still passes both thresholds.
      3. Add the current keyword if it passes both thresholds.

    Nothing is done when ``url_parsed[field_name]`` is ``None``.

    Args:
        server: Redis client holding the sorted set.
        field_name: name of the URL field being tracked; also the suffix of
            the sorted-set key.
        num_day: not read directly in this function.
            NOTE(review): presumably consumed by compute_progression_word
            through an enclosing/global scope -- confirm.
        url_parsed: mapping from field name to the value extracted from the
            URL (may map to ``None``).
    """
    redis_progression_name_set = "z_top_progression_" + field_name

    keyword = url_parsed[field_name]
    if keyword is None:
        return

    # Compute the progression of the current word.
    keyword_increase, keyword_total_sum = compute_progression_word(keyword)

    # Re-compute the progression of the 2*max_set_cardinality best entries.
    current_top = server.zrevrangebyscore(
        redis_progression_name_set, '+inf', '-inf',
        withscores=True, start=0, num=2*max_set_cardinality)
    # Iterate over the list fetched just above. The previous code looped over
    # an undefined name (array_top_day), which raised NameError.
    for word, _score in current_top:
        word_inc, word_tot_sum = compute_progression_word(word)
        # Drop the stale entry; it is only re-inserted if still eligible.
        server.zrem(redis_progression_name_set, word)
        if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
            # NOTE(review): the (name, score, member) positional order of
            # zadd depends on the redis-py version/client class -- confirm.
            server.zadd(redis_progression_name_set, float(word_inc), word)

    # Filter before adding the current keyword.
    if (keyword_total_sum > threshold_total_sum) and (keyword_increase > threshold_increase):
        server.zadd(redis_progression_name_set, float(keyword_increase), keyword)
server.zadd("z_top_progression_"+field_name, float(keyword_increase), keyword)
if __name__ == '__main__': if __name__ == '__main__':