mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 00:07:16 +00:00
Fixed a bug with Redis where the inserted key was too long, causing a crash; also fixed a bug in terms-top-set that took the min instead of the max
This commit is contained in:
parent
2466f355ab
commit
003c72bd7c
3 changed files with 11 additions and 15 deletions
|
@ -55,7 +55,7 @@ def check_if_tracked_term(term, path):
|
||||||
if term in TrackedTermsSet_Name:
|
if term in TrackedTermsSet_Name:
|
||||||
#add_paste to tracked_word_set
|
#add_paste to tracked_word_set
|
||||||
set_name = "tracked_" + term
|
set_name = "tracked_" + term
|
||||||
server.sadd(set_name, path)
|
server_term.sadd(set_name, path)
|
||||||
p.populate_set_out("New Term added", 'CurveManageTopSets')
|
p.populate_set_out("New Term added", 'CurveManageTopSets')
|
||||||
|
|
||||||
|
|
||||||
|
@ -113,7 +113,7 @@ if __name__ == "__main__":
|
||||||
temp = filename.split('/')
|
temp = filename.split('/')
|
||||||
date = temp[-4] + temp[-3] + temp[-2]
|
date = temp[-4] + temp[-3] + temp[-2]
|
||||||
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
|
timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
|
||||||
top_termFreq_setName_day[0] += str(timestamp)
|
curr_set = top_termFreq_setName_day[0] + str(timestamp)
|
||||||
|
|
||||||
|
|
||||||
low_word = word.lower()
|
low_word = word.lower()
|
||||||
|
@ -123,8 +123,8 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# Update redis
|
# Update redis
|
||||||
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
|
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
|
||||||
if low_word not in server.smembers(BlackListTermsSet_Name):
|
if low_word not in server_term.smembers(BlackListTermsSet_Name):
|
||||||
server.zincrby(top_termFreq_setName_day[0], int(score), low_word)
|
server_term.zincrby(curr_set, low_word, float(score))
|
||||||
|
|
||||||
#Add more info for tracked terms
|
#Add more info for tracked terms
|
||||||
check_if_tracked_term(low_word, filename)
|
check_if_tracked_term(low_word, filename)
|
||||||
|
|
|
@ -52,15 +52,14 @@ def manage_top_set():
|
||||||
# Retrieve top data (2*max_card) from days sets
|
# Retrieve top data (2*max_card) from days sets
|
||||||
for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
|
for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
|
||||||
curr_set = top_termFreq_setName_day[0] + str(timestamp)
|
curr_set = top_termFreq_setName_day[0] + str(timestamp)
|
||||||
print top_termFreq_setName_day[0]
|
array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2)
|
||||||
array_top_day = server_term.zrangebyscore(curr_set, '-inf', '+inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2)
|
|
||||||
|
|
||||||
print array_top_day
|
|
||||||
for word, value in array_top_day:
|
for word, value in array_top_day:
|
||||||
if word in dico.keys():
|
if word not in server_term.smembers(BlackListTermsSet_Name):
|
||||||
dico[word] += value
|
if word in dico.keys():
|
||||||
else:
|
dico[word] += value
|
||||||
dico[word] = value
|
else:
|
||||||
|
dico[word] = value
|
||||||
|
|
||||||
if timestamp == startDate - num_day_week*oneDay:
|
if timestamp == startDate - num_day_week*oneDay:
|
||||||
dico_week = copy.deepcopy(dico)
|
dico_week = copy.deepcopy(dico)
|
||||||
|
@ -78,9 +77,6 @@ def manage_top_set():
|
||||||
array_week.sort(key=lambda tup: -tup[1])
|
array_week.sort(key=lambda tup: -tup[1])
|
||||||
array_week = array_week[0:20]
|
array_week = array_week[0:20]
|
||||||
|
|
||||||
print array_month
|
|
||||||
print array_week
|
|
||||||
|
|
||||||
# suppress every term in top sets
|
# suppress every term in top sets
|
||||||
for curr_set, curr_num_day in top_termFreq_set_array[1:3]:
|
for curr_set, curr_num_day in top_termFreq_set_array[1:3]:
|
||||||
for w in server_term.zrange(curr_set, 0, -1):
|
for w in server_term.zrange(curr_set, 0, -1):
|
||||||
|
|
|
@ -760,7 +760,7 @@ def terms_plot_top_data():
|
||||||
if the_set == "TopTermFreq_set_day":
|
if the_set == "TopTermFreq_set_day":
|
||||||
the_set += "_" + str(today_timestamp)
|
the_set += "_" + str(today_timestamp)
|
||||||
|
|
||||||
for term, tot_value in r_serv_term.zrangebyscore(the_set, '-inf', '+inf', withscores=True, start=0, num=20):
|
for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20):
|
||||||
value_range = []
|
value_range = []
|
||||||
for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
|
for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
|
||||||
value = r_serv_term.hget(timestamp, term)
|
value = r_serv_term.hget(timestamp, term)
|
||||||
|
|
Loading…
Reference in a new issue