chg: [migration] migrate Screenshots + Trackers + Duplicates + fix zadd zincrby

This commit is contained in:
Terrtia 2022-11-29 16:01:01 +01:00
parent 25a8eb09c0
commit af583939d8
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
24 changed files with 191 additions and 252 deletions

View file

@ -77,16 +77,16 @@ class Duplicates(AbstractModule):
obj_hash = self.algos[algo]['hash']
for date_ymonth in last_month_dates:
if Duplicate.exists_algo_hash_by_month(algo, obj_hash, date_ymonth):
Duplicate.add_obj_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth)
nb_duplicates +=1
Duplicate.add_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth)
nb_duplicates += 1
else:
for hash in Duplicate.get_algo_hashs_by_month(algo, date_ymonth):
# # FIXME: try - catch 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash
similarity = Duplicate.get_algo_similarity(algo, obj_hash, hash)
print(f'[{algo}] comparing: {obj_hash} and {hash} similarity: {similarity}') # DEBUG:
print(f'[{algo}] comparing: {obj_hash} and {hash} similarity: {similarity}') # DEBUG:
if similarity >= self.algos[algo]['threshold']:
Duplicate.add_obj_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth)
nb_duplicates +=1
Duplicate.add_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth)
nb_duplicates += 1
# Save Hashs
Duplicate.save_object_hash(algo, curr_date_ymonth, self.algos[algo]['hash'], item.get_id())

View file

@ -194,12 +194,12 @@ class Global(AbstractModule):
self.redis_logger.warning(f'Global; Incomplete file: {filename}')
print(f'Global; Incomplete file: {filename}')
# save daily stats
self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
self.r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
except OSError:
self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
print(f'Global; Not a gzipped file: {filename}')
# save daily stats
self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
self.r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
return curr_file_content