chg: [DB Migration] UI: extract and highlight leak and tracker matches; Data Retention: save object first/last date; refactor Tools

This commit is contained in:
Terrtia 2022-12-19 16:38:20 +01:00
parent f9715408be
commit bf71c9ba99
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
26 changed files with 883 additions and 873 deletions

View file

@ -71,7 +71,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
if r_set:
all_items = r_serv_cache.smembers(redis_key)
else:
all_items = r_serv_cache.lrange(redis_key, 0 ,-1)
all_items = r_serv_cache.lrange(redis_key, 0, -1)
r_serv_cache.delete(redis_key)
proc.terminate()
return all_items
@ -80,29 +80,66 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
proc.terminate()
sys.exit(0)
def _regex_finditer(r_key, regex, content):
    """Worker process: find every match of `regex` in `content`.

    Each match is pushed onto the Redis list `r_key` encoded as
    'start:end:value' so the parent process can rebuild
    (start, end, value) tuples after the worker exits.

    :param r_key: Redis list key used to hand results back to the parent
    :param regex: pattern passed to re.finditer
    :param content: text to scan
    """
    for match in re.finditer(regex, content):
        # store the offsets alongside the matched text; value may itself
        # contain ':' — the parent splits with maxsplit=2 to cope with that
        r_serv_cache.rpush(r_key, f'{match.start()}:{match.end()}:{match.group()}')
    # short TTL so results are reclaimed if the parent times out and never reads them
    r_serv_cache.expire(r_key, 360)
def regex_finditer(r_key, regex, item_id, content, max_time=30):
    """Run re.finditer(regex, content) in a child process with a timeout.

    The worker (`_regex_finditer`) pushes 'start:end:value' entries onto the
    Redis list `r_key`; this parent waits up to `max_time` seconds, then
    drains and deletes the list.

    :param r_key: Redis key for worker results; also used as the label for
                  timeout statistics
    :param regex: pattern passed to re.finditer
    :param item_id: item identifier, used only in the timeout error message
    :param content: text to scan
    :param max_time: seconds to wait for the worker (default 30)
    :return: list of (start, end, value) tuples; [] on timeout or no match
    """
    proc = Proc(target=_regex_finditer, args=(r_key, regex, content))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            # worker exceeded its budget: kill it and record the timeout
            proc.terminate()
            Statistics.incr_module_timeout_statistic(r_key)
            err_mess = f"{r_key}: processing timeout: {item_id}"
            print(err_mess)
            publisher.info(err_mess)
            return []
        else:
            res = r_serv_cache.lrange(r_key, 0, -1)
            r_serv_cache.delete(r_key)
            proc.terminate()
            all_match = []
            for match in res:
                # maxsplit=2: the matched value may legitimately contain ':'
                # NOTE(review): assumes the Redis client returns str (decode_responses) — confirm
                start, end, value = match.split(':', 2)
                all_match.append((int(start), int(end), value))
            return all_match
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating regex worker")
        proc.terminate()
        sys.exit(0)
def _regex_search(r_key, regex, content):
    """Worker process: set a short-lived flag at `r_key` when `regex`
    matches anywhere in `content`; leave the key absent otherwise."""
    found = re.search(regex, content)
    if found:
        r_serv_cache.set(r_key, 1)
        r_serv_cache.expire(r_key, 360)
def regex_search(r_key, regex, item_id, content, max_time=30):
    """Run re.search(regex, content) in a child process with a timeout.

    The worker (`_regex_search`) sets the Redis key `r_key` when the regex
    matches; this parent waits up to `max_time` seconds, then reads and
    clears the flag.

    :param r_key: Redis key used as the match flag; also the label for
                  timeout statistics
    :param regex: pattern passed to re.search
    :param item_id: item identifier, used only in the timeout error message
    :param content: text to search
    :param max_time: seconds to wait for the worker (default 30)
    :return: True if the regex matched, False on no match or timeout
    """
    proc = Proc(target=_regex_search, args=(r_key, regex, content))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            # worker exceeded its budget: kill it and record the timeout
            proc.terminate()
            Statistics.incr_module_timeout_statistic(r_key)
            err_mess = f"{r_key}: processing timeout: {item_id}"
            print(err_mess)
            publisher.info(err_mess)
            return False
        else:
            proc.terminate()  # no-op on a finished worker; mirrors regex_finditer's cleanup
            # DEL returns the number of keys removed, so one round-trip both
            # clears the flag and reports whether the worker set it — avoids
            # the racy EXISTS-then-DEL pair.
            return bool(r_serv_cache.delete(r_key))
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating regex worker")
        proc.terminate()
        sys.exit(0)