From bf92a2f22f0aa9174685c34809d0e53946f67f57 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 12 Apr 2019 16:07:40 +0200 Subject: [PATCH] fix: [background update] avoid multiple update on the same key --- OVERVIEW.md | 1 + bin/LAUNCH.sh | 2 + bin/update-background.py | 7 +- update/v1.4/Update-ARDB_Metadata.py | 153 ++-------------------------- update/v1.4/Update-ARDB_Tags.py | 16 ++- update/v1.4/Update.py | 5 + 6 files changed, 27 insertions(+), 157 deletions(-) diff --git a/OVERVIEW.md b/OVERVIEW.md index 37cf3adc..0e0dfc4a 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -66,6 +66,7 @@ Redis and ARDB overview | paste_metadata:**item path** | super_father | **first url crawled** | | | father | **item father** | | | domain | **crawled domain**:**domain port** | +| | screenshot | **screenshot hash** | ##### Set: | Key | Field | diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 5978f0cb..42c71602 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -212,6 +212,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x" sleep 0.1 + screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x" + sleep 0.1 screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x" } diff --git a/bin/update-background.py b/bin/update-background.py index 86aed2df..e72e599d 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -5,7 +5,7 @@ Update AIL ============================ -Update AIL clone and fork +Update AIL in the background """ @@ -31,10 +31,7 @@ if __name__ == "__main__": db=cfg.getint("ARDB_DB", "db"), decode_responses=True) - ail_version = r_serv.get('ail:version') - if ail_version is None: - sys.exit(0) - else: + if r_serv.exists('ail:update_v1.5'): if ail_version == 'v1.5': onions_update_status = r_serv.get('v1.5:onions') if onions_update_status 
is None: diff --git a/update/v1.4/Update-ARDB_Metadata.py b/update/v1.4/Update-ARDB_Metadata.py index 757fdc9a..66462cbc 100755 --- a/update/v1.4/Update-ARDB_Metadata.py +++ b/update/v1.4/Update-ARDB_Metadata.py @@ -106,7 +106,8 @@ if __name__ == '__main__': # update domain port domain = r_serv_metadata.hget(new_item_metadata, 'domain') if domain: - r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain)) + if domain[-3:] != ':80': + r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain)) super_father = r_serv_metadata.hget(new_item_metadata, 'super_father') if super_father: if PASTES_FOLDER in super_father: @@ -116,150 +117,18 @@ if __name__ == '__main__': if PASTES_FOLDER in father: r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1)) - - - - ###################################################################################################################### - ###################################################################################################################### - ###################################################################################################################### - ###################################################################################################################### - ###################################################################################################################### - ###################################################################################################################### - ''' - - string_keys_to_rename = ['misp_events:{}*'.format(PASTES_FOLDER), 'hive_cases:{}*'.format(PASTES_FOLDER)] - for key_to_rename in string_keys_to_rename: - - keys_to_rename = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - new_key = key.replace(PASTES_FOLDER, '', 1) - keys_to_rename.append( (key, new_key) ) - index = index + 1 - for key, new_key in keys_to_rename: - r_serv_metadata.rename(key, new_key) - - keys_to_rename = 
None - - set_keys_to_rename = ['tag:{}*'.format(PASTES_FOLDER), 'paste_regular_external_links:{}*'.format(PASTES_FOLDER), 'paste_onion_external_links:{}*'.format(PASTES_FOLDER), 'paste_children:{}*'.format(PASTES_FOLDER)] - for key_to_rename in set_keys_to_rename: - - keys_to_remove = [] - keys_to_rename = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - new_key = key.replace(PASTES_FOLDER, '', 1) - # a set with this key already exist - if r_serv_metadata.exists(new_key): - # save data - for new_key_value in r_serv_metadata.smembers(key): - r_serv_metadata.sadd(new_key, new_key_value) - keys_to_remove.append(key) - else: - keys_to_rename.append( (key, new_key) ) - index = index + 1 - for key in keys_to_remove: - r_serv_metadata.delete(key) - for key, new_key in keys_to_rename: - r_serv_metadata.rename(key, new_key) - - keys_to_remove = None - keys_to_rename = None - - - zset_keys_to_rename = ['nb_seen_hash:*', 'base64_hash:*', 'binary_hash:*'] - for key_to_rename in zset_keys_to_rename: - - keys_to_remove = [] - zkeys_to_remove = [] - keys_to_add = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - temp = [] - for zset_key, value in r_serv_metadata.zscan_iter(key, '*{}*'.format(PASTES_FOLDER)): - new_key = zset_key.replace(PASTES_FOLDER, '', 1) - index = index +1 - temp.append((key, zset_key)) - keys_to_add.append((key, new_key, value)) - if 0 < len(temp) < r_serv_metadata.zcard(key): - zkeys_to_remove.extend(temp) - else: - keys_to_remove.append(key) - for key in keys_to_remove: - r_serv_metadata.delete(key) - for key, zset_key in zkeys_to_remove: - r_serv_metadata.zrem(key, zset_key) - for key, new_key, value in keys_to_add: - r_serv_metadata.zincrby(key, new_key, int(value)) - keys_to_remove = None - zkeys_to_remove = None - keys_to_add = None - - set_keys_to_rename = ['paste_children:*'] - for key_to_rename in set_keys_to_rename: - keys_to_remove = [] - skeys_to_remove = [] - keys_to_add = [] - for key in 
r_serv_metadata.scan_iter(key_to_rename): - temp = [] - for set_key in r_serv_metadata.sscan_iter(key, '*{}*'.format(PASTES_FOLDER)): - new_key = set_key.replace(PASTES_FOLDER, '', 1) - index = index +1 - temp.append((key, set_key)) - keys_to_add.append((key, new_key)) - if 0 < len(temp) < r_serv_metadata.scard(key): - skeys_to_remove.extend(temp) - else: - keys_to_remove.append(key) - for key in keys_to_remove: - r_serv_metadata.delete(key) - for key, set_key in skeys_to_remove: - r_serv_metadata.srem(key, set_key) - for key, new_key in keys_to_add: - r_serv_metadata.sadd(key, new_key) - keys_to_remove = None - skeys_to_remove = None - keys_to_add = None - - hset_keys_to_rename = ['paste_metadata:{}*'.format(PASTES_FOLDER)] - for key_to_rename in hset_keys_to_rename: - - keys_to_rename = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - new_key = key.replace(PASTES_FOLDER, '', 1) - # a hset with this key already exist - if r_serv_metadata.exists(new_key): - pass - else: - keys_to_rename.append((key, new_key)) - index = index + 1 - for key, new_key in keys_to_rename: - r_serv_metadata.rename(key, new_key) - keys_to_rename = None - - # to verify 120/100 try with scan - hset_keys_to_rename = ['paste_metadata:*'] - for key_to_rename in hset_keys_to_rename: - for key in r_serv_metadata.scan_iter(key_to_rename): - father = r_serv_metadata.hget(key, 'father') - super_father = r_serv_metadata.hget(key, 'super_father') - - if father: - if PASTES_FOLDER in father: - index = index + 1 - r_serv_metadata.hdel(key, 'father') - r_serv_metadata.hset(key, 'father', father.replace(PASTES_FOLDER, '', 1)) - - if super_father: - if PASTES_FOLDER in super_father: - index = index + 1 - r_serv_metadata.hdel(key, 'super_father') - r_serv_metadata.hset(key, 'super_father', super_father.replace(PASTES_FOLDER, '', 1)) - - keys_to_rename = None - ''' - - end = time.time() print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start)) print() 
r_serv.set('v1.5:metadata', 1) + + ## + #Key, Dynamic Update + ## + #paste_children + #nb_seen_hash, base64_hash, binary_hash + #paste_onion_external_links + #misp_events, hive_cases + ## diff --git a/update/v1.4/Update-ARDB_Tags.py b/update/v1.4/Update-ARDB_Tags.py index 9928ec77..2e100bf7 100755 --- a/update/v1.4/Update-ARDB_Tags.py +++ b/update/v1.4/Update-ARDB_Tags.py @@ -7,14 +7,6 @@ import time import redis import configparser -def tags_key_fusion(old_item_path_key, new_item_path_key): - print('fusion:') - print(old_item_path_key) - print(new_item_path_key) - for tag in r_serv_metadata.smembers(old_item_path_key): - r_serv_metadata.sadd(new_item_path_key, tag) - r_serv_metadata.srem(old_item_path_key, tag) - if __name__ == '__main__': start_deb = time.time() @@ -109,8 +101,12 @@ if __name__ == '__main__': # update metadata last_seen if item_date > tag_metadata[tag]['last_seen']: tag_metadata[tag]['last_seen'] = item_date - r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) - + last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen') + if last_seen_db: + if item_date > int(last_seen_db): + r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) + else: + tag_metadata[tag]['last_seen'] = last_seen_db r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path) r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1) diff --git a/update/v1.4/Update.py b/update/v1.4/Update.py index c4b5fdfe..126b30a0 100755 --- a/update/v1.4/Update.py +++ b/update/v1.4/Update.py @@ -5,6 +5,7 @@ import os import sys import time import redis +import datetime import configparser if __name__ == '__main__': @@ -56,5 +57,9 @@ if __name__ == '__main__': #Set current ail version r_serv.set('ail:version', 'v1.5') + r_serv.set('ail:update_v1.5', 1) + + #Set v1.5 update date + r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d")) print('Done in {} s'.format(end - start_deb))