From 41e6b4ec590668f048edc21c3a5d896e8fc2f1f2 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 25 Apr 2019 14:39:38 +0200 Subject: [PATCH] fix: [v1.5 background update screenshot] remove duplicate files --- bin/update-background.py | 10 +- update/v1.4/Update.py | 68 ------ update/v1.4/Update.sh | 60 ------ update/{v1.4 => v1.5}/Update-ARDB_Metadata.py | 0 update/{v1.4 => v1.5}/Update-ARDB_Onions.py | 0 .../Update-ARDB_Onions_screenshots.py | 2 + update/{v1.4 => v1.5}/Update-ARDB_Tags.py | 0 .../Update-ARDB_Tags_background.py | 0 update/v1.5/Update.py | 203 ++---------------- update/v1.5/Update.sh | 59 +++-- 10 files changed, 67 insertions(+), 335 deletions(-) delete mode 100755 update/v1.4/Update.py delete mode 100755 update/v1.4/Update.sh rename update/{v1.4 => v1.5}/Update-ARDB_Metadata.py (100%) rename update/{v1.4 => v1.5}/Update-ARDB_Onions.py (100%) rename update/{v1.4 => v1.5}/Update-ARDB_Onions_screenshots.py (98%) rename update/{v1.4 => v1.5}/Update-ARDB_Tags.py (100%) rename update/{v1.4 => v1.5}/Update-ARDB_Tags_background.py (100%) diff --git a/bin/update-background.py b/bin/update-background.py index 75f14e16..96a1258a 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -36,22 +36,22 @@ if __name__ == "__main__": r_serv.set('ail:update_in_progress', 'v1.5') r_serv.set('ail:current_background_update', 'v1.5') if not r_serv.sismember('ail:update_v1.5', 'onions'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Onions.py') + update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions.py') process = subprocess.run(['python' ,update_file]) if not r_serv.sismember('ail:update_v1.5', 'metadata'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Metadata.py') + update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Metadata.py') process = subprocess.run(['python' ,update_file]) if not 
r_serv.sismember('ail:update_v1.5', 'tags'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Tags.py') + update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags.py') process = subprocess.run(['python' ,update_file]) if not r_serv.sismember('ail:update_v1.5', 'tags_background'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Tags_background.py') + update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags_background.py') process = subprocess.run(['python' ,update_file]) if not r_serv.sismember('ail:update_v1.5', 'crawled_screenshot'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.4', 'Update-ARDB_Onions_screenshots.py') + update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions_screenshots.py') process = subprocess.run(['python' ,update_file]) if r_serv.scard('ail:update_v1.5') != 5: r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script') diff --git a/update/v1.4/Update.py b/update/v1.4/Update.py deleted file mode 100755 index af8800cf..00000000 --- a/update/v1.4/Update.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import redis -import datetime -import configparser - -if __name__ == '__main__': - - start_deb = time.time() - - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - - r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) - - print() - print('Updating ARDB_Onion ...') - index = 0 - start = time.time() - - # update crawler queue - for elem in r_serv_onion.smembers('onion_crawler_queue'): - if PASTES_FOLDER in elem: - r_serv_onion.srem('onion_crawler_queue', elem) - r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1)) - index = index +1 - for elem in r_serv_onion.smembers('onion_crawler_priority_queue'): - if PASTES_FOLDER in elem: - r_serv_onion.srem('onion_crawler_queue', elem) - r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1)) - index = index +1 - - end = time.time() - print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) - print() - - #Set current ail version - r_serv.set('ail:version', 'v1.5') - - #Set current update_in_progress - r_serv.set('ail:update_in_progress', 'v1.5') - r_serv.set('ail:current_background_update', 'v1.5') - - #Set current ail version - r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d")) - - print('Done in {} s'.format(end - start_deb)) diff --git a/update/v1.4/Update.sh b/update/v1.4/Update.sh deleted file mode 100755 index 099ce2cd..00000000 --- a/update/v1.4/Update.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." 
&& exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -k & -wait - -echo "" -bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh" -#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh" - -echo "" -echo -e $GREEN"Update DomainClassifier"$DEFAULT -echo "" -pip3 install --upgrade --force-reinstall git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking -pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git -wait -echo "" - -echo "" -echo -e $GREEN"Update Web thirdparty"$DEFAULT -echo "" -bash ${AIL_FLASK}update_thirdparty.sh & -wait -echo "" - -bash ${AIL_BIN}LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Fixing ARDB ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v1.4/Update.py & -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -k & -wait - -echo "" - -exit 0 diff --git a/update/v1.4/Update-ARDB_Metadata.py b/update/v1.5/Update-ARDB_Metadata.py similarity index 100% rename from update/v1.4/Update-ARDB_Metadata.py rename to update/v1.5/Update-ARDB_Metadata.py diff --git a/update/v1.4/Update-ARDB_Onions.py b/update/v1.5/Update-ARDB_Onions.py similarity index 100% rename from update/v1.4/Update-ARDB_Onions.py rename to update/v1.5/Update-ARDB_Onions.py diff --git a/update/v1.4/Update-ARDB_Onions_screenshots.py 
b/update/v1.5/Update-ARDB_Onions_screenshots.py similarity index 98% rename from update/v1.4/Update-ARDB_Onions_screenshots.py rename to update/v1.5/Update-ARDB_Onions_screenshots.py index 6b39a66b..5aa3cf0f 100755 --- a/update/v1.4/Update-ARDB_Onions_screenshots.py +++ b/update/v1.5/Update-ARDB_Onions_screenshots.py @@ -100,6 +100,8 @@ if __name__ == '__main__': os.makedirs(dirname) if not os.path.exists(filename_img): os.rename(img_path, filename_img) + else: + os.remove(img_path) item = os.path.join('crawled', date[0:4], date[4:6], date[6:8], file[:-4]) # add item metadata diff --git a/update/v1.4/Update-ARDB_Tags.py b/update/v1.5/Update-ARDB_Tags.py similarity index 100% rename from update/v1.4/Update-ARDB_Tags.py rename to update/v1.5/Update-ARDB_Tags.py diff --git a/update/v1.4/Update-ARDB_Tags_background.py b/update/v1.5/Update-ARDB_Tags_background.py similarity index 100% rename from update/v1.4/Update-ARDB_Tags_background.py rename to update/v1.5/Update-ARDB_Tags_background.py diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py index 6a75b47a..af8800cf 100755 --- a/update/v1.5/Update.py +++ b/update/v1.5/Update.py @@ -5,6 +5,7 @@ import os import sys import time import redis +import datetime import configparser if __name__ == '__main__': @@ -21,16 +22,10 @@ if __name__ == '__main__': PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - r_serv_tag = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), + r_serv = redis.StrictRedis( + host=cfg.get("ARDB_DB", "host"), + port=cfg.getint("ARDB_DB", "port"), + db=cfg.getint("ARDB_DB", "db"), decode_responses=True) r_serv_onion = redis.StrictRedis( @@ -39,189 +34,35 @@ if __name__ == '__main__': 
db=cfg.getint("ARDB_Onion", "db"), decode_responses=True) - ## Update metadata ## - print('Updating ARDB_Metadata ...') - index = 0 - start = time.time() - - string_keys_to_rename = ['misp_events:{}*'.format(PASTES_FOLDER), 'hive_cases:{}*'.format(PASTES_FOLDER)] - for key_to_rename in string_keys_to_rename: - - keys_to_rename = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - new_key = key.replace(PASTES_FOLDER, '', 1) - keys_to_rename.append( (key, new_key) ) - index = index + 1 - for key, new_key in keys_to_rename: - r_serv_metadata.rename(key, new_key) - - keys_to_rename = None - - set_keys_to_rename = ['tag:{}*'.format(PASTES_FOLDER), 'hash_paste:{}*'.format(PASTES_FOLDER), 'base64_paste:{}*'.format(PASTES_FOLDER), 'binary_paste:{}*'.format(PASTES_FOLDER), 'hexadecimal_paste:{}*'.format(PASTES_FOLDER), 'paste_regular_external_links:{}*'.format(PASTES_FOLDER), 'paste_onion_external_links:{}*'.format(PASTES_FOLDER), 'paste_children:{}*'.format(PASTES_FOLDER)] - for key_to_rename in set_keys_to_rename: - - keys_to_remove = [] - keys_to_rename = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - new_key = key.replace(PASTES_FOLDER, '', 1) - # a set with this key already exist - if r_serv_metadata.exists(new_key): - # save data - for new_key_value in r_serv_metadata.smembers(key): - r_serv_metadata.sadd(new_key, new_key_value) - keys_to_remove.append(key) - else: - keys_to_rename.append( (key, new_key) ) - index = index + 1 - for key in keys_to_remove: - r_serv_metadata.delete(key) - for key, new_key in keys_to_rename: - r_serv_metadata.rename(key, new_key) - - keys_to_remove = None - keys_to_rename = None - - - zset_keys_to_rename = ['nb_seen_hash:*', 'base64_hash:*', 'binary_hash:*'] - for key_to_rename in zset_keys_to_rename: - - keys_to_remove = [] - zkeys_to_remove = [] - keys_to_add = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - temp = [] - for zset_key, value in r_serv_metadata.zscan_iter(key, '*{}*'.format(PASTES_FOLDER)): 
- new_key = zset_key.replace(PASTES_FOLDER, '', 1) - index = index +1 - temp.append((key, zset_key)) - keys_to_add.append((key, new_key, value)) - if 0 < len(temp) < r_serv_metadata.zcard(key): - zkeys_to_remove.extend(temp) - else: - keys_to_remove.append(key) - for key in keys_to_remove: - r_serv_metadata.delete(key) - for key, zset_key in zkeys_to_remove: - r_serv_metadata.zrem(key, zset_key) - for key, new_key, value in keys_to_add: - r_serv_metadata.zincrby(key, new_key, int(value)) - keys_to_remove = None - zkeys_to_remove = None - keys_to_add = None - - set_keys_to_rename = ['paste_children:*'] - for key_to_rename in set_keys_to_rename: - keys_to_remove = [] - skeys_to_remove = [] - keys_to_add = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - temp = [] - for set_key in r_serv_metadata.sscan_iter(key, '*{}*'.format(PASTES_FOLDER)): - new_key = set_key.replace(PASTES_FOLDER, '', 1) - index = index +1 - temp.append((key, set_key)) - keys_to_add.append((key, new_key)) - if 0 < len(temp) < r_serv_metadata.scard(key): - skeys_to_remove.extend(temp) - else: - keys_to_remove.append(key) - for key in keys_to_remove: - r_serv_metadata.delete(key) - for key, set_key in skeys_to_remove: - r_serv_metadata.srem(key, set_key) - for key, new_key in keys_to_add: - r_serv_metadata.sadd(key, new_key) - keys_to_remove = None - skeys_to_remove = None - keys_to_add = None - - hset_keys_to_rename = ['paste_metadata:{}*'.format(PASTES_FOLDER)] - for key_to_rename in hset_keys_to_rename: - - keys_to_rename = [] - for key in r_serv_metadata.scan_iter(key_to_rename): - new_key = key.replace(PASTES_FOLDER, '', 1) - # a hset with this key already exist - keys_to_rename.append((key, new_key)) - index = index + 1 - for key, new_key in keys_to_rename: - r_serv_metadata.rename(key, new_key) - keys_to_rename = None - - # to verify 120/100 try with scan - hset_keys_to_rename = ['paste_metadata:*'] - for key_to_rename in hset_keys_to_rename: - for key in 
r_serv_metadata.scan_iter(key_to_rename): - father = r_serv_metadata.hget(key, 'father') - super_father = r_serv_metadata.hget(key, 'super_father') - - if father: - if PASTES_FOLDER in father: - index = index + 1 - r_serv_metadata.hdel(key, 'father') - r_serv_metadata.hset(key, 'father', father.replace(PASTES_FOLDER, '', 1)) - - if super_father: - if PASTES_FOLDER in super_father: - index = index + 1 - r_serv_metadata.hdel(key, 'super_father') - r_serv_metadata.hset(key, 'super_father', super_father.replace(PASTES_FOLDER, '', 1)) - - keys_to_rename = None - - - end = time.time() - - print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start)) - - print() - print('Updating ARDB_Tags ...') - index = 0 - start = time.time() - - tags_list = r_serv_tag.smembers('list_tags') - for tag in tags_list: - res = False - - list_pastes = r_serv_tag.sscan(tag, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_pastes[1]: - for paste in list_pastes[1]: - r_serv_tag.srem(tag, paste) - r_serv_tag.sadd(tag, paste.replace(PASTES_FOLDER, '', 1)) - index = index + 1 - - list_pastes = r_serv_tag.sscan(tag, 0, '*{}*'.format(PASTES_FOLDER), 1000) - - end = time.time() - print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start)) - print() print('Updating ARDB_Onion ...') index = 0 start = time.time() - hset_keys_to_rename = ['onion_metadata:*'] - for key_to_rename in hset_keys_to_rename: - for key in r_serv_onion.scan_iter(key_to_rename): - list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - while list_data[1]: - for hash_key, value in list_data[1].items(): - r_serv_onion.hdel(key, hash_key) - new_hash = hash_key.replace(PASTES_FOLDER, '', 1) - new_value = value.replace(PASTES_FOLDER, '', 1) - index = index +1 - r_serv_onion.hset(key, new_hash, new_value) - - list_data = r_serv_onion.hscan(key, 0, '*{}*'.format(PASTES_FOLDER), 1000) - + # update crawler queue for elem in r_serv_onion.smembers('onion_crawler_queue'): if 
PASTES_FOLDER in elem: r_serv_onion.srem('onion_crawler_queue', elem) r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1)) index = index +1 - + for elem in r_serv_onion.smembers('onion_crawler_priority_queue'): + if PASTES_FOLDER in elem: + r_serv_onion.srem('onion_crawler_priority_queue', elem) + r_serv_onion.sadd('onion_crawler_priority_queue', elem.replace(PASTES_FOLDER, '', 1)) + index = index +1 end = time.time() print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) print() + + #Set current ail version + r_serv.set('ail:version', 'v1.5') + + #Set current update_in_progress + r_serv.set('ail:update_in_progress', 'v1.5') + r_serv.set('ail:current_background_update', 'v1.5') + + #Set v1.5 update date + r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d")) + print('Done in {} s'.format(end - start_deb)) diff --git a/update/v1.5/Update.sh b/update/v1.5/Update.sh index 3cc45f01..099ce2cd 100755 --- a/update/v1.5/Update.sh +++ b/update/v1.5/Update.sh @@ -12,31 +12,48 @@ export PATH=$AIL_ARDB:$PATH export PATH=$AIL_BIN:$PATH export PATH=$AIL_FLASK:$PATH -echo "Killing all screens ..." -bash -c "bash ${AIL_BIN}/LAUNCH.sh -k" -echo "" -echo "Starting ARDB ..." -bash -c "bash ${AIL_BIN}/launch_ardb.sh" +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" -flag_ardb=true -while $flag_ardb; do - sleep 1 - bash -c "bash ${AIL_BIN}/check_ardb.sh" - if [ $? == 0 ]; then - flag_ardb=false - else - echo "ARDB not available, waiting 5s before retry" - sleep 5 - fi -done +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -k & +wait echo "" -echo "Fixing ARDB ..." -echo "" -bash -c "python ${AIL_HOME}/update/v1.5/Update.py" +bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh" +#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh" -echo "Shutting down ARDB ..."
-bash -c "bash ${AIL_BIN}/LAUNCH.sh -k" +echo "" +echo -e $GREEN"Update DomainClassifier"$DEFAULT +echo "" +pip3 install --upgrade --force-reinstall "git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking" +pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git +wait +echo "" + +echo "" +echo -e $GREEN"Update Web thirdparty"$DEFAULT +echo "" +bash ${AIL_FLASK}update_thirdparty.sh & +wait +echo "" + +bash ${AIL_BIN}LAUNCH.sh -lav & +wait +echo "" + +echo "" +echo -e $GREEN"Fixing ARDB ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v1.5/Update.py & +wait +echo "" +echo "" + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -k & +wait echo ""