chg: [updater] refactor background updater + add v5.2 update

This commit is contained in:
Terrtia 2023-07-12 11:36:47 +02:00
parent 28c647d370
commit 73bfe614df
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
10 changed files with 334 additions and 180 deletions

View file

@ -15,38 +15,15 @@ config_loader = ConfigLoader()
r_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
BACKGROUND_UPDATES = {
'v1.5': {
'nb_updates': 5,
'message': 'Tags and Screenshots'
},
'v2.4': {
'nb_updates': 1,
'message': ' Domains Tags and Correlations'
},
'v2.6': {
'nb_updates': 1,
'message': 'Domains Tags and Correlations'
},
'v2.7': {
'nb_updates': 1,
'message': 'Domains Tags'
},
'v3.4': {
'nb_updates': 1,
'message': 'Domains Languages'
},
'v3.7': {
'nb_updates': 1,
'message': 'Trackers first_seen/last_seen'
}
}
# # # # # # # #
# #
# UPDATE #
# #
# # # # # # # #
def get_ail_version():
    """Return the value stored under the ``ail:version`` key of the DB."""
    version = r_db.get('ail:version')
    return version
def get_ail_float_version():
version = get_ail_version()
if version:
@ -55,6 +32,179 @@ def get_ail_float_version():
version = 0
return version
# # # - - # # #
# # # # # # # # # # # #
# #
# UPDATE BACKGROUND #
# #
# # # # # # # # # # # #
# Registry of the background updates, keyed by AIL version tag.
#   'message': text returned by AILBackgroundUpdate.get_message()
#   'scripts': script file names returned by AILBackgroundUpdate.get_scripts();
#              get_script_path() resolves them under $AIL_HOME/update/<version>/
BACKGROUND_UPDATES = {
'v5.2': {
'message': 'Compress HAR',
'scripts': ['compress_har.py']
},
}
class AILBackgroundUpdate:
    """
    AIL Background Update.

    Wraps the state of one background update. The state lives in the
    ``ail:update:background`` hash of ``r_db`` (fields: ``version``,
    ``script``, ``done``, ``error``, ``progress``, ``nb_to_update``,
    ``nb_updated``). The set ``ail:updates:background`` holds the versions
    that still have to be processed.
    """

    def __init__(self, version):
        self.version = version

    def _get_field(self, field):
        """Return the raw value of ``field`` in the update hash (None if unset)."""
        return r_db.hget('ail:update:background', field)

    def _set_field(self, field, value):
        """Store ``value`` under ``field`` in the update hash."""
        r_db.hset('ail:update:background', field, value)

    def get_version(self):
        return self.version

    def get_message(self):
        """Return the message registered for this version ('' if unknown)."""
        return BACKGROUND_UPDATES.get(self.version, {}).get('message', '')

    def get_error(self):
        return self._get_field('error')

    def set_error(self, error):  # TODO ADD LOGS
        self._set_field('error', error)

    def get_nb_scripts(self):
        """Return the number of scripts of this update.

        A version without a registered 'scripts' entry counts as 1, as in the
        original implementation, so percentage computations stay defined.
        """
        return len(BACKGROUND_UPDATES.get(self.version, {}).get('scripts', ['']))

    def get_scripts(self):
        """Return the list of script names registered for this version."""
        return BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [])

    def get_nb_scripts_done(self):
        """Return the number of completed scripts (0 if unset or not an int)."""
        done = self._get_field('done')
        try:
            done = int(done)
        except (TypeError, ValueError):
            done = 0
        return done

    def inc_nb_scripts_done(self):
        self._set_field('done', self.get_nb_scripts_done() + 1)

    def get_script(self):
        """Return the name of the script currently recorded, or None."""
        return self._get_field('script')

    def get_script_path(self):
        """Return the path of the current script, or None if none is recorded.

        Only the base name of the stored value is used, so the result always
        points inside ``$AIL_HOME/update/<version>/``.
        """
        script = self.get_script()
        # No script recorded yet: os.path.basename(None) would raise TypeError.
        if not script:
            return None
        filename = os.path.basename(script)
        if not filename:
            return None
        return os.path.join(os.environ['AIL_HOME'], 'update', self.version, filename)

    def get_nb_to_update(self):  # TODO use cache ?????
        """Return the number of items to process (1 when unset)."""
        nb_to_update = self._get_field('nb_to_update')
        if not nb_to_update:
            nb_to_update = 1
        return int(nb_to_update)

    def set_nb_to_update(self, nb):
        self._set_field('nb_to_update', int(nb))

    def get_nb_updated(self):  # TODO use cache ?????
        """Return the number of items already processed (0 when unset)."""
        nb_updated = self._get_field('nb_updated')
        if not nb_updated:
            nb_updated = 0
        return int(nb_updated)

    def inc_nb_updated(self):  # TODO use cache ?????
        r_db.hincrby('ail:update:background', 'nb_updated', 1)

    def get_progress(self):  # TODO use cache ?????
        return self._get_field('progress')

    def set_progress(self, progress):
        self._set_field('progress', progress)

    def update_progress(self):
        """Recompute, store, print and return the current script progress (percent)."""
        nb_updated = self.get_nb_updated()
        nb_to_update = self.get_nb_to_update()
        if nb_updated == nb_to_update:
            progress = 100
        elif nb_updated > nb_to_update:
            # More items processed than announced: never report more than 99%.
            progress = 99
        else:
            progress = int((nb_updated * 100) / nb_to_update)
        self.set_progress(progress)
        print(f'{nb_updated}/{nb_to_update} updated {progress}%')
        return progress

    def is_running(self):
        """Return True if the update hash is recorded for this version."""
        return r_db.hget('ail:update:background', 'version') == self.version

    def get_meta(self, options=None):
        """Return a dict describing this update.

        :param options: container of optional fields to add; 'message' adds
            the update message. None means no optional field.
        """
        if options is None:
            options = set()
        meta = {'version': self.get_version(),
                'error': self.get_error(),
                'script': self.get_script(),
                'script_progress': self.get_progress(),
                'nb_update': self.get_nb_scripts(),
                'nb_completed': self.get_nb_scripts_done()}
        # An update registered with an empty script list has nothing to run:
        # report 0% instead of raising ZeroDivisionError.
        if meta['nb_update']:
            meta['progress'] = int(meta['nb_completed'] * 100 / meta['nb_update'])
        else:
            meta['progress'] = 0
        if 'message' in options:
            meta['message'] = self.get_message()
        return meta

    def start(self):
        """Record this version in the update hash and drop any previous error."""
        self._set_field('version', self.version)
        r_db.hdel('ail:update:background', 'error')

    def start_script(self, script):
        """Reset the per-script fields and record ``script`` as the current one."""
        self.clear()
        self._set_field('script', script)
        self.set_progress(0)

    def end_script(self):
        """Mark the current script as complete and count it as done."""
        self.set_progress(100)
        self.inc_nb_scripts_done()

    def clear(self):
        """Delete the per-script fields (error, progress and item counters)."""
        for field in ('error', 'progress', 'nb_updated', 'nb_to_update'):
            r_db.hdel('ail:update:background', field)

    def end(self):
        """Delete the update hash and remove this version from the pending set."""
        r_db.delete('ail:update:background')
        r_db.srem('ail:updates:background', self.version)
# To Add in update script
def add_background_update(version):
    """Add ``version`` to the ``ail:updates:background`` set of pending updates."""
    pending_key = 'ail:updates:background'
    r_db.sadd(pending_key, version)
def is_update_background_running():
    """Return the result of checking that the ``ail:update:background`` key exists."""
    running = r_db.exists('ail:update:background')
    return running
def get_update_background_version():
    """Return the ``version`` field of the ``ail:update:background`` hash."""
    version = r_db.hget('ail:update:background', 'version')
    return version
def get_update_background_meta(options=None):
    """Return the metadata of the background update in progress.

    :param options: container of optional fields forwarded to
        ``AILBackgroundUpdate.get_meta``; None means no optional field.
    :return: the metadata dict, or an empty dict when no version is recorded.
    """
    # Build a fresh container per call instead of sharing one mutable default.
    if options is None:
        options = set()
    version = get_update_background_version()
    if not version:
        return {}
    return AILBackgroundUpdate(version).get_meta(options=options)
def get_update_background_to_launch():
    """Return the pending background update versions.

    Only versions declared in BACKGROUND_UPDATES are returned, in the order
    in which they are declared there.
    """
    pending = r_db.smembers('ail:updates:background')
    return [version for version in BACKGROUND_UPDATES if version in pending]
# # # - - # # #
##########################################################################################
##########################################################################################
##########################################################################################
def get_ail_all_updates(date_separator='-'):
dict_update = r_db.hgetall('ail:update_date')
@ -87,111 +237,6 @@ def check_version(version):
return True
#### UPDATE BACKGROUND ####
def exits_background_update_to_launch():
return r_db.scard('ail:update:to_update') != 0
def is_version_in_background_update(version):
return r_db.sismember('ail:update:to_update', version)
def get_all_background_updates_to_launch():
return r_db.smembers('ail:update:to_update')
def get_current_background_update():
return r_db.get('ail:update:update_in_progress')
def get_current_background_update_script():
return r_db.get('ail:update:current_background_script')
def get_current_background_update_script_path(version, script_name):
return os.path.join(os.environ['AIL_HOME'], 'update', version, script_name)
def get_current_background_nb_update_completed():
return r_db.scard('ail:update:update_in_progress:completed')
def get_current_background_update_progress():
progress = r_db.get('ail:update:current_background_script_stat')
if not progress:
progress = 0
return int(progress)
def get_background_update_error():
return r_db.get('ail:update:error')
def add_background_updates_to_launch(version):
return r_db.sadd('ail:update:to_update', version)
def start_background_update(version):
r_db.delete('ail:update:error')
r_db.set('ail:update:update_in_progress', version)
def set_current_background_update_script(script_name):
r_db.set('ail:update:current_background_script', script_name)
r_db.set('ail:update:current_background_script_stat', 0)
def set_current_background_update_progress(progress):
r_db.set('ail:update:current_background_script_stat', progress)
def set_background_update_error(error):
r_db.set('ail:update:error', error)
def end_background_update_script():
r_db.sadd('ail:update:update_in_progress:completed')
def end_background_update(version):
r_db.delete('ail:update:update_in_progress')
r_db.delete('ail:update:current_background_script')
r_db.delete('ail:update:current_background_script_stat')
r_db.delete('ail:update:update_in_progress:completed')
r_db.srem('ail:update:to_update', version)
def clear_background_update():
r_db.delete('ail:update:error')
r_db.delete('ail:update:update_in_progress')
r_db.delete('ail:update:current_background_script')
r_db.delete('ail:update:current_background_script_stat')
r_db.delete('ail:update:update_in_progress:completed')
def get_update_background_message(version):
return BACKGROUND_UPDATES[version]['message']
# TODO: Detect error in subprocess
def get_update_background_metadata():
dict_update = {}
version = get_current_background_update()
if version:
dict_update['version'] = version
dict_update['script'] = get_current_background_update_script()
dict_update['script_progress'] = get_current_background_update_progress()
dict_update['nb_update'] = BACKGROUND_UPDATES[dict_update['version']]['nb_updates']
dict_update['nb_completed'] = get_current_background_nb_update_completed()
dict_update['progress'] = int(dict_update['nb_completed'] * 100 / dict_update['nb_update'])
dict_update['error'] = get_background_update_error()
return dict_update
##-- UPDATE BACKGROUND --##
if __name__ == '__main__':
res = check_version('v3.1..1')
print(res)

View file

@ -366,21 +366,25 @@ def _reprocess_all_hars_etag():
etag = Etags.create(etag_content)
etag.add(date, domain)
def _gzip_har(har_id):
    """Compress one HAR file of HAR_DIR to ``<file>.gz`` and delete the original.

    Ids already ending with ``.gz`` are ignored, as are missing or empty
    files. The original is only removed once the compressed copy exists.

    :param har_id: HAR path relative to HAR_DIR
    """
    import shutil  # local import: only needed by this maintenance helper

    if har_id.endswith('.gz'):
        return
    har_path = os.path.join(HAR_DIR, har_id)
    gz_path = f'{har_path}.gz'

    if not os.path.exists(gz_path):
        # Nothing to compress: keep empty files untouched, ignore missing ones.
        if not os.path.isfile(har_path) or os.path.getsize(har_path) == 0:
            return
        # Write to a temporary name and rename once complete, so an interrupted
        # run never leaves a truncated .gz that would later justify deleting
        # the original file.
        tmp_path = f'{gz_path}.tmp'
        try:
            with open(har_path, 'rb') as f_in, gzip.open(tmp_path, 'wb') as f_out:
                # Stream the content instead of loading the whole HAR in memory.
                shutil.copyfileobj(f_in, f_out)
            os.replace(tmp_path, gz_path)
        except BaseException:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise

    if os.path.exists(gz_path) and os.path.exists(har_path):
        os.remove(har_path)
        print('delete:', har_path)
def _gzip_all_hars():
    """Compress every HAR returned by get_all_har_ids().

    The per-file work is delegated to _gzip_har() so the compression logic
    exists in one place only.
    """
    for har_id in get_all_har_ids():
        _gzip_har(har_id)
# # # - - # # #

View file

@ -10,6 +10,8 @@ Update AIL in the background
"""
import os
import logging
import logging.config
import sys
import subprocess
@ -19,35 +21,52 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib import ail_updates
def launch_background_upgrade(version, l_script_name):
if ail_updates.is_version_in_background_update(version):
ail_updates.start_background_update(version)
logging.config.dictConfig(ail_logger.get_config(name='updates'))
def launch_background_upgrade(version):
    """Run the remaining background update scripts of ``version``.

    Scripts already counted as done are skipped, so an interrupted update
    resumes where it stopped. On the first failing script the error is stored
    on the update, logged, and the process exits.

    :param version: AIL version tag of the background update to run
    """
    logger = logging.getLogger()
    print('launching background update', version)
    update = ail_updates.AILBackgroundUpdate(version)

    # Read the number of completed scripts before (re)starting the update.
    nb_done = update.get_nb_scripts_done()
    update.start()
    scripts = update.get_scripts()[nb_done:]

    for script in scripts:
        print('launching background script update', script)
        # launch script
        update.start_script(script)
        script_path = update.get_script_path()
        if script_path:
            try:
                # stderr must be captured explicitly, otherwise process.stderr
                # is always None and the real error is never reported.
                process = subprocess.run(['python', script_path], stderr=subprocess.PIPE)
            except Exception as e:
                update.set_error(str(e))
                logger.error(str(e))
                sys.exit(0)
            if process.returncode != 0:
                error = ''
                if process.stderr:
                    error = process.stderr.decode(errors='replace').strip()
                if not error:
                    error = 'Error Updater Script'
                logger.error(error)
                update.set_error(error)
                # NOTE(review): exit status 0 on failure is kept from the
                # original code; the failure is reported through the stored
                # error - confirm no caller needs a non-zero status.
                sys.exit(0)

        if not update.get_error():
            update.end_script()
        else:
            logger.warning('Updater exited on error')
            sys.exit(0)

    update.end()
if __name__ == "__main__":
    # Resume an interrupted update first.
    if ail_updates.is_update_background_running():
        v = ail_updates.get_update_background_version()
        launch_background_upgrade(v)
    # Then run every queued update. A completed update removes itself from
    # the queue, and a failing one exits the process, so nothing runs twice.
    for ver in ail_updates.get_update_background_to_launch():
        launch_background_upgrade(ver)

View file

@ -20,7 +20,7 @@ class AIL_Updater(object):
self.start_time = time.time()
self.config = ConfigLoader()
self.r_serv = self.config.get_redis_conn("Kvrocks_DB")
self.r_serv = self.config.get_db_conn("Kvrocks_DB")
self.f_version = float(self.version[1:])
self.current_f_version = ail_updates.get_ail_float_version()
@ -35,7 +35,7 @@ class AIL_Updater(object):
"""
Update DB version
"""
ail_updates.add_ail_update(version)
ail_updates.add_ail_update(self.version)
def run_update(self):
self.update()

24
update/v5.2/Update.py Executable file
View file

@ -0,0 +1,24 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.ail_updater import AIL_Updater
from lib import ail_updates
class Updater(AIL_Updater):
    """Updater for this release; it only runs the generic AIL_Updater steps."""

    def __init__(self, version):
        super().__init__(version)
if __name__ == '__main__':
    # Run the update, then queue the background update for this release.
    Updater('v5.2').run_update()
    ail_updates.add_background_update('v5.2')

33
update/v5.2/Update.sh Executable file
View file

@ -0,0 +1,33 @@
#!/bin/bash

# AIL v5.2 update: stop AIL, refresh the git submodules, then run Update.py.

# All of these are exported by the AIL virtual environment.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e "${GREEN}Shutting down AIL ...${DEFAULT}"
# Paths are quoted so an install directory containing spaces still works.
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e "${GREEN}Updating AIL VERSION ...${DEFAULT}"
echo ""
python "${AIL_HOME}/update/v5.2/Update.py"
wait
echo ""
echo ""

exit 0

27
update/v5.2/compress_har.py Executable file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import gzip
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_updates
from lib import crawlers
if __name__ == '__main__':
    # Background update v5.2: gzip every stored HAR file and report progress.
    update = ail_updates.AILBackgroundUpdate('v5.2')
    hars_ids = crawlers.get_all_har_ids()
    update.set_nb_to_update(len(hars_ids))

    for n, har_id in enumerate(hars_ids, start=1):
        crawlers._gzip_har(har_id)
        update.inc_nb_updated()
        # Refresh the stored progress every 100 files only; the counter was
        # never incremented before, so it was recomputed for every file.
        if n % 100 == 0:
            update.update_progress()
    # Publish the final figure once the loop is over.
    update.update_progress()

    # Final pass kept from the original script.
    # NOTE(review): presumably catches HARs written while the loop was running - confirm.
    crawlers._gzip_all_hars()

View file

@ -48,7 +48,7 @@ def settings_page():
@login_required
@login_read_only
def get_background_update_metadata_json():
return jsonify(ail_updates.get_update_background_metadata())
return jsonify(ail_updates.get_update_background_meta(options={}))
@settings_b.route("/settings/modules", methods=['GET'])
@login_required

View file

@ -167,10 +167,9 @@ def index():
# Check if update in progress
background_update = False
update_message = ''
if ail_updates.get_current_background_update():
if ail_updates.is_update_background_running():
background_update = True
# update_message = ail_updates.get_update_background_message()
update_message = None
update_message = ail_updates.AILBackgroundUpdate(ail_updates.get_update_background_version()).get_message()
return render_template("index.html", default_minute = default_minute,
threshold_stucked_module=threshold_stucked_module,

View file

@ -141,6 +141,9 @@
</body>
<script>
var to_refresh
$(document).ready(function(){
$("#nav_server_status").addClass("active");
$("#nav_server").removeClass("text-muted");
@ -199,12 +202,12 @@ function refresh_update_status(){
});
if (to_refresh) {
setTimeout("refresh_crawler_status()", 10000);
setTimeout("refresh_update_status()", 10000);
}
}
update_progress();
refresh_update_status();
//Interval
var progress_interval = setInterval(function(){
refresh_update_status()