mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
90 lines
3 KiB
Python
Executable file
90 lines
3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# -*-coding:UTF-8 -*
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
|
|
from pyfaup.faup import Faup
|
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
from lib import ConfigLoader
|
|
|
|
def get_domain(item_id):
|
|
item_id = item_id.split('/')
|
|
item_id = item_id[-1]
|
|
return item_id[:-36]
|
|
|
|
def get_all_item(s_sha256):
|
|
return r_serv_onion.smembers(f'screenshot:{s_sha256}')
|
|
|
|
def sanitize_domain(domain):
|
|
faup.decode(domain)
|
|
domain_sanitized = faup.get()
|
|
domain_sanitized = domain_sanitized['domain']
|
|
try:
|
|
domain_sanitized = domain_sanitized.decode()
|
|
except:
|
|
pass
|
|
return domain_sanitized.lower()
|
|
|
|
def update_db(s_sha256):
|
|
screenshot_items = get_all_item(s_sha256)
|
|
if screenshot_items:
|
|
for item_id in screenshot_items:
|
|
item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path
|
|
domain = get_domain(item_id)
|
|
|
|
domain_sanitized = sanitize_domain(domain)
|
|
if domain != domain_sanitized:
|
|
r_serv_onion.sadd('incorrect_domain', domain)
|
|
domain = domain_sanitized
|
|
|
|
r_serv_onion.sadd('domain_screenshot:{}'.format(domain), s_sha256)
|
|
r_serv_onion.sadd('screenshot_domain:{}'.format(s_sha256), domain)
|
|
else:
|
|
pass
|
|
# broken screenshot
|
|
r_serv_onion.sadd('broken_screenshot', s_sha256)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
start_deb = time.time()
|
|
faup = Faup()
|
|
|
|
config_loader = ConfigLoader.ConfigLoader()
|
|
|
|
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
|
|
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
|
|
|
|
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
|
|
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
|
config_loader = None
|
|
|
|
r_serv_db.set('ail:update_in_progress', 'v2.6')
|
|
r_serv_db.set('ail:current_background_update', 'v2.6')
|
|
|
|
r_serv_db.set('ail:current_background_script_stat', 20)
|
|
r_serv_db.set('ail:current_background_script', 'screenshot update')
|
|
|
|
nb = 0
|
|
|
|
if os.path.isdir(SCREENSHOT_FOLDER):
|
|
for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False):
|
|
# print(dirs)
|
|
for name in files:
|
|
nb = nb + 1
|
|
screenshot_sha256 = os.path.join(root, name)
|
|
screenshot_sha256 = screenshot_sha256[:-4] # remove .png
|
|
screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1)
|
|
screenshot_sha256 = screenshot_sha256.replace('/', '')
|
|
update_db(screenshot_sha256)
|
|
# print('Screenshot updated: {}'.format(nb))
|
|
if nb % 1000 == 0:
|
|
r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb))
|
|
|
|
r_serv_db.set('ail:current_background_script_stat', 100)
|
|
|
|
end = time.time()
|
|
print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb))
|