mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [Languages]detect + search domains languages
This commit is contained in:
parent
28f6963ff4
commit
6bc54baf74
17 changed files with 990 additions and 21 deletions
|
@ -216,6 +216,8 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Tags" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tags.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Languages" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Languages.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x"
|
||||
|
|
33
bin/Languages.py
Executable file
33
bin/Languages.py
Executable file
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import cld3
|
||||
|
||||
from packages import Item
|
||||
from lib import Domain
|
||||
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
|
||||
if __name__ == '__main__':
    # Local import: the idle branch below calls time.sleep(), but `time` is
    # not among this script's module-level imports.
    import time

    publisher.port = 6380
    publisher.channel = 'Script'
    # Section name in bin/packages/modules.cfg
    config_section = 'Languages'
    # Setup the I/O queues
    p = Process(config_section)

    # Consume messages forever; for every crawled item, record the languages
    # detected in it on the item's domain.
    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        item_id = Item.get_item_id(message)
        if Item.is_crawled(item_id):
            domain = Item.get_item_domain(item_id)
            Domain.add_domain_languages_by_item_id(domain, item_id)
|
|
@ -9,6 +9,7 @@ The ``Domain``
|
|||
|
||||
import os
|
||||
import sys
|
||||
import itertools
|
||||
import time
|
||||
import redis
|
||||
import random
|
||||
|
@ -24,6 +25,7 @@ import Tag
|
|||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
import Correlate_object
|
||||
import Language
|
||||
import Screenshot
|
||||
import Username
|
||||
|
||||
|
@ -66,6 +68,15 @@ def sanitize_domain_type(domain_type):
|
|||
else:
|
||||
return 'regular'
|
||||
|
||||
def sanitize_domain_types(l_domain_type):
    '''
    Validate a list of domain types.

    :param l_domain_type: list of domain types to check

    :return: l_domain_type unchanged when it is non-empty and every entry is a
             known domain type, otherwise the list of all domain types
    '''
    known_types = get_all_domains_type()
    if l_domain_type and all(d_type in known_types for d_type in l_domain_type):
        return l_domain_type
    return known_types
|
||||
|
||||
######## DOMAINS ########
|
||||
def get_all_domains_type():
|
||||
return ['onion', 'regular']
|
||||
|
@ -210,6 +221,15 @@ def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[],
|
|||
else:
|
||||
return None
|
||||
|
||||
|
||||
|
||||
## TODO: filters:
|
||||
# - tags
|
||||
# - languages
|
||||
# - daterange UP
|
||||
def get_domains_by_filters():
    '''
    Placeholder: filtering is not implemented yet (see the TODO list above).
    '''
    return None
|
||||
|
||||
def create_domains_metadata_list(list_domains, domain_type):
|
||||
l_domains = []
|
||||
for domain in list_domains:
|
||||
|
@ -218,9 +238,98 @@ def create_domains_metadata_list(list_domains, domain_type):
|
|||
else:
|
||||
dom_type = domain_type
|
||||
l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True,
|
||||
ports=True, tags=True, screenshot=True, tags_safe=True))
|
||||
ports=True, tags=True, languages=True, screenshot=True, tags_safe=True))
|
||||
return l_domains
|
||||
|
||||
|
||||
######## LANGUAGES ########
|
||||
def get_all_domains_languages():
    '''
    Return the members of the 'all_domains_languages' set.
    '''
    all_languages = r_serv_onion.smembers('all_domains_languages')
    return all_languages
|
||||
|
||||
def get_domains_by_languages(languages, l_domain_type=[]):
    '''
    Get the domains that are associated with ALL the given languages.

    :param languages: list of language codes, as used in the
                      'language:domains:<type>:<language>' keys
    :param l_domain_type: domain types to search (sanitized first)

    :return: [] when no language is given; the result of
             get_all_domains_by_language() for a single language; otherwise a
             list concatenating, per domain type, the intersection of the
             per-language domain sets
    '''
    l_domain_type = sanitize_domain_types(l_domain_type)
    if not languages:
        return []
    if len(languages) == 1:
        return get_all_domains_by_language(languages[0], l_domain_type=l_domain_type)

    matching_domains = []
    for domain_type in l_domain_type:
        keys = ['language:domains:{}:{}'.format(domain_type, language) for language in languages]
        common = r_serv_onion.sinter(keys[0], *keys[1:])
        if common:
            matching_domains.extend(common)
    return matching_domains
|
||||
|
||||
def get_all_domains_by_language(language, l_domain_type=[]):
    '''
    Get every domain associated with one language.

    :param language: language code, as used in the
                     'language:domains:<type>:<language>' keys
    :param l_domain_type: domain types to search (sanitized first)

    :return: members of the single matching set, or the union of the sets of
             all requested domain types
    '''
    l_domain_type = sanitize_domain_types(l_domain_type)
    keys = ['language:domains:{}:{}'.format(domain_type, language) for domain_type in l_domain_type]
    if len(keys) == 1:
        return r_serv_onion.smembers(keys[0])
    return r_serv_onion.sunion(keys[0], *keys[1:])
|
||||
|
||||
def get_domain_languages(domain, r_list=False):
    '''
    Get the languages stored for a domain.

    :param domain: domain name
    :param r_list: when True, convert the result to a list

    :return: members of 'domain:language:<domain>'
    '''
    languages = r_serv_onion.smembers('domain:language:{}'.format(domain))
    return list(languages) if r_list else languages
|
||||
|
||||
def add_domain_language(domain, language):
    '''
    Associate a language with a domain and update the language indexes.

    :param domain: domain name
    :param language: language code; only the part before the first '-' is kept
    '''
    iso_code = language.split('-')[0]
    d_type = get_domain_type(domain)
    updates = (
        ('all_domains_languages', iso_code),
        ('all_domains_languages:{}'.format(d_type), iso_code),
        ('language:domains:{}:{}'.format(d_type, iso_code), domain),
        ('domain:language:{}'.format(domain), iso_code),
    )
    for set_key, member in updates:
        r_serv_onion.sadd(set_key, member)
|
||||
|
||||
def add_domain_languages_by_item_id(domain, item_id):
    '''
    Detect the languages of an item and add each of them to a domain.

    :param domain: domain name
    :param item_id: id of the item whose content is analysed
    '''
    detected = Item.get_item_languages(item_id, min_proportion=0.2, min_probability=0.8)
    for detection in detected:
        add_domain_language(domain, detection.language)
|
||||
|
||||
def delete_domain_languages(domain):
    '''
    Remove every language association stored for a domain and clean up the
    language indexes that become empty as a result.

    :param domain: domain name
    '''
    domain_type = get_domain_type(domain)
    for language in get_domain_languages(domain):
        lang_domains_key = 'language:domains:{}:{}'.format(domain_type, language)
        r_serv_onion.srem(lang_domains_key, domain)
        if not r_serv_onion.exists(lang_domains_key):
            # no domain of this type uses the language anymore
            r_serv_onion.srem('all_domains_languages:{}'.format(domain_type), language)
            # Drop the language from the global set only when no domain type
            # lists it anymore. A separate loop variable is used so the
            # domain's own type is not overwritten for the next language.
            still_used = any(
                r_serv_onion.sismember('all_domains_languages:{}'.format(d_type), language)
                for d_type in get_all_domains_type()
            )
            if not still_used:
                r_serv_onion.srem('all_domains_languages', language)
    r_serv_onion.delete('domain:language:{}'.format(domain))
|
||||
|
||||
def _delete_all_domains_languages():
    '''
    Delete the language associations of every domain reachable through the
    'all_domains_languages' set.
    '''
    for iso_code in get_all_domains_languages():
        domains = get_all_domains_by_language(iso_code)
        for domain_name in domains:
            delete_domain_languages(domain_name)
|
||||
|
||||
## API ##
|
||||
## TODO: verify domains type + languages list
|
||||
## TODO: add pagination
|
||||
def api_get_domains_by_languages(domains_types, languages, domains_metadata=False, page=1):
    '''
    Get one page of the domains matching all the given languages.

    :param domains_types: domain types to search
    :param languages: list of language codes
    :param domains_metadata: when True, replace each domain of the page by its
                             metadata dict
    :param page: page number passed to paginate_iterator (28 domains per page)

    :return: the paginate_iterator() result; its 'list_elem' entry holds the
             domains (or their metadata dicts) of the requested page
    '''
    sorted_domains = sorted(get_domains_by_languages(languages, l_domain_type=domains_types))
    paginated = paginate_iterator(sorted_domains, nb_obj=28, page=page)
    if domains_metadata:
        paginated['list_elem'] = [
            get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True,
                                status=True, ports=True, tags=True, tags_safe=True,
                                languages=True, screenshot=True)
            for domain in paginated['list_elem']
        ]
    return paginated
|
||||
####---- ----####
|
||||
|
||||
######## DOMAIN ########
|
||||
|
||||
def get_domain_type(domain):
|
||||
|
@ -498,7 +607,7 @@ def get_domain_random_screenshot(domain):
|
|||
'''
|
||||
return Screenshot.get_randon_domain_screenshot(domain)
|
||||
|
||||
def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, screenshot=False):
|
||||
def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, languages=False, screenshot=False):
|
||||
'''
|
||||
Get Domain basic metadata
|
||||
|
||||
|
@ -516,6 +625,7 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
|
|||
'''
|
||||
dict_metadata = {}
|
||||
dict_metadata['id'] = domain
|
||||
dict_metadata['type'] = domain_type
|
||||
if first_seen:
|
||||
res = get_domain_first_seen(domain, domain_type=domain_type)
|
||||
if res is not None:
|
||||
|
@ -535,6 +645,8 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
|
|||
dict_metadata['is_tags_safe'] = Tag.is_tags_safe(dict_metadata['tags'])
|
||||
else:
|
||||
dict_metadata['is_tags_safe'] = Tag.is_tags_safe(get_domain_tags(domain))
|
||||
if languages:
|
||||
dict_metadata['languages'] = Language.get_languages_from_iso(get_domain_languages(domain, r_list=True), sort=True)
|
||||
if screenshot:
|
||||
dict_metadata['screenshot'] = get_domain_random_screenshot(domain)
|
||||
return dict_metadata
|
||||
|
@ -796,6 +908,14 @@ class Domain(object):
|
|||
'''
|
||||
return get_domain_tags(self.domain)
|
||||
|
||||
def get_domain_languages(self):
    '''
    Return all languages of this domain.

    :return: result of the module-level get_domain_languages() for self.domain
    '''
    return get_domain_languages(self.domain)
|
||||
|
||||
def get_domain_correlation(self):
|
||||
'''
|
||||
Retun all correlation of a given domain.
|
||||
|
|
240
bin/lib/Language.py
Executable file
240
bin/lib/Language.py
Executable file
|
@ -0,0 +1,240 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import redis
|
||||
|
||||
dict_iso_languages = {
|
||||
'af': 'Afrikaans',
|
||||
'am': 'Amharic',
|
||||
'ar': 'Arabic',
|
||||
'bg': 'Bulgarian',
|
||||
'bn': 'Bangla',
|
||||
'bs': 'Bosnian',
|
||||
'ca': 'Catalan',
|
||||
'ceb': 'Cebuano',
|
||||
'co': 'Corsican',
|
||||
'cs': 'Czech',
|
||||
'cy': 'Welsh',
|
||||
'da': 'Danish',
|
||||
'de': 'German',
|
||||
'el': 'Greek',
|
||||
'en': 'English',
|
||||
'eo': 'Esperanto',
|
||||
'es': 'Spanish',
|
||||
'et': 'Estonian',
|
||||
'eu': 'Basque',
|
||||
'fa': 'Persian',
|
||||
'fi': 'Finnish',
|
||||
'fil': 'Filipino',
|
||||
'fr': 'French',
|
||||
'fy': 'Western Frisian',
|
||||
'ga': 'Irish',
|
||||
'gd': 'Scottish Gaelic',
|
||||
'gl': 'Galician',
|
||||
'gu': 'Gujarati',
|
||||
'ha': 'Hausa',
|
||||
'haw': 'Hawaiian',
|
||||
'hi': 'Hindi',
|
||||
'hmn': 'Hmong',
|
||||
'hr': 'Croatian',
|
||||
'ht': 'Haitian Creole',
|
||||
'hu': 'Hungarian',
|
||||
'hy': 'Armenian',
|
||||
'id': 'Indonesian',
|
||||
'ig': 'Igbo',
|
||||
'is': 'Icelandic',
|
||||
'it': 'Italian',
|
||||
'iw': 'Hebrew',
|
||||
'ja': 'Japanese',
|
||||
'jv': 'Javanese',
|
||||
'ka': 'Georgian',
|
||||
'kk': 'Kazakh',
|
||||
'km': 'Khmer',
|
||||
'kn': 'Kannada',
|
||||
'ko': 'Korean',
|
||||
'ku': 'Kurdish',
|
||||
'ky': 'Kyrgyz',
|
||||
'la': 'Latin',
|
||||
'lb': 'Luxembourgish',
|
||||
'lo': 'Lao',
|
||||
'lt': 'Lithuanian',
|
||||
'lv': 'Latvian',
|
||||
'mg': 'Malagasy',
|
||||
'mi': 'Maori',
|
||||
'mk': 'Macedonian',
|
||||
'ml': 'Malayalam',
|
||||
'mn': 'Mongolian',
|
||||
'mr': 'Marathi',
|
||||
'ms': 'Malay',
|
||||
'mt': 'Maltese',
|
||||
'my': 'Burmese',
|
||||
'ne': 'Nepali',
|
||||
'nl': 'Dutch',
|
||||
'no': 'Norwegian',
|
||||
'ny': 'Nyanja',
|
||||
'pa': 'Punjabi',
|
||||
'pl': 'Polish',
|
||||
'ps': 'Pashto',
|
||||
'pt': 'Portuguese',
|
||||
'ro': 'Romanian',
|
||||
'ru': 'Russian',
|
||||
'sd': 'Sindhi',
|
||||
'si': 'Sinhala',
|
||||
'sk': 'Slovak',
|
||||
'sl': 'Slovenian',
|
||||
'sm': 'Samoan',
|
||||
'sn': 'Shona',
|
||||
'so': 'Somali',
|
||||
'sq': 'Albanian',
|
||||
'sr': 'Serbian',
|
||||
'st': 'Southern Sotho',
|
||||
'su': 'Sundanese',
|
||||
'sv': 'Swedish',
|
||||
'sw': 'Swahili',
|
||||
'ta': 'Tamil',
|
||||
'te': 'Telugu',
|
||||
'tg': 'Tajik',
|
||||
'th': 'Thai',
|
||||
'tr': 'Turkish',
|
||||
'uk': 'Ukrainian',
|
||||
'ur': 'Urdu',
|
||||
'uz': 'Uzbek',
|
||||
'vi': 'Vietnamese',
|
||||
'xh': 'Xhosa',
|
||||
'yi': 'Yiddish',
|
||||
'yo': 'Yoruba',
|
||||
'zh': 'Chinese',
|
||||
'zu': 'Zulu'
|
||||
}
|
||||
|
||||
# Reverse lookup table (language name -> ISO code). It is derived from
# dict_iso_languages, whose names are all distinct, so the two tables can
# never drift apart when a language is added or renamed.
dict_languages_iso = {language: iso for iso, language in dict_iso_languages.items()}
|
||||
|
||||
def get_language_from_iso(iso_language):
    '''
    Return the language name for an ISO code, or None if the code is unknown.
    '''
    language = dict_iso_languages.get(iso_language)
    return language
|
||||
|
||||
def get_languages_from_iso(l_iso_languages, sort=False):
    '''
    Convert ISO codes to language names, skipping unknown codes.

    :param l_iso_languages: iterable of ISO codes
    :param sort: when True, return the names sorted

    :return: list of language names
    '''
    candidates = (get_language_from_iso(iso_code) for iso_code in l_iso_languages)
    names = [name for name in candidates if name]
    return sorted(names) if sort else names
|
||||
|
||||
def get_iso_from_language(language):
    '''
    Return the ISO code for a language name, or None if the name is unknown.
    '''
    iso_code = dict_languages_iso.get(language)
    return iso_code
|
||||
|
||||
def get_iso_from_languages(l_languages, sort=False):
    '''
    Convert language names to ISO codes, skipping unknown names.

    :param l_languages: iterable of language names
    :param sort: when True, return the codes sorted

    :return: list of ISO codes
    '''
    candidates = (get_iso_from_language(name) for name in l_languages)
    iso_codes = [code for code in candidates if code]
    return sorted(iso_codes) if sort else iso_codes
|
|
@ -2,8 +2,10 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import redis
|
||||
import cld3
|
||||
import html2text
|
||||
|
||||
from io import BytesIO
|
||||
|
@ -101,13 +103,62 @@ def add_item_parent(item_parent, item_id):
|
|||
def get_item_content(item_id):
|
||||
return item_basic.get_item_content(item_id)
|
||||
|
||||
def get_item_content_html2text(item_id, item_content=None, ignore_links=False):
    '''
    Convert the HTML content of an item to text with html2text.

    :param item_id: item id, used to load the content when item_content is empty
    :param item_content: HTML content; loaded with get_item_content() if falsy
    :param ignore_links: passed to the converter as both ignore_links and
                         ignore_images

    :return: result of HTML2Text.handle() on the content
    '''
    if not item_content:
        item_content = get_item_content(item_id)
    h = html2text.HTML2Text()
    h.ignore_links = ignore_links
    # images are deliberately driven by the same flag as links
    h.ignore_images = ignore_links
    return h.handle(item_content)
|
||||
|
||||
def remove_all_urls_from_content(item_id, item_content=None):
    '''
    Strip URLs and PGP armored blocks from an item content.

    :param item_id: item id, used to load the content when item_content is empty
    :param item_content: text to clean; loaded with get_item_content() if falsy

    :return: the content without the matched URLs, PGP public key blocks,
             PGP signatures and PGP messages
    '''
    if not item_content:
        item_content = get_item_content(item_id)

    regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b'
    url_regex = re.compile(regex)
    urls = url_regex.findall(item_content)
    # remove the longest URLs first, so a short URL that is a substring of a
    # longer one does not leave fragments of the longer one behind
    urls = sorted(urls, key=len, reverse=True)
    for url in urls:
        item_content = item_content.replace(url, '')

    # armored PGP blocks, removed in this order
    pgp_regexes = (
        r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----',
        r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----',
        r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----',
    )
    for pgp_regex in pgp_regexes:
        for pgp_block in re.findall(pgp_regex, item_content):
            item_content = item_content.replace(pgp_block, '')

    return item_content
|
||||
|
||||
def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
    '''
    Detect the languages of an item with cld3.

    :param item_id: item id
    :param min_len: minimum length of the cleaned content; shorter content
                    yields no language
    :param num_langs: passed to cld3.get_frequent_languages
    :param min_proportion: minimum `proportion` of a detection to keep it
    :param min_probability: minimum `probability` of a detection to keep it

    :return: list of the cld3 detections that pass both thresholds and are
             flagged reliable
    '''
    ## CLEAN CONTENT ##
    text = get_item_content_html2text(item_id, ignore_links=True)
    text = remove_all_urls_from_content(item_id, item_content=text)

    # REMOVE USELESS SPACE
    text = ' '.join(text.split())
    #- CLEAN CONTENT -#

    if len(text) < min_len:
        return []
    return [
        detection
        for detection in cld3.get_frequent_languages(text, num_langs=num_langs)
        if detection.proportion >= min_proportion
        and detection.probability >= min_probability
        and detection.is_reliable
    ]
|
||||
|
||||
# API
|
||||
def get_item(request_dict):
|
||||
if not request_dict:
|
||||
|
@ -496,3 +547,17 @@ def delete_domain_node(item_id):
|
|||
domain_basic.delete_domain_item_core(item_id, domain, port)
|
||||
for child_id in get_all_domain_node_by_item_id(item_id):
|
||||
delete_item(child_id)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# import Domain
|
||||
# domain = Domain.Domain('domain.onion')
|
||||
# for domain_history in domain.get_domain_history():
|
||||
# domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
|
||||
# if "items" in domain_item:
|
||||
# for item_dict in domain_item['items']:
|
||||
# item_id = item_dict['id']
|
||||
# print(item_id)
|
||||
# for lang in get_item_languages(item_id, min_proportion=0.2, min_probability=0.8):
|
||||
# print(lang)
|
||||
# print()
|
||||
# print(get_item_languages(item_id, min_proportion=0.2, min_probability=0.6)) # 0.7 ?
|
||||
|
|
|
@ -46,6 +46,9 @@ publish = Redis_Tags
|
|||
subscribe = Redis_Global
|
||||
publish = Redis_Tags
|
||||
|
||||
[Languages]
|
||||
subscribe = Redis_Global
|
||||
|
||||
[Categ]
|
||||
subscribe = Redis_Global
|
||||
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
|
||||
|
|
|
@ -17,6 +17,25 @@ import subprocess
|
|||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
||||
def launch_background_upgrade(version, script_name):
    '''
    Run a background update script if its version is still pending.

    Nothing is done unless `version` is a member of the 'ail:to_update' set.
    When the script reports a progress of 100 in
    'ail:current_background_script_stat', the update state keys are cleared
    and `version` is removed from 'ail:to_update'.

    :param version: update version, also the sub-directory of
                    $AIL_HOME/update holding the script
    :param script_name: file name of the update script to run
    '''
    if not r_serv.sismember('ail:to_update', version):
        return

    r_serv.delete('ail:update_error')
    r_serv.set('ail:update_in_progress', version)
    r_serv.set('ail:current_background_update', version)
    # NOTE(review): this description is the same for every version/script;
    # confirm whether it should depend on script_name.
    r_serv.set('ail:current_background_script', 'domain tags update')

    update_file = os.path.join(os.environ['AIL_HOME'], 'update', version, script_name)
    subprocess.run(['python', update_file])

    update_progress = r_serv.get('ail:current_background_script_stat')
    if update_progress and int(update_progress) == 100:
        r_serv.delete('ail:update_in_progress')
        r_serv.delete('ail:current_background_script')
        r_serv.delete('ail:current_background_script_stat')
        r_serv.delete('ail:current_background_update')
        # remove the version that was just processed (the function argument),
        # not whatever global `new_version` happens to hold
        r_serv.srem('ail:to_update', version)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
|
@ -114,3 +133,8 @@ if __name__ == "__main__":
|
|||
r_serv.delete('ail:current_background_script_stat')
|
||||
r_serv.delete('ail:current_background_update')
|
||||
r_serv.srem('ail:to_update', new_version)
|
||||
|
||||
launch_background_upgrade('v2.6', 'Update_screenshots.py')
|
||||
launch_background_upgrade('v2.7', 'Update_domain_tags.py')
|
||||
|
||||
launch_background_upgrade('v3.4', 'Update_domain.py')
|
||||
|
|
37
update/v3.4/Update.py
Executable file
37
update/v3.4/Update.py
Executable file
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import redis
|
||||
import datetime
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
||||
new_version = 'v3.4'
|
||||
|
||||
if __name__ == '__main__':

    config_loader = ConfigLoader.ConfigLoader()
    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    #Set current update_in_progress
    r_serv.set('ail:update_in_progress', new_version)
    r_serv.set('ail:current_background_update', new_version)

    # Build the set of domains to process ('full_onion_up' + 'full_regular_up')
    # and initialise the progress counters
    r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up')
    r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v3.4'))
    r_serv.set('update:nb_elem_converted',0)

    #Set current ail version
    r_serv.set('ail:version', new_version)

    #Set the date of this update (YYYYMMDD)
    r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))
|
54
update/v3.4/Update.sh
Executable file
54
update/v3.4/Update.sh
Executable file
|
@ -0,0 +1,54 @@
|
|||
#!/bin/bash

# AIL v3.4 update: stop AIL, start the databases, install the new Python
# dependency, run the v3.4 Update.py script, then shut everything down again.

[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

bash ${AIL_BIN}/LAUNCH.sh -ldbv &
wait
echo ""

# SUBMODULES #
git submodule update

# echo ""
# echo -e $GREEN"installing KVROCKS ..."$DEFAULT
# cd ${AIL_HOME}
# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
# pushd kvrocks/
# make -j4
# popd

echo -e $GREEN"Installing pycld3 ..."$DEFAULT
pip3 install pycld3

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v3.4/Update.py
wait
echo ""
echo ""


echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

exit 0
|
57
update/v3.4/Update_domain.py
Executable file
57
update/v3.4/Update_domain.py
Executable file
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import redis
|
||||
import datetime
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
import Domain
|
||||
|
||||
def update_update_stats():
    '''
    Print the update progress and store it, as an integer percentage, in
    'ail:current_background_script_stat'.

    Reads the module-level r_serv_db and nb_elem_to_update set by the script.
    '''
    converted = int(r_serv_db.get('update:nb_elem_converted'))
    percent = int((converted * 100) / nb_elem_to_update)
    print('{}/{} updated {}%'.format(converted, nb_elem_to_update, percent))
    r_serv_db.set('ail:current_background_script_stat', percent)
|
||||
|
||||
def update_domain_language(domain_obj, item_id):
    '''
    Add the languages detected in an item to the domain it belongs to.

    :param domain_obj: object exposing get_domain_name()
    :param item_id: id of the item to analyse
    '''
    Domain.add_domain_languages_by_item_id(domain_obj.get_domain_name(), item_id)
|
||||
|
||||
if __name__ == '__main__':

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    # total number of domains to process; fall back to 1 when the counter is
    # missing so the progress computation has a non-zero divisor
    nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
    nb_elem_to_update = int(nb_elem_to_update) if nb_elem_to_update else 1

    #Domain._delete_all_domains_languages()

    # pop domains from the work set until it is empty
    while True:
        domain = r_serv_onion.spop('domain_update_v3.4')
        if domain is None:
            sys.exit(0)

        print(domain)
        domain = Domain.Domain(domain)
        for domain_history in domain.get_domain_history():
            domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
            if "items" not in domain_item:
                continue
            for item_dict in domain_item['items']:
                update_domain_language(domain, item_dict['id'])

        r_serv_db.incr('update:nb_elem_converted')
        update_update_stats()
|
|
@ -26,6 +26,7 @@ import Tag
|
|||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import Domain
|
||||
import crawlers
|
||||
import Language
|
||||
|
||||
r_cache = Flask_config.r_cache
|
||||
r_serv_db = Flask_config.r_serv_db
|
||||
|
@ -85,6 +86,9 @@ def send_to_spider():
|
|||
return create_json_response(res[0], res[1])
|
||||
return redirect(url_for('crawler_splash.manual'))
|
||||
|
||||
|
||||
#### Domains ####
|
||||
|
||||
# add route : /crawlers/show_domain
|
||||
@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
|
@ -111,6 +115,7 @@ def showDomain():
|
|||
dict_domain = {**dict_domain, **domain.get_domain_correlation()}
|
||||
dict_domain['correlation_nb'] = Domain.get_domain_total_nb_correlation(dict_domain)
|
||||
dict_domain['father'] = domain.get_domain_father()
|
||||
dict_domain['languages'] = Language.get_languages_from_iso(domain.get_domain_languages(), sort=True)
|
||||
dict_domain['tags'] = domain.get_domain_tags()
|
||||
dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags'])
|
||||
dict_domain['history'] = domain.get_domain_history_with_status()
|
||||
|
@ -198,6 +203,38 @@ def domains_explorer_web():
|
|||
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
|
||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
|
||||
|
||||
@crawler_splash.route('/domains/languages/all/json', methods=['GET'])
@login_required
@login_read_only
def domains_all_languages_json():
    '''
    Return, as JSON, the sorted names of all the languages stored for domains.
    '''
    # # TODO: get domain type
    # The 'iso' and 'domain_types' query arguments are not used yet; they were
    # previously read into unused locals.
    return jsonify(Language.get_languages_from_iso(Domain.get_all_domains_languages(), sort=True))
|
||||
|
||||
@crawler_splash.route('/domains/languages/search_get', methods=['GET'])
@login_required
@login_read_only
def domains_search_languages_get():
    '''
    Render the page of domains matching the requested languages.

    Query arguments:
        page: page number, defaults to 1 when missing or not an integer
        domain_types: comma separated domain types (first value only)
        languages: comma separated language names (first value only)
    '''
    page = request.args.get('page')
    try:
        page = int(page)
    except (TypeError, ValueError):
        # missing (None) or non-numeric 'page' argument: use the first page
        page = 1
    domains_types = request.args.getlist('domain_types')
    if domains_types:
        domains_types = domains_types[0].split(',')
    languages = request.args.getlist('languages')
    if languages:
        languages = languages[0].split(',')
    l_dict_domains = Domain.api_get_domains_by_languages(domains_types, Language.get_iso_from_languages(languages), domains_metadata=True, page=page)
    return render_template("domains/domains_filter_languages.html", template_folder='../../',
                                l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
                                current_languages=languages, domains_types=domains_types)
|
||||
|
||||
##-- --##
|
||||
|
||||
|
||||
## Cookiejar ##
|
||||
@crawler_splash.route('/crawler/cookiejar/add', methods=['GET'])
|
||||
@login_required
|
||||
|
|
|
@ -148,6 +148,10 @@
|
|||
{% include 'tags/block_obj_tags_search.html' %}
|
||||
{% endwith %}
|
||||
|
||||
{% with object_type='domain' %}
|
||||
{% include 'domains/block_languages_search.html' %}
|
||||
{% endwith %}
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -68,7 +68,7 @@
|
|||
</div>
|
||||
|
||||
|
||||
{% with dict_data=dict_data, bootstrap_label=bootstrap_label %}
|
||||
{% with l_dict_domains=dict_data['list_elem'], bootstrap_label=bootstrap_label %}
|
||||
{% include 'domains/card_img_domain.html' %}
|
||||
{% endwith %}
|
||||
|
||||
|
|
|
@ -67,6 +67,7 @@
|
|||
<th>First Seen</th>
|
||||
<th>Last Check</th>
|
||||
<th>Ports</th>
|
||||
<th>Languages</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
|
@ -74,6 +75,11 @@
|
|||
<td class="panelText">{%if "first_seen" in dict_domain%}{{ dict_domain['first_seen'] }}{%endif%}</td>
|
||||
<td class="panelText">{%if "last_check" in dict_domain%}{{ dict_domain['last_check'] }}{%endif%}</td>
|
||||
<td class="panelText">{%if dict_domain["ports"]%}{{ dict_domain["ports"] }}{%endif%}</td>
|
||||
<td class="panelText">
|
||||
{% for languages in dict_domain['languages'] %}
|
||||
{{languages}}
|
||||
{% endfor %}
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
|
73
var/www/templates/domains/block_languages_search.html
Normal file
73
var/www/templates/domains/block_languages_search.html
Normal file
|
@ -0,0 +1,73 @@
|
|||
<div class="card mb-3 mt-1">
|
||||
<div class="card-header text-white bg-dark">
|
||||
<h5 class="card-title mb-0">
|
||||
<i class="fas fa-language" style="font-size: 1.8rem;"></i> Domains by Languages :
|
||||
</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
|
||||
<div class="input-group">
|
||||
<div class="input-group-prepend">
|
||||
<button class="btn btn-outline-danger" type="button" id="button-clear" style="z-index: 1;" onclick="emptySearch()">
|
||||
<i class="fas fa-eraser"></i>
|
||||
</button>
|
||||
</div>
|
||||
<input id="llanguages" name="llanguages" type="text" class="form-control" aria-describedby="button-clear" autocomplete="off">
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<div class="custom-control custom-switch">
|
||||
<input class="custom-control-input" type="checkbox" name="domain_onion_switch" value="" id="domain_onion_switch" {%if 'onion' in domains_types%}checked{%endif%}>
|
||||
<label class="custom-control-label" for="domain_onion_switch">
|
||||
<span class="badge badge-danger"><i class="fas fa-user-secret"></i> Onion Domains</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="custom-control custom-switch">
|
||||
<input class="custom-control-input" type="checkbox" name="domain_regular_switch" value="True" id="domain_regular_switch" {%if 'regular' in domains_types%}checked{%endif%}>
|
||||
<label class="custom-control-label" for="domain_regular_switch">
|
||||
<span class="badge badge-warning"><i class="fab fa-html5"></i> Web Domains</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary" type="button" id="button-search" onclick="searchLanguages()">
|
||||
<i class="fas fa-search"></i> Search
|
||||
</button>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
|
||||
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
|
||||
<script>
|
||||
var llanguages;
|
||||
$.getJSON("{{ url_for('crawler_splash.domains_all_languages_json') }}", //?object_type={{ object_type }}"
|
||||
function(data) {
|
||||
llanguages = $('#llanguages').tagSuggest({
|
||||
data: data,
|
||||
value: [{%if current_languages%}{% for language in current_languages %}'{{language|safe}}',{%endfor%}{%endif%}],
|
||||
sortOrder: 'name',
|
||||
maxDropHeight: 200,
|
||||
name: 'llanguages'
|
||||
});
|
||||
});
|
||||
|
||||
// Redirect to the language search view with the selected languages and
// the domain types whose switch is checked.
function searchLanguages() {
  var all_domain_types = ['onion', 'regular'] // TODO: load from flask
  var l_domains_types = [];

  // llanguages is only assigned once the languages JSON has loaded;
  // fall back to an empty selection instead of throwing on an early click.
  var data = llanguages ? llanguages.getValue() : [];
  for (var i = 0; i < all_domain_types.length; i++) {
    if (document.getElementById('domain_'+ all_domain_types[i] +'_switch').checked) {
      l_domains_types.push(all_domain_types[i])
    }
  }
  // encodeURIComponent() stringifies the arrays (comma-joined) and escapes
  // characters such as '&', '#', '+' or spaces that would corrupt the query string.
  var parameter = "?languages=" + encodeURIComponent(data) + "&domain_types=" + encodeURIComponent(l_domains_types) +"{%if page%}&page={{ page }}{%endif%}";
  window.location.href = "{{ url_for('crawler_splash.domains_search_languages_get') }}" + parameter;
}
|
||||
// Remove every selected language from the search field.
function emptySearch() {
  // llanguages is undefined until the languages JSON request has completed.
  if (llanguages) {
    llanguages.clear();
  }
}
|
||||
|
||||
</script>
|
|
@ -1,10 +1,10 @@
|
|||
{% for dict_domain in dict_data['list_elem'] %}
|
||||
{% for dict_domain in l_dict_domains %}
|
||||
|
||||
{% if loop.index0 % 4 == 0 %}
|
||||
<div class="card-deck mt-3">
|
||||
{% endif %}
|
||||
|
||||
<div class="card">
|
||||
<div class="card {% if dict_domain["status"] %}border-success{% else %}border-danger{% endif %}">
|
||||
<div class="text-center">
|
||||
<canvas id="canvas_{{loop.index0}}" style="max-height: 400px; max-width: 100%;"></canvas>
|
||||
</div>
|
||||
|
@ -13,24 +13,46 @@
|
|||
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{dict_domain["id"]}}">
|
||||
{{dict_domain["id"]}}
|
||||
</a>
|
||||
|
||||
{% if dict_domain["status"] %}
|
||||
<span style="color:Green;">
|
||||
<i class="fas fa-check-circle"></i> UP
|
||||
</span>
|
||||
{% else %}
|
||||
<span style="color:Red;">
|
||||
<i class="fas fa-times-circle"></i> DOWN
|
||||
</span>
|
||||
{% endif %}
|
||||
</h5>
|
||||
<div>
|
||||
<span class="badge badge-dark">
|
||||
<span data-toggle="tooltip" data-placement="top" title="Tooltip on top">
|
||||
<span class="badge badge-info" style="font-size: 0.8rem;">
|
||||
<i class="fas fa-hourglass-start"></i>
|
||||
</span>
|
||||
{{dict_domain["first_seen"]}}
|
||||
</span>
|
||||
<span class="badge badge-light mx-1" style="font-size: 1rem;">
|
||||
<i class="far fa-calendar-alt"></i>
|
||||
</span>
|
||||
{{dict_domain["first_seen"]}}
|
||||
<span class="badge badge-secondary" style="font-size: 0.8rem;">
|
||||
<i class="fas fa-hourglass-end"></i>
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
<p class="card-text">
|
||||
<small class="text-muted">
|
||||
First seen: {{dict_domain["first_seen"]}}<br>
|
||||
Last_seen: {{dict_domain["first_seen"]}}<br>
|
||||
Ports: {{dict_domain["ports"]}}
|
||||
Ports: {{dict_domain["ports"]}}<br>
|
||||
{% if dict_domain['languages'] %}
|
||||
Languages:
|
||||
{% for language in dict_domain['languages'] %}
|
||||
<span class="badge badge-secondary" style="font-size: 0.75rem;">{{ language }}</span>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
</small>
|
||||
</p>
|
||||
<small class="text-muted">Status: </small>
|
||||
{% if dict_domain["status"] %}
|
||||
<span style="color:Green;">
|
||||
<i class="fas fa-check-circle"></i> UP
|
||||
</span>
|
||||
{% else %}
|
||||
<span style="color:Red;">
|
||||
<i class="fas fa-times-circle"></i> DOWN
|
||||
</span>
|
||||
{% endif %}
|
||||
|
||||
<div>
|
||||
{% for tag in dict_domain['tags'] %}
|
||||
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=domain&ltags={{ tag }}">
|
||||
|
@ -50,6 +72,6 @@
|
|||
|
||||
{% endfor %}
|
||||
|
||||
{% if dict_data['list_elem']|length % 4 != 0 %}
|
||||
{% if l_dict_domains|length % 4 != 0 %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
|
192
var/www/templates/domains/domains_filter_languages.html
Normal file
192
var/www/templates/domains/domains_filter_languages.html
Normal file
|
@ -0,0 +1,192 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Show Domain - AIL</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
|
||||
|
||||
<style>
|
||||
.card-columns {
|
||||
column-count: 4;
|
||||
}
|
||||
</style>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
{% include 'nav_bar.html' %}
|
||||
|
||||
<div class="container-fluid">
|
||||
<div class="row">
|
||||
|
||||
{% include 'crawler/menu_sidebar.html' %}
|
||||
|
||||
<div class="col-12 col-lg-10" id="core_content">
|
||||
|
||||
<div class="row">
|
||||
<div class="col-12 col-lg-6">
|
||||
|
||||
{% include 'domains/block_languages_search.html' %}
|
||||
|
||||
|
||||
</div>
|
||||
<div class="col-12 col-xl-6">
|
||||
|
||||
<div class="card my-2 border-secondary" >
|
||||
<div class="card-body py-2">
|
||||
<div class="row">
|
||||
<div class="col-md-3 text-center">
|
||||
<button class="btn btn-primary" onclick="blocks.value=0;pixelate_all();">
|
||||
<i class="fas fa-eye-slash"></i>
|
||||
<span class="label-icon">Hide</span>
|
||||
</button>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="5">
|
||||
</div>
|
||||
<div class="col-md-3 text-center">
|
||||
<button class="btn btn-primary" onclick="blocks.value=50;pixelate_all();">
|
||||
<i class="fas fa-plus-square"></i>
|
||||
<span class="label-icon">Full resolution</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %}
|
||||
{% include 'domains/card_img_domain.html' %}
|
||||
{% endwith %}
|
||||
|
||||
<br>
|
||||
<br>
|
||||
|
||||
{%if l_dict_domains['list_elem']%}
|
||||
{% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %}
|
||||
{% set target_url=url_for('crawler_splash.domains_search_languages_get') + "?languages=" + ','.join(current_languages)%}
|
||||
{%if domains_types %}
|
||||
{% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%}
|
||||
{%endif%}
|
||||
{% include 'pagination.html' %}
|
||||
{% endwith %}
|
||||
{%endif%}
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
|
||||
<script>
|
||||
// Once the DOM is ready, un-mute the "domains explorer" navigation title.
$(function(){
  var nav_title = $('#nav_title_domains_explorer');
  nav_title.removeClass("text-muted");
});
|
||||
|
||||
// Show or hide the side navigation menu and adjust the grid classes of
// the sidebar and of the main content column accordingly.
function toggle_sidebar(){
  // The menu must be shown exactly when it is currently hidden.
  var show_menu = !$('#nav_menu').is(':visible');

  $('#nav_menu').toggle(show_menu);
  $('#side_menu').toggleClass('border-right', show_menu);
  $('#side_menu').toggleClass('col-lg-2', show_menu);
  $('#core_content').toggleClass('col-lg-10', show_menu);
}
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
|
||||
// img_url
|
||||
// ctx
|
||||
// canevas_id
|
||||
|
||||
var dict_canevas_blurr_img = {}
|
||||
|
||||
// Register the canvas `canevas_id` in dict_canevas_blurr_img and start
// loading `img_url`; the image is drawn pixelated once it has loaded.
function init_canevas_blurr_img(canevas_id, img_url){

  // ctx, turn off image smoothing
  var canvas_container = document.getElementById(canevas_id);
  var ctx = canvas_container.getContext('2d');
  ctx.webkitImageSmoothingEnabled = false;
  ctx.imageSmoothingEnabled = false;

  // img
  var img = new Image();
  img.onload = function() {pixelate_img(canevas_id);};
  // Attached through the `onerror` property rather than addEventListener so that
  // img_error() can really detach it with `onerror = null`; otherwise a failing
  // fallback image would re-trigger the handler endlessly.
  img.onerror = function() {img_error(canevas_id);};

  // Only register the entry once both ctx and img exist, so pixelate_all()
  // never iterates over a half-initialised entry.
  dict_canevas_blurr_img[canevas_id] = {"ctx": ctx, "img": img};

  img.src = img_url;
}
|
||||
|
||||
// Redraw every registered canvas with the current slider value.
function pixelate_all(){
  var canevas_ids = Object.keys(dict_canevas_blurr_img);
  for (var i = 0; i < canevas_ids.length; i++) {
    pixelate_img(canevas_ids[i]);
  }
}
|
||||
|
||||
// Draw the image registered for `canevas_id` on its canvas, pixelated
// according to the `blocks` range input (50 = full resolution).
function pixelate_img(canevas_id) {

  if (typeof canevas_id === 'undefined') {
    return;
  }

  var canevas_to_blurr = document.getElementById(canevas_id);
  var entry = dict_canevas_blurr_img[canevas_id];

  /// use slider value: scale factor applied before re-enlarging the image
  var size = (blocks.value == 50) ? 1 : (blocks.value) * 0.01;

  // NOTE(review): resizing a canvas resets its 2D context state, which may
  // re-enable image smoothing disabled at init time -- confirm intended rendering.
  canevas_to_blurr.width = entry["img"].width;
  canevas_to_blurr.height = entry["img"].height;

  /// cache scaled width and height (declared locally: no implicit globals)
  var w = canevas_to_blurr.width * size;
  var h = canevas_to_blurr.height * size;

  /// draw original image to the scaled size
  entry["ctx"].drawImage(entry["img"], 0, 0, w, h);

  /// pixelated: stretch the reduced drawing back to the full canvas size
  entry["ctx"].drawImage(canevas_to_blurr, 0, 0, w, h, 0, 0, canevas_to_blurr.width, canevas_to_blurr.height);

}
|
||||
|
||||
// Fallback when a screenshot cannot be loaded: clear the image's onerror
// property and load the static AIL image instead.
function img_error(canevas_id) {
  var failed_img = dict_canevas_blurr_img[canevas_id]["img"];
  failed_img.onerror = null;
  failed_img.src = "{{ url_for('static', filename='image/AIL.png') }}";
}
|
||||
|
||||
blocks.addEventListener('change', pixelate_all, false);
|
||||
|
||||
{% for dict_domain in l_dict_domains['list_elem'] %}
|
||||
{% if 'screenshot' in dict_domain %}
|
||||
{% if dict_domain['is_tags_safe'] %}
|
||||
var screenshot_url = "{{ url_for('showsavedpastes.screenshot', filename="") }}{{dict_domain['screenshot']}}";
|
||||
{% else %}
|
||||
var screenshot_url = "{{ url_for('static', filename='image/AIL.png') }}";
|
||||
{% endif %}
|
||||
init_canevas_blurr_img("canvas_{{loop.index0}}", screenshot_url);
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
</script>
|
||||
|
||||
</html>
|
Loading…
Reference in a new issue