chg: [domains search] search domains by name

This commit is contained in:
Terrtia 2021-02-05 17:42:33 +01:00
parent a1fe49192b
commit d941d8abb4
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
8 changed files with 338 additions and 19 deletions

View file

@ -10,9 +10,10 @@ The ``Domain``
import os
import sys
import itertools
import time
import re
import redis
import random
import time
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Cryptocurrency
@ -241,6 +242,52 @@ def create_domains_metadata_list(list_domains, domain_type):
ports=True, tags=True, languages=True, screenshot=True, tags_safe=True))
return l_domains
def sanithyse_domain_name_to_search(name_to_search, domain_type):
if domain_type == 'onion':
r_name = r'[a-z0-9\.]+'
else:
r_name = r'[a-zA-Z0-9\.-_]+'
# invalid domain name
if not re.fullmatch(r_name, name_to_search):
return None
return name_to_search.replace('.', '\.')
def search_domains_by_name(name_to_search, domain_types, r_pos=False):
domains_dict = {}
for domain_type in domain_types:
r_name = sanithyse_domain_name_to_search(name_to_search, domain_type)
if not name_to_search:
break
r_name = re.compile(r_name)
for domain in get_all_domains_up(domain_type):
res = re.search(r_name, domain)
if res:
domains_dict[domain] = {}
if r_pos:
domains_dict[domain]['hl-start'] = res.start()
domains_dict[domain]['hl-end'] = res.end()
return domains_dict
def api_search_domains_by_name(name_to_search, domains_types, domains_metadata=False, page=1):
domains_types = sanitize_domain_types(domains_types)
domains_dict = search_domains_by_name(name_to_search, domains_types, r_pos=True)
l_domains = sorted(domains_dict.keys())
l_domains = paginate_iterator(l_domains, nb_obj=28, page=page)
if not domains_metadata:
return l_domains
else:
l_dict_domains = []
for domain in l_domains['list_elem']:
dict_domain = get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True,
status=True, ports=True, tags=True, tags_safe=True,
languages=True, screenshot=True)
dict_domain = {**domains_dict[domain], **dict_domain}
l_dict_domains.append(dict_domain)
l_domains['list_elem'] = l_dict_domains
l_domains['search'] = name_to_search
return l_domains
######## LANGUAGES ########
def get_all_domains_languages():
@ -940,3 +987,6 @@ class Domain(object):
'''
port = sanathyse_port(port, self.domain, self.type, strict=True, current_port=self.current_port)
return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag)
if __name__ == '__main__':
search_domains_by_name('c', 'onion')

View file

@ -47,6 +47,20 @@ faup = Faup()
def generate_uuid():
return str(uuid.uuid4()).replace('-', '')
def is_valid_onion_domain(domain):
if not domain.endswith('.onion'):
return False
domain = domain.replace('.onion', '', 1)
if len(domain) == 16: # v2 address
r_onion = r'[a-z0-9]{16}'
if re.match(r_onion, domain):
return True
elif len(domain) == 56: # v3 address
r_onion = r'[a-z0-9]{56}'
if re.fullmatch(r_onion, domain):
return True
return False
################################################################################
# # TODO: handle prefix cookies

View file

@ -232,6 +232,25 @@ def domains_search_languages_get():
l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
current_languages=languages, domains_types=domains_types)
@crawler_splash.route('/domains/name/search', methods=['GET'])
@login_required
@login_analyst
def domains_search_name():
name = request.args.get('name')
page = request.args.get('page')
try:
page = int(page)
except:
page = 1
domains_types = request.args.getlist('domain_types')
if domains_types:
domains_types = domains_types[0].split(',')
l_dict_domains = Domain.api_search_domains_by_name(name, domains_types, domains_metadata=True, page=page)
return render_template("domains/domains_result_list.html", template_folder='../../',
l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
domains_types=domains_types)
##-- --##

View file

@ -105,23 +105,7 @@
</div>
</div>
<div class="d-flex justify-content-center my-4">
<div class="card border-secondary" style="max-width: 40rem;">
<div class="card-body text-dark">
<h5 class="card-title">Show Domain:</h5>
<form class="" action="{{url_for('crawler_splash.showDomain')}}" method="post">
<div class="input-group mb-3">
<input type="text" class="form-control" style="min-width: 30rem;" placeholder="Domain name" aria-label="Domain name" aria-describedby="btn_show_domain" id="in_show_domain" , name="in_show_domain">
<div class="input-group-append">
<button class="btn btn-info" type="submit" id="btn_show_domain">
<i class="fas fa-search"></i>
</button>
</div>
</div>
</form>
</div>
</div>
</div>
{% include 'domains/block_domains_name_search.html' %}
<hr>

View file

@ -0,0 +1,7 @@
.hg-text{
padding-top: 0.2em;
padding-bottom: 0.2em;
padding-right: 0.15em;
padding-left: 0.15em;
background-color: #2e5;
}

View file

@ -0,0 +1,50 @@
<div class="d-flex justify-content-center my-4">
<div class="card border-secondary" style="max-width: 40rem;">
<div class="card-body text-dark">
<h5 class="card-title">Search Domain by name:</h5>
<div class="input-group mb-3">
<input type="text" class="form-control" id="in_search_name" value="{{search}}" style="min-width: 30rem;" placeholder="Domain name" aria-label="Domain name" aria-describedby="btn_show_domain">
<div class="input-group-append">
<button class="btn btn-info" type="button" id="btn_search_name" onclick="searchDomainName()">
<i class="fas fa-search"></i>
</button>
</div>
</div>
<div class="mb-3">
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="domain_onion_switch" value="" id="domain_onion_switch" {%if 'onion' in domains_types or not domains_types%}checked{%endif%}>
<label class="custom-control-label" for="domain_onion_switch">
<span class="badge badge-danger"><i class="fas fa-user-secret"></i> Onion Domains</span>
</label>
</div>
<div class="custom-control custom-switch">
<input class="custom-control-input" type="checkbox" name="domain_regular_switch" value="True" id="domain_regular_switch"{%if 'regular' in domains_types%}checked{%endif%}>
<label class="custom-control-label" for="domain_regular_switch">
<span class="badge badge-warning"><i class="fab fa-html5"></i> Web Domains</span>
</label>
</div>
</div>
</div>
</div>
</div>
<script>
function searchDomainName() {
var all_domain_types = ['onion', 'regular'] // TODO: load from flask
var l_domains_types = [];
console.log(document.getElementById('in_search_name'));
var data = document.getElementById('in_search_name').value;
for (var i = 0; i < all_domain_types.length; i++) {
if (document.getElementById('domain_'+ all_domain_types[i] +'_switch').checked) {
l_domains_types.push(all_domain_types[i])
}
}
var parameter = "?name=" + data + "&domain_types=" + l_domains_types +"{%if page%}&page={{ page }}{%endif%}";
window.location.href = "{{ url_for('crawler_splash.domains_search_name') }}" + parameter;
}
</script>

View file

@ -12,7 +12,7 @@
<h5 class="card-title">
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{dict_domain["id"]}}">
{% if 'hl-start' in dict_domain %}
{{dict_domain["id"][:dict_domain['hl-start']]}}<mark>{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}</mark>{{dict_domain["id"][dict_domain['hl-end']:]}}
{{dict_domain["id"][:dict_domain['hl-start']]}}<span class="hg-text">{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}</span>{{dict_domain["id"][dict_domain['hl-end']:]}}
{% else %}
{{dict_domain["id"]}}
{% endif %}

View file

@ -0,0 +1,195 @@
<!DOCTYPE html>
<html>
<head>
<title>Domain Search - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/ail-project.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
<style>
.card-columns {
column-count: 4;
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="row">
<div class="col-12 col-lg-6">
{% with page=l_dict_domains['page'], search=l_dict_domains['search'] %}
{% include 'domains/block_domains_name_search.html' %}
{% endwith %}
</div>
<div class="col-12 col-xl-6">
<div class="card my-2 border-secondary" >
<div class="card-body py-2">
<div class="row">
<div class="col-md-3 text-center">
<button class="btn btn-primary" onclick="blocks.value=0;pixelate_all();">
<i class="fas fa-eye-slash"></i>
<span class="label-icon">Hide</span>
</button>
</div>
<div class="col-md-6">
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="5">
</div>
<div class="col-md-3 text-center">
<button class="btn btn-primary" onclick="blocks.value=50;pixelate_all();">
<i class="fas fa-plus-square"></i>
<span class="label-icon">Full resolution</span>
</button>
</div>
</div>
</div>
</div>
</div>
</div>
{% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %}
{% include 'domains/card_img_domain.html' %}
{% endwith %}
<br>
<br>
{%if l_dict_domains['list_elem']%}
{% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %}
{% set target_url=url_for('crawler_splash.domains_search_name') + "?name=" + l_dict_domains['search']%}
{%if domains_types %}
{% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%}
{%endif%}
{% include 'pagination.html' %}
{% endwith %}
{%endif%}
</div>
</div>
</div>
</body>
<script>
$(document).ready(function(){
$('#nav_title_domains_explorer').removeClass("text-muted");
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
<script>
// img_url
// ctx
// canevas_id
var dict_canevas_blurr_img = {}
function init_canevas_blurr_img(canevas_id, img_url){
// ctx, turn off image smoothin
dict_canevas_blurr_img[canevas_id] = {}
var canvas_container = document.getElementById(canevas_id);
var ctx = canvas_container.getContext('2d');
ctx.webkitImageSmoothingEnabled = false;
ctx.imageSmoothingEnabled = false;
dict_canevas_blurr_img[canevas_id]["ctx"] = ctx;
// img
dict_canevas_blurr_img[canevas_id]["img"] = new Image();
dict_canevas_blurr_img[canevas_id]["img"].onload = function() {pixelate_img(canevas_id);};
dict_canevas_blurr_img[canevas_id]["img"].addEventListener("error", function() {img_error(canevas_id);});
dict_canevas_blurr_img[canevas_id]["img"].src = img_url;
}
function pixelate_all(){
Object.entries(dict_canevas_blurr_img).forEach(([key, value]) => {
pixelate_img(key);
});
}
function pixelate_img(canevas_id) {
if (typeof canevas_id !== 'undefined') {
var canevas_to_blurr = document.getElementById(canevas_id);
/// use slider value
if( blocks.value == 50 ){
size = 1;
} else {
var size = (blocks.value) * 0.01;
}
canevas_to_blurr.width = dict_canevas_blurr_img[canevas_id]["img"].width;
canevas_to_blurr.height = dict_canevas_blurr_img[canevas_id]["img"].height;
/// cache scaled width and height
w = canevas_to_blurr.width * size;
h = canevas_to_blurr.height * size;
/// draw original image to the scaled size
dict_canevas_blurr_img[canevas_id]["ctx"].drawImage(dict_canevas_blurr_img[canevas_id]["img"], 0, 0, w, h);
/// pixelated
dict_canevas_blurr_img[canevas_id]["ctx"].drawImage(canevas_to_blurr, 0, 0, w, h, 0, 0, canevas_to_blurr.width, canevas_to_blurr.height);
}
}
function img_error(canevas_id) {
dict_canevas_blurr_img[canevas_id]["img"].onerror=null;
dict_canevas_blurr_img[canevas_id]["img"].src="{{ url_for('static', filename='image/AIL.png') }}";
}
blocks.addEventListener('change', pixelate_all, false);
{% for dict_domain in l_dict_domains['list_elem'] %}
{% if 'screenshot' in dict_domain %}
{% if dict_domain['is_tags_safe'] %}
var screenshot_url = "{{ url_for('showsavedpastes.screenshot', filename="") }}{{dict_domain['screenshot']}}";
{% else %}
var screenshot_url = "{{ url_for('static', filename='image/AIL.png') }}";
{% endif %}
init_canevas_blurr_img("canvas_{{loop.index0}}", screenshot_url);
{% endif %}
{% endfor %}
</script>
</html>