fix: [domain explorer] fix search by daterange

This commit is contained in:
Terrtia 2023-04-24 10:55:58 +02:00
parent 1ed4875b55
commit 10fbf228c1
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
3 changed files with 45 additions and 33 deletions

View file

@ -478,7 +478,7 @@ def get_all_domains_languages():
def get_domains_by_languages(languages, domain_types): def get_domains_by_languages(languages, domain_types):
if len(languages) == 1: if len(languages) == 1:
if len(domain_types) == 1: if len(domain_types) == 1:
return r_crawler.smembers(f'language:domains:{domain_type[0]}:{languages[0]}') return r_crawler.smembers(f'language:domains:{domain_types[0]}:{languages[0]}')
else: else:
l_keys = [] l_keys = []
for domain_type in domain_types: for domain_type in domain_types:
@ -523,6 +523,15 @@ def get_domains_down_by_date(date, domain_type):
return r_crawler.smembers(f'{domain_type}_down:{date}') return r_crawler.smembers(f'{domain_type}_down:{date}')
def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
    """Return a flat list of the domains recorded for each date in a range.

    The dates to visit come from ``Date.substract_date(date_from, date_to)``.
    For every date, the "up" domains are added first (when ``up`` is truthy),
    followed by the "down" domains (when ``down`` is truthy).

    Results are concatenated without de-duplication: a domain recorded on
    several dates of the range, or both up and down, appears once per hit.

    :param date_from: first bound handed to ``Date.substract_date``
    :param date_to: second bound handed to ``Date.substract_date``
    :param domain_type: domain type forwarded to the per-date lookups
    :param up: include domains returned by ``get_domains_up_by_date``
    :param down: include domains returned by ``get_domains_down_by_date``
    :return: list of domains
    """
    # Decide once which per-date lookups apply; order matters (up, then down).
    lookups = []
    if up:
        lookups.append(get_domains_up_by_date)
    if down:
        lookups.append(get_domains_down_by_date)

    collected = []
    for day in Date.substract_date(date_from, date_to):
        for lookup in lookups:
            collected.extend(lookup(day, domain_type))
    return collected
def get_domains_dates_by_daterange(date_from, date_to, domain_type, up=True, down=False):
date_domains = {} date_domains = {}
for date in Date.substract_date(date_from, date_to): for date in Date.substract_date(date_from, date_to):
domains = [] domains = []
@ -541,21 +550,26 @@ def get_domains_meta(domains):
metas.append(dom.get_meta()) metas.append(dom.get_meta())
return metas return metas
# TODO HANDLE ALL MULTIPLE DOMAIN TYPES
# TODO ADD TAGS FILTER # TODO ADD TAGS FILTER
def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], nb_obj=28, page=1): def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[], nb_obj=28, page=1):
if not domain_types:
domain_types = ['onion', 'web']
if not tags: if not tags:
domains = []
if not date_from and not date_to: if not date_from and not date_to:
domains = sorted(get_domains_up_by_type(domain_type)) for domain_type in domain_types:
domains[0:0] = get_domains_up_by_type(domain_type)
else: else:
domains = sorted(get_domains_by_daterange(date_from, date_to, domain_type)) for domain_type in domain_types:
domains[0:0] = get_domains_by_daterange(date_from, date_to, domain_type)
domains = sorted(domains)
domains = paginate_iterator(domains, nb_obj=nb_obj, page=page) domains = paginate_iterator(domains, nb_obj=nb_obj, page=page)
meta = [] meta = []
for dom in domains['list_elem']: for dom in domains['list_elem']:
domain = Domain(dom) domain = Domain(dom)
meta.append(domain.get_meta(options={'languages', 'screenshot', 'tags_safe'})) meta.append(domain.get_meta(options={'languages', 'screenshot', 'tags_safe'}))
domains['list_elem'] = meta domains['list_elem'] = meta
domains['domain_type'] = domain_type domains['domain_types'] = domain_types
if date_from: if date_from:
domains['date_from'] = date_from domains['date_from'] = date_from
if date_to: if date_to:

View file

@ -358,12 +358,11 @@ def domains_explorer_post_filter():
date_from = None date_from = None
date_to = None date_to = None
# TODO SEARCH BOTH if domain_onion and domain_regular:
# if domain_onion and domain_regular: if date_from and date_to:
# if date_from and date_to: return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
# return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to)) else:
# else: return redirect(url_for('crawler_splash.domains_explorer_all'))
# return redirect(url_for('crawler_splash.domains_explorer_all'))
if domain_regular: if domain_regular:
if date_from and date_to: if date_from and date_to:
return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to)) return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
@ -376,22 +375,21 @@ def domains_explorer_post_filter():
return redirect(url_for('crawler_splash.domains_explorer_onion')) return redirect(url_for('crawler_splash.domains_explorer_onion'))
@crawler_splash.route('/domains/explorer/all', methods=['GET'])
@login_required
@login_read_only
def domains_explorer_all():
    """Render the domain explorer for onion and web domains together.

    Query-string arguments read from the request:
      - ``page``: page number; falls back to 1 when missing or not an integer
      - ``date_from`` / ``date_to``: optional date bounds, forwarded unchanged

    :return: the rendered ``domain_explorer.html`` template
    """
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    try:
        page = int(request.args.get('page'))
    except (TypeError, ValueError):
        # TypeError: parameter absent (None); ValueError: not a number.
        # Only these are caught so unrelated errors are not silently hidden.
        page = 1

    dict_data = Domains.get_domains_up_by_filers(['onion', 'web'], page=page, date_from=date_from, date_to=date_to)
    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
                           domain_type='all')
#
@crawler_splash.route('/domains/explorer/onion', methods=['GET']) @crawler_splash.route('/domains/explorer/onion', methods=['GET'])
@login_required @login_required
@ -405,7 +403,7 @@ def domains_explorer_onion():
except: except:
page = 1 page = 1
dict_data = Domains.get_domains_up_by_filers('onion', page=page, date_from=date_from, date_to=date_to) dict_data = Domains.get_domains_up_by_filers(['onion'], page=page, date_from=date_from, date_to=date_to)
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
domain_type='onion') domain_type='onion')
@ -422,7 +420,7 @@ def domains_explorer_web():
except: except:
page = 1 page = 1
dict_data = Domains.get_domains_up_by_filers('web', page=page, date_from=date_from, date_to=date_to) dict_data = Domains.get_domains_up_by_filers(['web'], page=page, date_from=date_from, date_to=date_to)
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
domain_type='regular') domain_type='regular')
@ -495,7 +493,7 @@ def domains_search_date():
# page = request.args.get('page') # page = request.args.get('page')
date = Date.sanitise_date_range(date_from, date_to) date = Date.sanitise_date_range(date_from, date_to)
domains_date = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], domain_type) domains_date = Domains.get_domains_dates_by_daterange(date['date_from'], date['date_to'], domain_type)
dict_domains = {} dict_domains = {}
for d in domains_date: for d in domains_date:
dict_domains[d] = Domains.get_domains_meta(domains_date[d]) dict_domains[d] = Domains.get_domains_meta(domains_date[d])

View file

@ -8,11 +8,11 @@
<div class="input-group" id="date-range-from"> <div class="input-group" id="date-range-from">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div> <div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from }}{% endif %}" name="date_from" autocomplete="off"> <input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from[0:4] }}-{{ date_from[4:6] }}-{{ date_from[6:8] }}{% endif %}" name="date_from" autocomplete="off">
</div> </div>
<div class="input-group" id="date-range-to"> <div class="input-group" id="date-range-to">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div> <div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to }}{% endif %}" name="date_to" autocomplete="off"> <input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to[0:4] }}-{{ date_to[4:6] }}-{{ date_to[6:8] }}{% endif %}" name="date_to" autocomplete="off">
</div> </div>
</div> </div>
<div class="col-6"> <div class="col-6">