diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py
index fd8ac372..89d39870 100755
--- a/bin/lib/Domain.py
+++ b/bin/lib/Domain.py
@@ -11,6 +11,7 @@ import os
import sys
import time
import redis
+import random
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Correlation
@@ -32,6 +33,34 @@ def get_domain_type(domain):
else:
return 'regular'
+def sanathyse_port(port, domain, domain_type, strict=False, current_port=None):
+    '''
+    Return a port number. If the given port is invalid, fall back to current_port
+    (strict mode) or to a randomly selected crawled port of the domain.
+    '''
+ try:
+ port = int(port)
+ except (TypeError, ValueError):
+ if strict:
+ port = current_port
+ else:
+ port = get_random_domain_port(domain, domain_type)
+ return port
+
+def is_domain_up(domain, domain_type):
+ return r_serv_onion.hexists('{}_metadata:{}'.format(domain_type, domain), 'ports')
+
+def get_domain_all_ports(domain, domain_type):
+ '''
+ Return a list of all crawled ports
+ '''
+ l_ports = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'ports')
+ if l_ports:
+ return l_ports.split(";")
+ return []
+
+def get_random_domain_port(domain, domain_type):
+ return random.choice(get_domain_all_ports(domain, domain_type))
+
def get_all_domain_up_by_type(domain_type):
if domain_type in domains:
list_domain = list(r_serv_onion.smembers('full_{}_up'.format(domain_type)))
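A quick behavioral sketch of the new sanathyse_port() helper; the domain and port values below are made up for illustration:

```python
# Illustrative calls; assumes a crawled 'foo.onion' domain exists.
sanathyse_port('8080', 'foo.onion', 'onion')     # valid string port  -> 8080
sanathyse_port('abc', 'foo.onion', 'onion')      # invalid            -> random crawled port
sanathyse_port(None, 'foo.onion', 'onion',
               strict=True, current_port=80)     # invalid, strict    -> 80 (no random pick)
```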
@@ -52,6 +81,53 @@ def get_domain_item_children(domain, root_item_id):
all_items.extend(get_domain_item_children(domain, item_id))
return all_items
+def get_domain_last_crawled_item_root(domain, domain_type, port):
+    '''
+    Return the most recent crawled root item as a dict: {"root_item": ..., "epoch": ...}
+    '''
+ res = r_serv_onion.zrevrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), 0, 0, withscores=True)
+ if res:
+ return {"root_item": res[0][0], "epoch": int(res[0][1])}
+ else:
+ return {}
+
+def get_domain_crawled_item_root(domain, domain_type, port, epoch=None):
+    '''
+    Return the root item crawled for a given domain:port (and epoch)
+    '''
+    if epoch:
+        res = r_serv_onion.zrevrangebyscore('crawler_history_{}:{}:{}'.format(domain_type, domain, port), int(epoch), int(epoch))
+        if res:
+            return {"root_item": res[0], "epoch": int(epoch)}
+        # invalid epoch: fall back to the last crawled root item
+    return get_domain_last_crawled_item_root(domain, domain_type, port)
+
+
+def get_domain_items_crawled(domain, domain_type, port, epoch=None, items_link=False, item_screenshot=False, item_tag=False):
+    '''
+    Return the items crawled for a given domain:port (and epoch), with optional
+    links, screenshots and tags.
+    '''
+ item_crawled = {}
+ item_root = get_domain_crawled_item_root(domain, domain_type, port, epoch=epoch)
+ if item_root:
+ item_crawled['port'] = port
+ item_crawled['epoch'] = item_root['epoch']
+ item_crawled['date'] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(item_root['epoch']))
+ item_crawled['items'] = []
+ for item in get_domain_items(domain, item_root['root_item']):
+ dict_item = {"id": item}
+ if items_link:
+ dict_item['link'] = Item.get_item_link(item)
+ if item_screenshot:
+ dict_item['screenshot'] = Item.get_item_screenshot(item)
+ if item_tag:
+ dict_item['tags'] = Tag.get_item_tags_minimal(item)
+ item_crawled['items'].append(dict_item)
+ return item_crawled
+
def get_link_tree():
pass
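For reviewers, a sketch of the Redis layout the crawler-history helpers above assume; key names are taken from the code, while the domain, item id and epoch are invented:

```python
import redis

# Assumed local ARDB/Redis instance; connection settings are illustrative.
r_serv_onion = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

# Seed the two structures read by the helpers:
r_serv_onion.hset('onion_metadata:foo.onion', 'ports', '80;8080')
r_serv_onion.zadd('crawler_history_onion:foo.onion:80',
                  {'crawled/2019/10/18/foo.onion/root-item': 1571400000})

get_domain_all_ports('foo.onion', 'onion')
# -> ['80', '8080']
get_domain_last_crawled_item_root('foo.onion', 'onion', 80)
# -> {'root_item': 'crawled/2019/10/18/foo.onion/root-item', 'epoch': 1571400000}
```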
@@ -65,7 +141,7 @@ def get_domain_tags(domain):
'''
return Tag.get_item_tags(domain)
-def get_domain_cryptocurrency(domain, currencies_type=None):
+def get_domain_cryptocurrency(domain, currencies_type=None, get_nb=False):
'''
Return all cryptocurrencies of a given domain.
@@ -73,9 +149,9 @@ def get_domain_cryptocurrency(domain, currencies_type=None):
:param currencies_type: list of cryptocurrencies type
:type currencies_type: list, optional
'''
- return cryptocurrency.get_domain_correlation_dict(domain, correlation_type=currencies_type)
+ return cryptocurrency.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb)
-def get_domain_pgp(domain, currencies_type=None):
+def get_domain_pgp(domain, currencies_type=None, get_nb=False):
'''
Return all PGP correlations of a given domain.
@@ -83,9 +159,9 @@ def get_domain_pgp(domain, currencies_type=None):
:param currencies_type: list of pgp type
:type currencies_type: list, optional
'''
- return pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type)
+ return pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb)
-def get_domain_all_correlation(domain, correlation_type=None):
+def get_domain_all_correlation(domain, correlation_type=None, get_nb=False):
'''
Return all correlations of a given domain.
@@ -96,10 +172,10 @@ def get_domain_all_correlation(domain, correlation_type=None):
:rtype: dict
'''
domain_correl = {}
- res = get_domain_cryptocurrency(domain)
+ res = get_domain_cryptocurrency(domain, get_nb=get_nb)
if res:
domain_correl['cryptocurrency'] = res
- res = get_domain_pgp(domain)
+ res = get_domain_pgp(domain, get_nb=get_nb)
if res:
domain_correl['pgp'] = res
return domain_correl
@@ -141,13 +217,26 @@ def get_domain_history_with_status(domain, domain_type, port): # TODO: add date_
l_history.append({"epoch": epoch_val, "date": time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val)), "status": status})
return l_history
+def verify_if_domain_exist(domain):
+ return r_serv_onion.exists('{}_metadata:{}'.format(get_domain_type(domain), domain))
+
+def api_verify_if_domain_exist(domain):
+ if not verify_if_domain_exist(domain):
+        return ({'status': 'error', 'reason': 'Unknown Domain'}, 404)
+ else:
+ return None
class Domain(object):
"""docstring for Domain."""
- def __init__(self, domain, port=80):
+ def __init__(self, domain, port=None):
self.domain = str(domain)
self.type = get_domain_type(domain)
+        self.current_port = None
+        if self.is_domain_up():
+            self.current_port = sanathyse_port(port, self.domain, self.type)
+
+ def get_current_port(self):
+ return self.current_port
def get_domain_first_seen(self):
'''
@@ -161,7 +250,7 @@ class Domain(object):
first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
return first_seen
- def get_domain_last_check(self):# # TODO: add epoch ???
+ def get_domain_last_check(self):
'''
Get domain last check date
@@ -173,10 +262,16 @@ class Domain(object):
last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
return last_check
- #def get_domain_all_ports(self):
- # pass
+ def is_domain_up(self): # # TODO: handle multiple ports
+ '''
+ Return True if this domain is UP
+ '''
+ return is_domain_up(self.domain, self.type)
- def get_domain_metadata(self, first_seen=True, last_ckeck=True, ports=True):
+ def get_domain_all_ports(self):
+ return get_domain_all_ports(self.domain, self.type)
+
+ def get_domain_metadata(self, first_seen=True, last_ckeck=True, status=True, ports=True):
'''
Get Domain basic metadata
@@ -199,6 +294,10 @@ class Domain(object):
res = self.get_domain_last_check()
if res is not None:
dict_metadata['last_check'] = res
+ if status:
+ dict_metadata['status'] = self.is_domain_up()
+ if ports:
+ dict_metadata['ports'] = self.get_domain_all_ports()
return dict_metadata
def get_domain_tags(self):
@@ -213,10 +312,17 @@ class Domain(object):
'''
Return all correlations (cryptocurrency, PGP) of this domain.
'''
- return get_domain_all_correlation(self.domain)
+ return get_domain_all_correlation(self.domain, get_nb=True)
def get_domain_history_with_status(self):
'''
Return the full history of a given domain and port.
'''
return get_domain_history_with_status(self.domain, self.type, 80)
+
+ def get_domain_items_crawled(self, port=None, epoch=None, items_link=False, item_screenshot=False, item_tag=False):
+        '''
+        Return the items crawled on a given port (and epoch) of this domain.
+        '''
+ port = sanathyse_port(port, self.domain, self.type, strict=True, current_port=self.current_port)
+ return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag)
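A minimal usage sketch of the reworked Domain class (the domain name and port are hypothetical, and the import assumes the usual AIL_BIN sys.path setup):

```python
import Domain  # resolved through the AIL_BIN sys.path entries

dom = Domain.Domain('foo.onion', port='invalid')  # a bad port falls back to a crawled one
if dom.is_domain_up():
    metadata = dom.get_domain_metadata()          # now carries 'status' and 'ports'
    crawled = dom.get_domain_items_crawled(items_link=True, item_tag=True)
    for item in crawled.get('items', []):
        print(item['id'], item.get('link'))
```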
diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py
index ee339fb2..84569724 100755
--- a/bin/packages/Correlation.py
+++ b/bin/packages/Correlation.py
@@ -126,7 +126,7 @@ class Correlation(object):
else:
return []
- def get_domain_correlation_dict(self, domain, correlation_type=None):
+ def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=False):
'''
Return all correlations of a given domain.
@@ -143,6 +143,8 @@ class Correlation(object):
res = self._get_domain_correlation_obj(domain, correl)
if res:
dict_correlation[correl] = res
+ if get_nb:
+ dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
return dict_correlation
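With get_nb=True, the per-type lists are summed into an extra 'nb' key; an illustrative result shape (types and values made up):

```python
# Illustrative return value of get_domain_correlation_dict(domain, get_nb=True):
{
    'bitcoin': ['1BvBMSEYst...', '3J98t1WpEZ...'],  # hypothetical correlated values
    'key': ['0x1234ABCD...'],
    'nb': 3,                                        # sum of the lengths of the type lists
}
```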
diff --git a/bin/packages/Item.py b/bin/packages/Item.py
index 680b8f97..b142ba7b 100755
--- a/bin/packages/Item.py
+++ b/bin/packages/Item.py
@@ -177,6 +177,14 @@ def is_item_in_domain(domain, item_id):
def get_item_domain(item_id):
return item_id[19:-36]
-
def get_item_children(item_id):
return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id)))
+
+def get_item_link(item_id):
+ return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'real_link')
+
+def get_item_screenshot(item_id):
+ screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
+ if screenshot:
+ return os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:])
+ return ''
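get_item_screenshot() expands the stored flat screenshot name into the nested two-character directory layout; for example, with a hypothetical stored value:

```python
# If 'screenshot' is stored as 'a1b2c3d4e5f6deadbeef' (made up), the slices
# [0:2], [2:4], ..., [12:] yield:
os.path.join('a1', 'b2', 'c3', 'd4', 'e5', 'f6', 'deadbeef')
# -> 'a1/b2/c3/d4/e5/f6/deadbeef'
```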
diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py
index 00c59cfa..44ac43b2 100755
--- a/bin/packages/Tag.py
+++ b/bin/packages/Tag.py
@@ -93,6 +93,22 @@ def get_item_tags(item_id):
else:
return []
+def get_min_tag(tag):
+    '''
+    Return the minimal display form of a tag: the quoted value of a
+    taxonomy/galaxy predicate, or the tag itself for custom tags.
+    '''
+    tag = tag.split('=')
+    if len(tag) > 1:
+        if tag[1] != '':
+            tag = tag[1][1:-1]
+        else:
+            # predicate without a value
+            tag = tag[0][1:-1]
+    else:
+        # custom tag
+        tag = tag[0]
+    return tag
+
+def get_item_tags_minimal(item_id):
+ return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_item_tags(item_id) ]
+
# TEMPLATE + API QUERY
def add_items_tag(tags=[], galaxy_tags=[], item_id=None):
res_dict = {}
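A short sketch of what get_min_tag() yields for the common tag shapes (example tags made up):

```python
get_min_tag('misp-galaxy:backdoor="Rosenbridge"')         # -> 'Rosenbridge'
get_min_tag('infoleak:automatic-detection="credential"')  # -> 'credential'
get_min_tag('my-custom-tag')                              # -> 'my-custom-tag'
```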
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 364a84d9..c996a49a 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -7,6 +7,8 @@
import os
import sys
+import json
+import random
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
from flask_login import login_required, current_user, login_user, logout_user
@@ -34,7 +36,9 @@ crawler_splash = Blueprint('crawler_splash', __name__, template_folder=os.path.j
# ============ FUNCTIONS ============
-
+def api_validator(api_response):
+ if api_response:
+ return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1]
# ============= ROUTES ==============
@@ -46,14 +50,19 @@ def showDomain():
epoch = request.args.get('epoch')
port = request.args.get('port')
- domain = Domain.Domain(domain_name)
+ res = api_validator(Domain.api_verify_if_domain_exist(domain_name))
+ if res:
+ return res
+
+ domain = Domain.Domain(domain_name, port=port)
dict_domain = domain.get_domain_metadata()
- dict_domain = {**dict_domain, **domain.get_domain_correlation()}
dict_domain['domain'] = domain_name
- dict_domain['tags'] = domain.get_domain_tags()
- dict_domain['history'] = domain.get_domain_history_with_status()
+ if domain.is_domain_up():
+ dict_domain = {**dict_domain, **domain.get_domain_correlation()}
+ dict_domain['tags'] = domain.get_domain_tags()
+ dict_domain['history'] = domain.get_domain_history_with_status()
+ dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port
+        if dict_domain['crawler_history'].get('items'):
+            dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items'])
- print(dict_domain)
-
- return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, screenshot={'item': None, '':None}, dict_links={})
+ return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label)
diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index f64df4e1..3daa76b8 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -730,91 +730,6 @@ def show_domains_by_daterange():
date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
domains_tags=domains_tags, type=service_type, bootstrap_label=bootstrap_label)
-@hiddenServices.route("/crawlers/show_domain", methods=['GET'])
-@login_required
-@login_analyst
-def show_domain():
- domain = request.args.get('domain')
- epoch = request.args.get('epoch')
- try:
- epoch = int(epoch)
- except:
- epoch = None
- port = request.args.get('port')
- faup.decode(domain)
- unpack_url = faup.get()
-
- ## TODO: # FIXME: remove me
- try:
- domain = unpack_url['domain'].decode()
- except:
- domain = unpack_url['domain']
-
- if not port:
- if unpack_url['port']:
- try:
- port = unpack_url['port'].decode()
- except:
- port = unpack_url['port']
- else:
- port = 80
- try:
- port = int(port)
- except:
- port = 80
- type = get_type_domain(domain)
- if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(type, domain)):
- return '404'
- # # TODO: FIXME return 404
-
- last_check = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'last_check')
- if last_check is None:
- last_check = '********'
- last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
- first_seen = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'first_seen')
- if first_seen is None:
- first_seen = '********'
- first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
- ports = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'ports')
- origin_paste = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'paste_parent')
-
- h = HiddenServices(domain, type, port=port)
- item_core = h.get_domain_crawled_core_item(epoch=epoch)
- if item_core:
- l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
- else:
- l_pastes = []
- dict_links = h.get_all_links(l_pastes)
- if l_pastes:
- status = True
- else:
- status = False
- last_check = '{} - {}'.format(last_check, time.strftime('%H:%M.%S', time.gmtime(epoch)))
- screenshot = h.get_domain_random_screenshot(l_pastes)
- if screenshot:
- screenshot = screenshot[0]
- else:
- screenshot = 'None'
-
- domain_tags = h.get_domain_tags()
-
- origin_paste_name = h.get_origin_paste_name()
- origin_paste_tags = unpack_paste_tags(r_serv_metadata.smembers('tag:{}'.format(origin_paste)))
- paste_tags = []
- for path in l_pastes:
- p_tags = r_serv_metadata.smembers('tag:'+path)
- paste_tags.append(unpack_paste_tags(p_tags))
-
- domain_history = h.extract_epoch_from_history(h.get_domain_crawled_history())
-
- return render_template("showDomain.html", domain=domain, last_check=last_check, first_seen=first_seen,
- l_pastes=l_pastes, paste_tags=paste_tags, bootstrap_label=bootstrap_label,
- dict_links=dict_links, port=port, epoch=epoch,
- ports=ports, domain_history=domain_history,
- origin_paste_tags=origin_paste_tags, status=status,
- origin_paste=origin_paste, origin_paste_name=origin_paste_name,
- domain_tags=domain_tags, screenshot=screenshot)
-
@hiddenServices.route("/crawlers/download_domain", methods=['GET'])
@login_required
@login_analyst
diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
index bafe3ecf..f793b3a9 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
@@ -67,7 +67,7 @@
title="{{metadata_domain['domain']}}"
data-content="port: {{metadata_domain['port']}}
epoch: {{metadata_domain['epoch']}}">
-                  {{metadata_domain['domain']}}
+                  {{metadata_domain['domain']}}
diff --git a/var/www/modules/hiddenServices/templates/showDomain.html b/var/www/modules/hiddenServices/templates/showDomain.html
--- a/var/www/modules/hiddenServices/templates/showDomain.html
+++ b/var/www/modules/hiddenServices/templates/showDomain.html
@@ ... @@ crawled items table: render the new item dicts
        Crawled Pastes | 
+         |
      
---|
-          
-            {{ dict_links[path] }}
+          
+            {{ item["link"] }}
-          {% for tag in paste_tags[loop.index0] %}
-            
-              {{ tag[0] }}
+          {% for tag in item["tags"] %}
+            
+              {{ tag["min_tag"] }}
           {% endfor %}
         |
+        
+          {%if item["screenshot"]%}
+            
+          {%endif%}
+         |
@@ ... @@ domain history table: render the new dom_history dicts
-      
+       |
+      
         {{dict_domain["domain"]}}
-        {% if epoch_item[2] %}
+        {% if dom_history["status"] %}
           UP
         {% else %}
           DOWN
         {% endif %}
-        {{ epoch_item[1] }}
+        {{ dom_history["date"] }}
       |