diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py
index 911285f8..74e66cf9 100755
--- a/bin/packages/HiddenServices.py
+++ b/bin/packages/HiddenServices.py
@@ -63,7 +63,7 @@ class HiddenServices(object):
         self.type = type
         self.tags = {}

-        if type == 'onion':
+        if type == 'onion' or type == 'regular':
             self.paste_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes"))
             self.paste_crawled_directory = os.path.join(self.paste_directory, cfg.get("Directories", "crawled"))
             self.paste_crawled_directory_name = cfg.get("Directories", "crawled")
@@ -75,6 +75,10 @@ class HiddenServices(object):
             ## TODO: # FIXME: add error
             pass

+    def remove_absolute_path_link(self, key, value):
+        print(key)
+        print(value)
+
     def get_origin_paste_name(self):
         origin_item = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'paste_parent')
         if origin_item is None:
@@ -105,10 +109,35 @@ class HiddenServices(object):
         for tag in p_tags:
             self.tags[tag] = self.tags.get(tag, 0) + 1

+    def get_first_crawled(self):
+        res = self.r_serv_onion.zrange('crawler_history_{}:{}'.format(self.type, self.domain), 0, 0, withscores=True)
+        if res:
+            res = res[0]
+            return {'root_item': res[0], 'epoch': res[1]}
+        else:
+            return {}
+
+    def get_last_crawled(self):
+        res = self.r_serv_onion.zrevrange('crawler_history_{}:{}'.format(self.type, self.domain), 0, 0, withscores=True)
+        if res:
+            res = res[0]
+            return {'root_item': res[0], 'epoch': res[1]}
+        else:
+            return {}
+
     #todo use the right paste
-    def get_last_crawled_pastes(self):
-        paste_root = self.r_serv_onion.zrevrange('crawler_history_{}:{}'.format(self.type, self.domain), 0, 0)[0]
-        return self.get_all_pastes_domain(paste_root)
+    def get_last_crawled_pastes(self, epoch=None):
+        if epoch is None:
+            list_root = self.r_serv_onion.zrevrange('crawler_history_{}:{}'.format(self.type, self.domain), 0, 0)
+        else:
+            list_root = self.r_serv_onion.zrevrangebyscore('crawler_history_{}:{}'.format(self.type, self.domain), int(epoch), int(epoch))
+        if list_root:
+            return self.get_all_pastes_domain(list_root[0])
+        else:
+            if epoch:
+                return self.get_last_crawled_pastes()
+            else:
+                return list_root

     def get_all_pastes_domain(self, root_item):
         if root_item is None:
@@ -135,6 +164,27 @@ class HiddenServices(object):
             l_crawled_pastes.extend(self.get_item_crawled_children(children))
         return l_crawled_pastes

+    def get_item_link(self, item):
+        link = self.r_serv_metadata.hget('paste_metadata:{}'.format(item), 'real_link')
+        if link is None:
+            if self.paste_directory in item:
+                link = self.r_serv_metadata.hget('paste_metadata:{}'.format(item.replace(self.paste_directory+'/', '')), 'real_link')
+            else:
+                key = os.path.join(self.paste_directory, item)
+                link = self.r_serv_metadata.hget('paste_metadata:{}'.format(key), 'real_link')
+                if link:
+                    self.remove_absolute_path_link(key, link)
+
+        return link
+
+    def get_all_links(self, l_items):
+        dict_links = {}
+        for item in l_items:
+            link = self.get_item_link(item)
+            if link:
+                dict_links[item] = link
+        return dict_links
+
     # experimental
     def get_domain_son(self, l_paste):
         if l_paste is None:
@@ -177,11 +227,12 @@ class HiddenServices(object):
         l_screenshot_paste = []
         for paste in l_crawled_pastes:
             ## FIXME: # TODO: remove me
+            origin_paste = paste
             paste = paste.replace(self.paste_directory+'/', '')
             paste = paste.replace(self.paste_crawled_directory_name, '')

             if os.path.isfile( '{}{}.png'.format(self.screenshot_directory, paste) ):
-                l_screenshot_paste.append(paste[1:])
+                l_screenshot_paste.append({'screenshot': paste[1:], 'item': origin_paste})

         if len(l_screenshot_paste) > num_screenshot:
             l_random_screenshot = []
diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index 8bce6ccb..5bd7b6fa 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -8,6 +8,7 @@ import redis
 import datetime
 import sys
 import os
+import time
 import json
 from pyfaup.faup import Faup
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for
@@ -95,6 +96,16 @@ def get_domain_type(domain):
     else:
         return 'regular'

+def get_type_domain(domain):
+    if domain is None:
+        type = 'regular'
+    else:
+        if domain.rsplit('.', 1)[1] == 'onion':
+            type = 'onion'
+        else:
+            type = 'regular'
+    return type
+
 def get_last_domains_crawled(type):
     return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1)

@@ -560,25 +571,31 @@ def show_domains_by_daterange():
                 date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
                 domains_tags=domains_tags, bootstrap_label=bootstrap_label)

-@hiddenServices.route("/hiddenServices/onion_domain", methods=['GET'])
-def onion_domain():
-    onion_domain = request.args.get('onion_domain')
-    if onion_domain is None or not r_serv_onion.exists('onion_metadata:{}'.format(onion_domain)):
+@hiddenServices.route("/hiddenServices/show_domain", methods=['GET'])
+def show_domain():
+    domain = request.args.get('domain')
+    epoch = request.args.get('epoch')
+    type = get_type_domain(domain)
+    if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(type, domain)):
         return '404'
     # # TODO: FIXME return 404

-    last_check = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'last_check')
+    last_check = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'last_check')
     if last_check is None:
         last_check = '********'
     last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
-    first_seen = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'first_seen')
+    first_seen = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'first_seen')
     if first_seen is None:
         first_seen = '********'
     first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
-    origin_paste = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'paste_parent')
+    origin_paste = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'paste_parent')

-    h = HiddenServices(onion_domain, 'onion')
-    l_pastes = h.get_last_crawled_pastes()
+    h = HiddenServices(domain, type)
+    last_crawled_time = h.get_last_crawled()
+    if 'epoch' in last_crawled_time:
+        last_check = '{} - {}'.format(last_check, time.strftime('%H:%M.%S', time.gmtime(last_crawled_time['epoch'])))
+    l_pastes = h.get_last_crawled_pastes(epoch=epoch)
+    dict_links = h.get_all_links(l_pastes)
     if l_pastes:
         status = True
     else:
@@ -600,8 +617,9 @@
         p_tags = r_serv_metadata.smembers('tag:'+path)
         paste_tags.append(unpack_paste_tags(p_tags))

-    return render_template("showDomain.html", domain=onion_domain, last_check=last_check, first_seen=first_seen,
+    return render_template("showDomain.html", domain=domain, last_check=last_check, first_seen=first_seen,
                             l_pastes=l_pastes, paste_tags=paste_tags, bootstrap_label=bootstrap_label,
+                            dict_links=dict_links, path_name=path_name,
                             origin_paste_tags=origin_paste_tags, status=status,
                             origin_paste=origin_paste, origin_paste_name=origin_paste_name,
                             domain_tags=domain_tags, screenshot=screenshot)
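Note on the data model behind the new methods: each domain's crawl history is kept in a Redis sorted set, `crawler_history_<type>:<domain>`, whose members are the root items of individual crawls and whose scores are the crawl epochs. The following is a minimal sketch of the lookups `get_first_crawled()`, `get_last_crawled()`, and `get_last_crawled_pastes(epoch=...)` perform; it assumes a local Redis instance and redis-py >= 3.0, and the domain, item paths, and epochs are made up for illustration:

```python
import redis

# Assumption: local Redis on the default port; the key layout matches the diff above.
r = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

domain = 'example2345678901abcdef.onion'   # hypothetical domain
key = 'crawler_history_onion:{}'.format(domain)

# Each crawl stores its root item as the member, scored by the crawl epoch.
r.zadd(key, {'crawled/2019/02/01/{}.gz'.format(domain): 1548979200,
             'crawled/2019/02/08/{}.gz'.format(domain): 1549584000})

# Last crawl = highest score; this is what get_last_crawled() reads.
res = r.zrevrange(key, 0, 0, withscores=True)
if res:
    root_item, epoch = res[0]
    print({'root_item': root_item, 'epoch': epoch})

# Exact-epoch lookup, as in get_last_crawled_pastes(epoch=...); note that
# redis-py takes max before min. An unknown epoch returns [], which the
# diff handles by falling back to the latest crawl.
print(r.zrevrangebyscore(key, 1549584000, 1549584000))
```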
diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
index 621b30aa..5e84e7c3 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
@@ -62,7 +62,7 @@
           {% for metadata_domain in last_domains %}
           <tr>
-            <td><a target="_blank" href="{{ url_for('hiddenServices.onion_domain') }}?onion_domain={{ metadata_domain['domain'] }}">{{ metadata_domain['domain'] }}</a></td>
+            <td><a target="_blank" href="{{ url_for('hiddenServices.show_domain') }}?domain={{ metadata_domain['domain'] }}&epoch={{ metadata_domain['epoch'] }}">{{ metadata_domain['domain'] }}</a></td>
             <td>{{ metadata_domain['first_seen'] }}</td>
             <td>{{ metadata_domain['last_check'] }}</td>
           </tr>
diff --git a/var/www/modules/hiddenServices/templates/showDomain.html b/var/www/modules/hiddenServices/templates/showDomain.html
--- a/var/www/modules/hiddenServices/templates/showDomain.html
+++ b/var/www/modules/hiddenServices/templates/showDomain.html
@@ -44,13 +44,13 @@
       <table class="table table-condensed">
         <thead>
           <tr>
-            <th>First Seen</th>
-            <th>Last Check</th>
+            <th class="text-center">First Seen</th>
+            <th class="text-center">Last Check</th>
           </tr>
         </thead>
         <tbody>
           <tr>
-            <td class="panelText">{{ first_seen }}</td>
-            <td class="panelText">{{ last_check }}</td>
+            <td class="panelText text-center">{{ first_seen }}</td>
+            <td class="panelText text-center">{{ last_check }}</td>
           </tr>
         </tbody>
@@ -85,22 +85,24 @@
-      <table class="table table-condensed">
-        <thead>
-          <tr>
-            <th>Crawled Pastes</th>
-          </tr>
-        </thead>
-        <tbody>
-          {% for path in l_pastes %}
-          <tr>
-            <td><a target="_blank" href="{{ url_for('showsavedpastes.showsavedpaste') }}?paste={{ path }}">{{ path_name[loop.index0] }}</a>
-              <div>
-                {% for tag in paste_tags[loop.index0] %}
-                <a href="{{ url_for('Tags.get_tagged_paste') }}?ltags={{ tag[1] }}">
-                  <span class="label label-{{ bootstrap_label[loop.index0 % 5] }} pull-left">{{ tag[0] }}</span>
-                </a>
-                {% endfor %}
-              </div>
-            </td>
-          </tr>
-          {% endfor %}
-        </tbody>
-      </table>
+      <table class="table table-condensed">
+        <thead>
+          <tr>
+            <th>Crawled Pastes</th>
+          </tr>
+        </thead>
+        <tbody>
+          {% for path in l_pastes %}
+          <tr>
+            <td><a target="_blank" href="{{ url_for('showsavedpastes.showsavedpaste') }}?paste={{ path }}">
+                {{ dict_links[path] }}
+              </a>
+              <div>
+                {% for tag in paste_tags[loop.index0] %}
+                <a href="{{ url_for('Tags.get_tagged_paste') }}?ltags={{ tag[1] }}">
+                  <span class="label label-{{ bootstrap_label[loop.index0 % 5] }} pull-left">{{ tag[0] }}</span>
+                </a>
+                {% endfor %}
+              </div>
+            </td>
+          </tr>
+          {% endfor %}
+        </tbody>
+      </table>
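For reference, a usage sketch for the renamed endpoint. The host and port are assumptions (AIL's Flask interface commonly listens on 127.0.0.1:7000, but deployments differ) and the `.onion` value is made up; `epoch` is optional, and per `get_last_crawled_pastes()` above, an epoch with no recorded crawl falls back to the most recent one:

```python
import requests

base_url = 'http://127.0.0.1:7000/hiddenServices/show_domain'

# Latest crawl of a (hypothetical) domain:
r1 = requests.get(base_url, params={'domain': 'example2345678901abcdef.onion'})
print(r1.status_code)

# A specific crawl selected by its epoch; an unknown epoch falls back to the last crawl:
r2 = requests.get(base_url, params={'domain': 'example2345678901abcdef.onion',
                                    'epoch': 1549584000})
print(r2.status_code)
```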