mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-27 00:07:16 +00:00
chg: [Domain crawled] add random screenshot
This commit is contained in:
parent
ced0b1e350
commit
d42dd118a4
3 changed files with 68 additions and 11 deletions
|
@ -19,6 +19,7 @@ Conditions to fulfill to be able to use this class correctly:
|
||||||
import os
|
import os
|
||||||
import gzip
|
import gzip
|
||||||
import redis
|
import redis
|
||||||
|
import random
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
import sys
|
import sys
|
||||||
|
@ -52,11 +53,19 @@ class HiddenServices(object):
|
||||||
db=cfg.getint("ARDB_Onion", "db"),
|
db=cfg.getint("ARDB_Onion", "db"),
|
||||||
decode_responses=True)
|
decode_responses=True)
|
||||||
|
|
||||||
|
self.r_serv_metadata = redis.StrictRedis(
|
||||||
|
host=cfg.get("ARDB_Metadata", "host"),
|
||||||
|
port=cfg.getint("ARDB_Metadata", "port"),
|
||||||
|
db=cfg.getint("ARDB_Metadata", "db"),
|
||||||
|
decode_responses=True)
|
||||||
|
|
||||||
self.domain = domain
|
self.domain = domain
|
||||||
self.type = type
|
self.type = type
|
||||||
|
|
||||||
if type == 'onion':
|
if type == 'onion':
|
||||||
self.paste_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes"), cfg.get("Directories", "crawled"))
|
self.paste_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes"))
|
||||||
|
self.paste_crawled_directory = os.path.join(self.paste_directory, cfg.get("Directories", "crawled"))
|
||||||
|
self.paste_crawled_directory_name = cfg.get("Directories", "crawled")
|
||||||
self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"))
|
self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"))
|
||||||
elif type == 'i2p':
|
elif type == 'i2p':
|
||||||
self.paste_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"))
|
self.paste_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"))
|
||||||
|
@ -65,15 +74,57 @@ class HiddenServices(object):
|
||||||
## TODO: # FIXME: add error
|
## TODO: # FIXME: add error
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
#todo use the right paste
|
||||||
def get_last_crawled_pastes(self):
|
def get_last_crawled_pastes(self):
    """Return the pastes crawled for this domain, starting from its root paste.

    The root paste is read from the ``paste_parent`` field of the
    ``onion_metadata:<domain>`` hash in ``self.r_serv_onion``; its
    descendants are then collected by ``get_all_pastes_domain``.

    Returns:
        list: whatever ``get_all_pastes_domain`` returns for the root
        paste, or an empty list when no ``paste_parent`` is recorded.
    """
    paste_parent = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'paste_parent')
    # hget yields None for an unknown domain/field; without this guard the
    # None would reach get_all_pastes_domain and fail on a string method.
    if not paste_parent:
        return []
    return self.get_all_pastes_domain(paste_parent)
|
||||||
|
|
||||||
last_check = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'last_check')
|
def get_all_pastes_domain(self, father):
    """Collect every descendant paste of ``father`` that belongs to this domain.

    Children are read from the ``paste_children:<paste>`` sets in
    ``self.r_serv_metadata``. Only children whose name contains
    ``self.domain`` are kept, and only those are explored further.

    Args:
        father: path of the paste to start from, either relative or
            prefixed with ``self.paste_directory``.

    Returns:
        list: the matching descendant pastes, each listed once, in no
        guaranteed order (the children come from Redis sets). Empty when
        ``father`` is missing or has no matching children.
    """
    if not father:
        return []

    l_crawled_pastes = []
    # The walk is iterative and remembers what it has already seen, so a
    # cyclic or very deep parent/children graph cannot recurse forever.
    seen = {father}
    pending = [father]
    while pending:
        current = pending.pop()

        # Children are looked up under the path relative to the paste
        # directory. Only strip the prefix when it is really there: an
        # unconditional slice would eat the first character of an
        # already-relative path.
        paste_parent = current
        if paste_parent.startswith(self.paste_directory):
            paste_parent = paste_parent[len(self.paste_directory):].lstrip(os.sep)

        paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste_parent))
        ## TODO: # FIXME: remove me
        # Fallback: retry with the unmodified path as the key.
        if not paste_childrens and paste_parent != current:
            paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(current))

        for children in paste_childrens:
            if self.domain in children and children not in seen:
                seen.add(children)
                l_crawled_pastes.append(children)
                pending.append(children)
    return l_crawled_pastes
|
||||||
|
|
||||||
|
def get_domain_random_screenshot(self, l_crawled_pastes, num_screenshot=1):
    """Pick up to ``num_screenshot`` pastes that have a screenshot on disk.

    For each paste, the leading ``self.paste_directory`` and
    ``self.paste_crawled_directory_name`` components are removed, and the
    remaining relative path is looked up as
    ``<self.screenshot_directory>/<relative path>.png``.

    Args:
        l_crawled_pastes: iterable of paste paths, either relative or
            prefixed with ``self.paste_directory``.
        num_screenshot: maximum number of entries to return.

    Returns:
        list: relative screenshot paths (no leading separator, no ``.png``
        suffix). A random sample of ``num_screenshot`` entries when more
        are available, otherwise every entry found (possibly none).
    """
    def strip_leading_dir(path, directory):
        # Remove `directory` only when it is the leading path component;
        # a plain str.replace would also hit matches in the middle of the
        # path (for instance inside the domain name).
        directory = directory.rstrip(os.sep)
        if directory and (path == directory or path.startswith(directory + os.sep)):
            path = path[len(directory):]
        return path.lstrip(os.sep)

    l_screenshot_paste = []
    for paste in l_crawled_pastes:
        ## FIXME: # TODO: remove me
        # Full paths are reduced to relative ones; already-relative paths
        # are left intact instead of losing their first character.
        paste = strip_leading_dir(paste, self.paste_directory)
        paste = strip_leading_dir(paste, self.paste_crawled_directory_name)

        if os.path.isfile(os.path.join(self.screenshot_directory, '{}.png'.format(paste))):
            l_screenshot_paste.append(paste)

    if len(l_screenshot_paste) > num_screenshot:
        return random.sample(l_screenshot_paste, num_screenshot)
    return l_screenshot_paste
|
||||||
|
|
||||||
def get_crawled_pastes_by_date(self, date):
|
def get_crawled_pastes_by_date(self, date):
    """Placeholder for a lookup of the pastes crawled on ``date``.

    This method is not implemented: it always returns an empty list.
    ``get_crawled_pastes_by_date_fileSearch`` lists the pastes of a date
    by scanning the crawled directory.

    Args:
        date: crawl date; currently unused.

    Returns:
        list: always empty.
    """
    # TODO: implement the lookup. The previous body built a directory path
    # and fetched the domain's 'last_check' field, but used neither value,
    # so both were dropped rather than paying for a Redis round trip whose
    # result was thrown away.
    return []
|
||||||
|
|
||||||
|
def get_last_crawled_pastes_fileSearch(self):
    """List the pastes of this domain stored under its last check date.

    The date is read from the ``last_check`` field of the
    ``onion_metadata:<domain>`` hash in ``self.r_serv_onion`` and handed to
    ``get_crawled_pastes_by_date_fileSearch``.

    Returns:
        list: the result of ``get_crawled_pastes_by_date_fileSearch`` for
        that date, or an empty list when no ``last_check`` is recorded.
    """
    last_check = self.r_serv_onion.hget('onion_metadata:{}'.format(self.domain), 'last_check')
    # hget yields None when the field is absent; slicing None downstream
    # would raise, so report "nothing crawled" instead.
    if not last_check:
        return []
    return self.get_crawled_pastes_by_date_fileSearch(last_check)
|
||||||
|
|
||||||
|
def get_crawled_pastes_by_date_fileSearch(self, date):
    """List the files of this domain in the crawled directory of ``date``.

    The directory scanned is
    ``<self.paste_crawled_directory>/<date[0:4]>/<date[4:6]>/<date[6:8]>``.

    Args:
        date: string whose first eight characters are split into the three
            path components above (presumably ``YYYYMMDD`` - confirm with
            the code that writes ``last_check``).

    Returns:
        list: names of the entries in that directory containing
        ``self.domain``; empty when the directory does not exist.
    """
    pastes_path = os.path.join(self.paste_crawled_directory, date[0:4], date[4:6], date[6:8])
    try:
        entries = os.listdir(pastes_path)
    except (FileNotFoundError, NotADirectoryError):
        # No directory for that date means nothing was stored for it.
        return []
    return [f for f in entries if self.domain in f]
|
||||||
|
|
|
@ -6,10 +6,12 @@
|
||||||
'''
|
'''
|
||||||
import redis
|
import redis
|
||||||
import datetime
|
import datetime
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
from flask import Flask, render_template, jsonify, request, Blueprint
|
from flask import Flask, render_template, jsonify, request, Blueprint
|
||||||
|
|
||||||
import HiddenServices
|
|
||||||
from Date import Date
|
from Date import Date
|
||||||
|
from HiddenServices import HiddenServices
|
||||||
|
|
||||||
# ============ VARIABLES ============
|
# ============ VARIABLES ============
|
||||||
import Flask_config
|
import Flask_config
|
||||||
|
@ -75,8 +77,12 @@ def onion_domain():
|
||||||
domain_paste = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'paste_parent')
|
domain_paste = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'paste_parent')
|
||||||
date_crawled = r_serv_onion.smembers('onion_history:{}'.format(onion_domain))
|
date_crawled = r_serv_onion.smembers('onion_history:{}'.format(onion_domain))
|
||||||
|
|
||||||
|
h = HiddenServices(onion_domain, 'onion')
|
||||||
|
l_pastes = h.get_last_crawled_pastes()
|
||||||
|
screenshot = h.get_domain_random_screenshot(l_pastes)[0]
|
||||||
|
|
||||||
return render_template("showDomain.html", domain=onion_domain, last_check=last_check, first_seen=first_seen,
|
return render_template("showDomain.html", domain=onion_domain, last_check=last_check, first_seen=first_seen,
|
||||||
domain_paste=domain_paste)
|
domain_paste=domain_paste, screenshot=screenshot)
|
||||||
|
|
||||||
# ============= JSON ==============
|
# ============= JSON ==============
|
||||||
@hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET'])
|
@hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET'])
|
||||||
|
|
|
@ -62,7 +62,7 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="col-md-7">
|
<div class="col-md-7">
|
||||||
<img src="{{ url_for('showsavedpastes.screenshot', filename='ds') }}" onError="this.onerror=null;this.src='{{ url_for('static', filename='image/AIL.png') }}';" style="width:100%;" />
|
<img src="{{ url_for('showsavedpastes.screenshot', filename=screenshot) }}" onError="this.onerror=null;this.src='{{ url_for('static', filename='image/AIL.png') }}';" style="width:100%;" />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
Loading…
Reference in a new issue