mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [crawler - cookies] add/show/select cookies
This commit is contained in:
parent
1c45571042
commit
d87ecff4a0
11 changed files with 334 additions and 238 deletions
|
@ -1,205 +0,0 @@
|
||||||
#!/usr/bin/python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
API Helper
|
|
||||||
===================
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
import base64
|
|
||||||
import gzip
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import redis
|
|
||||||
import sys
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
|
||||||
import ConfigLoader
|
|
||||||
|
|
||||||
|
|
||||||
config_loader = ConfigLoader.ConfigLoader()
|
|
||||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
|
||||||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
|
||||||
config_loader = None
|
|
||||||
|
|
||||||
# # # # # # # #
|
|
||||||
# #
|
|
||||||
# COOKIES #
|
|
||||||
# #
|
|
||||||
# # # # # # # #
|
|
||||||
|
|
||||||
# # # #
|
|
||||||
# Cookies Fields:
|
|
||||||
# - name
|
|
||||||
# - value
|
|
||||||
# - path (optional)
|
|
||||||
# - domain (optional)
|
|
||||||
# - secure (optional)
|
|
||||||
# - httpOnly (optional)
|
|
||||||
# # # #
|
|
||||||
def create_cookie_dict(browser_cookie=None, cookie_name=None, cookie_value=None, domain=None, crawler_type='regular'):
    """Build a single cookie dict.

    When cookie_name, cookie_value and domain are all truthy the cookie is
    built from these manual (UI) fields; otherwise it is built from
    browser_cookie, a cookie entry exported from a browser.

    :param browser_cookie: browser-exported cookie entry (mapping)
    :param cookie_name: name of a manually created cookie
    :param cookie_value: value of a manually created cookie
    :param domain: domain of a manually created cookie
    :param crawler_type: 'onion' forces the 'secure' flag to False
    :return: cookie dict with an 'expires' field set 10 days ahead
    """
    # Function-scope import: the module only imports datetime and timedelta.
    from datetime import timezone

    # UI created
    if cookie_name and cookie_value and domain:
        dict_cookie = create_cookie_dict_from_input(cookie_name, cookie_value, domain)
    # Cookies imported from the browser
    else:
        dict_cookie = create_cookie_dict_from_browser(browser_cookie)

    # tor browser: disable secure cookie
    if crawler_type == 'onion':
        dict_cookie['secure'] = False

    # The trailing 'Z' denotes UTC, so the timestamp must be computed in UTC
    # rather than in local time.
    expires = datetime.now(timezone.utc) + timedelta(days=10)
    dict_cookie['expires'] = expires.strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
    return dict_cookie
|
|
||||||
|
|
||||||
def create_cookie_dict_from_input(cookie_name, cookie_value, cookie_domain):
    """Build a cookie dict from manually entered name, value and domain.

    :return: dict with 'name', 'value' and a dot-prefixed 'domain'
    """
    cookie = {}
    cookie['name'] = cookie_name
    cookie['value'] = cookie_value
    # WebKit uses the domain for cookie validation
    cookie['domain'] = '.{}'.format(cookie_domain)
    return cookie
|
|
||||||
|
|
||||||
# # TODO: handle prefix cookies
|
|
||||||
# # TODO: fill empty fields
|
|
||||||
def create_cookie_dict_from_browser(browser_cookie):
    """Convert a browser-exported cookie entry into a cookie dict.

    Reads the 'Host raw', 'Path raw', 'Name raw', 'HTTP only raw',
    'Send for' and 'Content raw' keys of browser_cookie.

    :return: dict with 'path', 'name', 'httpOnly', 'secure', 'domain', 'value'
    """
    netloc = urlparse(browser_cookie['Host raw']).netloc
    # keep only the host part, dropping any ':port' suffix
    host = netloc.partition(':')[0]
    return {
        'path': browser_cookie['Path raw'],
        'name': browser_cookie['Name raw'],
        'httpOnly': browser_cookie['HTTP only raw'] == 'true',
        'secure': browser_cookie['Send for'] == 'Encrypted connections only',
        'domain': host,
        'value': browser_cookie['Content raw'],
    }
|
|
||||||
|
|
||||||
def load_cookies(cookies_uuid, domain=None, crawler_type='regular'):
    """Load every cookie stored under cookies_uuid as a list of cookie dicts.

    Browser-imported (JSON) cookies come first, followed by the manually
    entered name/value cookies, which are bound to domain.
    """
    imported, manual = get_cookies(cookies_uuid)
    loaded = [create_cookie_dict(browser_cookie=entry, crawler_type=crawler_type)
              for entry in imported]
    loaded.extend(
        create_cookie_dict(cookie_name=name, cookie_value=value, domain=domain, crawler_type=crawler_type)
        for name, value in manual
    )
    return loaded
|
|
||||||
|
|
||||||
def get_all_cookies():
    """Return the members of the 'cookies:all' set (the cookies UUIDs added by save_cookies)."""
    # The original dropped the result and implicitly returned None.
    return r_serv_onion.smembers('cookies:all')
|
|
||||||
|
|
||||||
def get_all_global_cookies():
    """Return the members of the 'cookies:global' set."""
    # The original dropped the result and implicitly returned None.
    return r_serv_onion.smembers('cookies:global')
|
|
||||||
|
|
||||||
def get_user_cookies(user_id):
    """Return the members of the 'cookies:user:<user_id>' set."""
    # The original dropped the result and implicitly returned None.
    return r_serv_onion.smembers('cookies:user:{}'.format(user_id))
|
|
||||||
|
|
||||||
def exist_cookies_uuid(cookies_uuid):
    """Tell whether the 'cookie_metadata:<cookies_uuid>' key exists."""
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    return r_serv_onion.exists(metadata_key)
|
|
||||||
|
|
||||||
def get_manual_cookies_keys(cookies_uuid):
    """Return the whole 'cookies:manual_cookies:<cookies_uuid>' hash (hgetall result)."""
    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hgetall(manual_key)
|
|
||||||
|
|
||||||
def get_manual_cookie_val(cookies_uuid, cookie_name):
    """Return the value stored for cookie_name in the manual cookies hash of cookies_uuid."""
    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hget(manual_key, cookie_name)
|
|
||||||
|
|
||||||
def get_cookies(cookies_uuid):
    """Fetch the stored cookies of cookies_uuid.

    :return: tuple (cookies_json, l_cookies) where cookies_json is the
             decoded JSON stored under 'cookies:json_cookies:<uuid>' (or an
             empty list when nothing is stored) and l_cookies is a list of
             (cookie_name, cookie_value) tuples for the manual cookies
    """
    raw_json = r_serv_onion.get('cookies:json_cookies:{}'.format(cookies_uuid))
    cookies_json = json.loads(raw_json) if raw_json else []

    # get_manual_cookies_keys() already returns the full name -> value hash,
    # so take the pairs from it instead of issuing one extra hget per name
    # (fewer round trips, and no None value if a field vanishes in between).
    l_cookies = list(get_manual_cookies_keys(cookies_uuid).items())
    return (cookies_json, l_cookies)
|
|
||||||
|
|
||||||
# # TODO: handle errors + add api handler
|
|
||||||
def save_cookies(user_id, json_cookies=None, l_cookies=None, cookies_uuid=None, level=1, description=None):
    """Store a set of cookies and its metadata.

    :param user_id: owner of the cookies
    :param json_cookies: JSON string of browser-exported cookies (optional)
    :param l_cookies: iterable of dicts with 'name' and 'value' keys (optional)
    :param cookies_uuid: existing UUID to reuse; a new one is generated when
                         it is None or unknown
    :param level: 0 registers the cookies in the user set, any other value
                  in the global set
    :param description: free text stored in the metadata
    :return: the cookies UUID used for storage
    :raises ValueError: if json_cookies is not valid JSON (json.loads)
    """
    if cookies_uuid is None or not exist_cookies_uuid(cookies_uuid):
        cookies_uuid = str(uuid.uuid4())

    if json_cookies:
        json_cookies = json.loads(json_cookies)  # # TODO: catch Exception
        r_serv_onion.set('cookies:json_cookies:{}'.format(cookies_uuid), json.dumps(json_cookies))

    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    for cookie_dict in l_cookies or ():
        r_serv_onion.hset(manual_key, cookie_dict['name'], cookie_dict['value'])

    # cookies level # # TODO: edit level set on edit
    r_serv_onion.sadd('cookies:all', cookies_uuid)
    if level == 0:
        r_serv_onion.sadd('cookies:user:{}'.format(user_id), cookies_uuid)
    else:
        r_serv_onion.sadd('cookies:global', cookies_uuid)

    # metadata
    # The key must be built from cookies_uuid: the original formatted the
    # builtin `id` function, so exist_cookies_uuid() could never find it.
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    r_serv_onion.hset(metadata_key, 'user_id', user_id)
    r_serv_onion.hset(metadata_key, 'level', level)
    # NOTE(review): description may be None here; confirm the redis client
    # in use accepts None as a hash value.
    r_serv_onion.hset(metadata_key, 'description', description)
    # `datetime` is the class (from datetime import datetime), so the
    # original `datetime.date.today()` raised AttributeError.
    r_serv_onion.hset(metadata_key, 'date', datetime.now().strftime("%Y%m%d"))
    return cookies_uuid
|
|
||||||
|
|
||||||
#### ####
|
|
||||||
|
|
||||||
def is_redirection(domain, last_url):
    """Tell whether last_url points to another domain than domain.

    Only the last two labels of the URL host are compared with domain, so
    'www.example.com' matches 'example.com'.

    :param domain: expected domain, without port
    :param last_url: final URL reached by the crawler
    :return: True if the host of last_url does not match domain
    """
    # Drop an explicit ':port' suffix: domain is port-less, so keeping the
    # port made every such URL look like a redirection.
    host = urlparse(last_url).netloc.split(':', 1)[0]
    # Slicing (instead of indexing [-2]) keeps single-label or empty hosts
    # from raising IndexError.
    last_domain = '.'.join(host.split('.')[-2:])
    return domain != last_domain
|
|
||||||
|
|
||||||
# domain up
|
|
||||||
def create_domain_metadata(domain_type, domain, current_port, date, date_month):
    """Register domain as up and refresh its metadata hash.

    :param domain_type: prefix used in the Redis key names
    :param domain: domain name
    :param current_port: port the domain was reached on
    :param date: day identifier used in the daily set key and the metadata
    :param date_month: month identifier used in the monthly set key
    """
    # Add to global set
    r_serv_onion.sadd('{}_up:{}'.format(domain_type, date), domain)
    r_serv_onion.sadd('full_{}_up'.format(domain_type), domain)
    r_serv_onion.sadd('month_{}_up:{}'.format(domain_type, date_month), domain)

    # create onion metadata
    metadata_key = '{}_metadata:{}'.format(domain_type, domain)
    if not r_serv_onion.exists(metadata_key):
        r_serv_onion.hset(metadata_key, 'first_seen', date)
    r_serv_onion.hset(metadata_key, 'last_check', date)

    # Update domain port number
    all_domain_ports = r_serv_onion.hget(metadata_key, 'ports')
    all_domain_ports = all_domain_ports.split(';') if all_domain_ports else []
    # Stored ports are strings: normalise so an int port is neither
    # re-appended on every call nor rejected by ';'.join().
    current_port = str(current_port)
    if current_port not in all_domain_ports:
        all_domain_ports.append(current_port)
        # the stored list only changes when a new port was appended
        r_serv_onion.hset(metadata_key, 'ports', ';'.join(all_domain_ports))
|
|
||||||
|
|
||||||
# add root_item to history
|
|
||||||
def add_domain_root_item(root_item, domain_type, domain, epoch_date, port):
    """Add root_item to the crawler history sorted set of domain:port, scored by epoch_date."""
    # Create/Update crawler history
    history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
    # NOTE(review): positional (key, score, member) zadd arguments - confirm
    # this matches the redis client version in use.
    r_serv_onion.zadd(history_key, epoch_date, root_item)
|
|
||||||
|
|
||||||
def create_item_metadata(item_id, domain, url, port, item_father):
    """Write the 'father', 'domain' and 'real_link' metadata of a crawled item
    and register the item as a child of item_father.
    """
    item_key = 'paste_metadata:{}'.format(item_id)
    domain_port = '{}:{}'.format(domain, port)
    r_serv_metadata.hset(item_key, 'father', item_father)
    r_serv_metadata.hset(item_key, 'domain', domain_port)
    r_serv_metadata.hset(item_key, 'real_link', url)
    # add this item_id to its father's children
    children_key = 'paste_children:{}'.format(item_father)
    r_serv_metadata.sadd(children_key, item_id)
|
|
||||||
|
|
||||||
def create_item_id(item_dir, domain):
    """Return a new item id: item_dir joined with domain followed by a random UUID4.

    Only the last 215 characters of domain are kept when it is longer.
    """
    domain_part = domain[-215:] if len(domain) > 215 else domain
    item_name = domain_part + str(uuid.uuid4())
    return os.path.join(item_dir, item_name)
|
|
||||||
|
|
||||||
def save_crawled_item(item_id, item_content):
    """Gzip-compress item_content and return it base64 encoded.

    :param item_id: item identifier, only used in the error message
    :param item_content: text content of the crawled item
    :return: base64 string of the gzipped content, or False on failure
    """
    try:
        gzipencoded = gzip.compress(item_content.encode())
        gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
        return gzip64encoded
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit
    # the way the bare `except:` did.
    except Exception:
        print("file error: {}".format(item_id))
        return False
|
|
||||||
|
|
||||||
def save_har(har_dir, item_id, har_content):
    """Write har_content as JSON to '<har_dir>/<item name>.json'.

    :param har_dir: destination directory, created if missing
    :param item_id: item id; only its last '/'-separated component is used
    :param har_content: JSON-serializable HAR data
    """
    # exist_ok avoids the check-then-create race of os.path.exists() + makedirs()
    os.makedirs(har_dir, exist_ok=True)
    item_id = item_id.split('/')[-1]
    filename = os.path.join(har_dir, item_id + '.json')
    # Serialize before opening so a serialization error leaves no empty file.
    payload = json.dumps(har_content)
    with open(filename, 'w') as f:
        f.write(payload)
|
|
|
@ -28,7 +28,7 @@ from Helper import Process
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||||
#import ConfigLoader
|
#import ConfigLoader
|
||||||
import Screenshot
|
import Screenshot
|
||||||
import crawler_splash
|
import crawlers
|
||||||
|
|
||||||
script_cookie = """
|
script_cookie = """
|
||||||
function main(splash, args)
|
function main(splash, args)
|
||||||
|
@ -176,18 +176,18 @@ class TorSplashCrawler():
|
||||||
# detect connection to proxy refused
|
# detect connection to proxy refused
|
||||||
error_log = (json.loads(response.body.decode()))
|
error_log = (json.loads(response.body.decode()))
|
||||||
print(error_log)
|
print(error_log)
|
||||||
elif crawler_splash.is_redirection(self.domains[0], response.data['last_url']):
|
elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
|
||||||
pass # ignore response
|
pass # ignore response
|
||||||
else:
|
else:
|
||||||
|
|
||||||
item_id = crawler_splash.create_item_id(self.item_dir, self.domains[0])
|
item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
|
||||||
self.save_crawled_item(item_id, response.data['html'])
|
self.save_crawled_item(item_id, response.data['html'])
|
||||||
crawler_splash.create_item_metadata(item_id, self.domains[0], response.data['last_url'], self.port, response.meta['father'])
|
crawlers.create_item_metadata(item_id, self.domains[0], response.data['last_url'], self.port, response.meta['father'])
|
||||||
|
|
||||||
if self.root_key is None:
|
if self.root_key is None:
|
||||||
self.root_key = item_id
|
self.root_key = item_id
|
||||||
crawler_splash.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
|
crawlers.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
|
||||||
crawler_splash.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
|
crawlers.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
|
||||||
|
|
||||||
if 'cookies' in response.data:
|
if 'cookies' in response.data:
|
||||||
all_cookies = response.data['cookies']
|
all_cookies = response.data['cookies']
|
||||||
|
@ -202,7 +202,7 @@ class TorSplashCrawler():
|
||||||
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
|
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
|
||||||
# HAR
|
# HAR
|
||||||
if 'har' in response.data:
|
if 'har' in response.data:
|
||||||
crawler_splash.save_har(self.har_dir, item_id, response.data['har'])
|
crawlers.save_har(self.har_dir, item_id, response.data['har'])
|
||||||
|
|
||||||
le = LinkExtractor(allow_domains=self.domains, unique=True)
|
le = LinkExtractor(allow_domains=self.domains, unique=True)
|
||||||
for link in le.extract_links(response):
|
for link in le.extract_links(response):
|
||||||
|
@ -247,7 +247,7 @@ class TorSplashCrawler():
|
||||||
print(failure.type)
|
print(failure.type)
|
||||||
|
|
||||||
def save_crawled_item(self, item_id, item_content):
|
def save_crawled_item(self, item_id, item_content):
|
||||||
gzip64encoded = crawler_splash.save_crawled_item(item_id, item_content)
|
gzip64encoded = crawlers.save_crawled_item(item_id, item_content)
|
||||||
|
|
||||||
# Send item to queue
|
# Send item to queue
|
||||||
# send paste to Global
|
# send paste to Global
|
||||||
|
|
|
@ -9,7 +9,7 @@ from TorSplashCrawler import TorSplashCrawler
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||||
import ConfigLoader
|
import ConfigLoader
|
||||||
import crawler_splash
|
import crawlers
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ if __name__ == '__main__':
|
||||||
crawler_options = crawler_json['crawler_options']
|
crawler_options = crawler_json['crawler_options']
|
||||||
date = crawler_json['date']
|
date = crawler_json['date']
|
||||||
requested_mode = crawler_json['requested']
|
requested_mode = crawler_json['requested']
|
||||||
cookies = crawler_splash.load_cookies('ccad0090-bdcb-4ba5-875b-3dae8f936216', domain, crawler_type=service_type)
|
cookies = crawlers.load_cookies('ccad0090-bdcb-4ba5-875b-3dae8f936216', domain, crawler_type=service_type)
|
||||||
|
|
||||||
redis_cache.delete('crawler_request:{}'.format(uuid))
|
redis_cache.delete('crawler_request:{}'.format(uuid))
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ import sys
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
|
|
||||||
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
|
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response
|
||||||
from flask_login import login_required, current_user, login_user, logout_user
|
from flask_login import login_required, current_user, login_user, logout_user
|
||||||
|
|
||||||
sys.path.append('modules')
|
sys.path.append('modules')
|
||||||
|
@ -25,7 +25,7 @@ import Tag
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||||
import Domain
|
import Domain
|
||||||
import crawler_splash
|
import crawlers
|
||||||
|
|
||||||
r_cache = Flask_config.r_cache
|
r_cache = Flask_config.r_cache
|
||||||
r_serv_db = Flask_config.r_serv_db
|
r_serv_db = Flask_config.r_serv_db
|
||||||
|
@ -44,7 +44,19 @@ def api_validator(api_response):
|
||||||
if api_response:
|
if api_response:
|
||||||
return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1]
|
return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1]
|
||||||
|
|
||||||
|
def create_json_response(data, status_code):
    """Return a (Response, status_code) pair whose body is data as pretty-printed, key-sorted JSON."""
    body = json.dumps(data, indent=2, sort_keys=True)
    return Response(body, mimetype='application/json'), status_code
|
||||||
|
|
||||||
# ============= ROUTES ==============
|
# ============= ROUTES ==============
|
||||||
|
@crawler_splash.route("/crawlers/manual", methods=['GET'])
#@login_required
#@login_read_only
def manual():
    """Render the manual crawler page with the cookies list returned for the current user."""
    # NOTE(review): both auth decorators above are commented out while the
    # view still reads current_user - confirm this is intended.
    l_cookies = crawlers.api_get_cookies_list(current_user.get_id())
    return render_template("crawler_manual.html", crawler_enabled=True, l_cookies=l_cookies)
|
||||||
|
|
||||||
|
|
||||||
# add route : /crawlers/show_domain
|
# add route : /crawlers/show_domain
|
||||||
@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
|
@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
|
||||||
@login_required
|
@login_required
|
||||||
|
@ -194,18 +206,30 @@ def crawler_cookies_add_post():
|
||||||
l_manual_cookie.append(cookie_dict)
|
l_manual_cookie.append(cookie_dict)
|
||||||
elif l_input[1]: # cookie_value
|
elif l_input[1]: # cookie_value
|
||||||
l_invalid_cookie.append({'name': '', 'value': l_input[1]})
|
l_invalid_cookie.append({'name': '', 'value': l_input[1]})
|
||||||
else:
|
if l_invalid_cookie:
|
||||||
#print(l_input)
|
return create_json_response({'error': 'invalid cookie', 'invalid fileds': l_invalid_cookie}, 400)
|
||||||
pass
|
|
||||||
|
|
||||||
cookie_uuid = crawler_splash.save_cookies(user_id, json_cookies=json_file, l_cookies=l_manual_cookie, level=level, description=description)
|
cookies_uuid = crawler_splash.save_cookies(user_id, json_cookies=json_file, l_cookies=l_manual_cookie, level=level, description=description)
|
||||||
return render_template("add_cookies.html")
|
return redirect(url_for('crawler_splash.crawler_cookies_all', cookies_uuid=cookies_uuid))
|
||||||
|
|
||||||
@crawler_splash.route('/crawler/cookies/all', methods=['GET'])
|
@crawler_splash.route('/crawler/cookies/all', methods=['GET'])
|
||||||
#@login_required
|
#@login_required
|
||||||
#@login_read_only
|
#@login_read_only
|
||||||
def crawler_cookies_all():
|
def crawler_cookies_all():
|
||||||
user_id = current_user.get_id(user_id)
|
user_id = current_user.get_id()
|
||||||
user_cookies = crawler_splash.get_user_cookies(user_id)
|
user_cookies = crawlers.get_all_user_cookies_metadata(user_id)
|
||||||
global_cookies = crawler_splash.get_all_global_cookies()
|
global_cookies = crawlers.get_all_global_cookies_metadata()
|
||||||
return render_template("add_cookies.html", user_cookies=user_cookies, global_cookies=global_cookies)
|
return render_template("all_cookies.html", user_cookies=user_cookies, global_cookies=global_cookies)
|
||||||
|
|
||||||
|
@crawler_splash.route('/crawler/cookies/show', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookies_show():
    """Render the edit page of the cookies given by the 'cookies_uuid' query argument.

    Returns the API payload as JSON when the API status is not 200.
    """
    user_id = current_user.get_id()
    cookies_uuid = request.args.get('cookies_uuid')
    res = crawlers.api_get_cookies(cookies_uuid, user_id)
    payload = res[0]
    status = res[1]
    if status != 200:
        return create_json_response(payload, status)

    pretty_json = json.dumps(payload['json_cookies'], indent=4, sort_keys=True)
    metadata = crawlers.get_cookies_metadata(cookies_uuid)
    return render_template("edit_cookies.html", cookie_metadata=metadata, cookies_json=pretty_json, manual_cookies=payload['manual_cookies'])
|
||||||
|
|
|
@ -30,6 +30,9 @@ r_serv_metadata = Flask_config.r_serv_metadata
|
||||||
crawler_enabled = Flask_config.crawler_enabled
|
crawler_enabled = Flask_config.crawler_enabled
|
||||||
bootstrap_label = Flask_config.bootstrap_label
|
bootstrap_label = Flask_config.bootstrap_label
|
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||||
|
import crawlers
|
||||||
|
|
||||||
hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')
|
hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')
|
||||||
|
|
||||||
faup = Faup()
|
faup = Faup()
|
||||||
|
@ -257,12 +260,6 @@ def dashboard():
|
||||||
crawler_metadata_regular=crawler_metadata_regular,
|
crawler_metadata_regular=crawler_metadata_regular,
|
||||||
statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)
|
statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)
|
||||||
|
|
||||||
@hiddenServices.route("/crawlers/manual", methods=['GET'])
|
|
||||||
@login_required
|
|
||||||
@login_read_only
|
|
||||||
def manual():
|
|
||||||
return render_template("Crawler_Splash_manual.html", crawler_enabled=crawler_enabled)
|
|
||||||
|
|
||||||
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
|
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
@login_read_only
|
@login_read_only
|
||||||
|
@ -475,7 +472,7 @@ def create_spider_splash():
|
||||||
create_crawler_config(mode, service_type, crawler_config, domain, url=url)
|
create_crawler_config(mode, service_type, crawler_config, domain, url=url)
|
||||||
send_url_to_crawl_in_queue(mode, service_type, url)
|
send_url_to_crawl_in_queue(mode, service_type, url)
|
||||||
|
|
||||||
return redirect(url_for('hiddenServices.manual'))
|
return redirect(url_for('crawler_splash.manual'))
|
||||||
|
|
||||||
@hiddenServices.route("/crawlers/auto_crawler", methods=['GET'])
|
@hiddenServices.route("/crawlers/auto_crawler", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>AIL-Framework</title>
|
<title>AIL - Add Cookies</title>
|
||||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
||||||
<!-- Core CSS -->
|
<!-- Core CSS -->
|
||||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||||
|
|
99
var/www/templates/crawler/crawler_splash/all_cookies.html
Normal file
99
var/www/templates/crawler/crawler_splash/all_cookies.html
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
|
||||||
|
<title>AIL - Cookies</title>
|
||||||
|
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
|
||||||
|
|
||||||
|
<!-- Core CSS -->
|
||||||
|
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
|
||||||
|
|
||||||
|
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
{% include 'nav_bar.html' %}
|
||||||
|
|
||||||
|
<div class="container-fluid">
|
||||||
|
<div class="row">
|
||||||
|
|
||||||
|
{% include 'crawler/menu_sidebar.html' %}
|
||||||
|
|
||||||
|
<div class="col-12 col-lg-10" id="core_content">
|
||||||
|
|
||||||
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header text-white bg-dark">
|
||||||
|
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Your Cookies</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
{% with all_cookies=user_cookies, table_id='table_user'%}
|
||||||
|
{% include 'crawler/crawler_splash/table_cookies.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header text-white bg-dark">
|
||||||
|
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Global Cookies</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
{% with all_cookies=global_cookies, table_id='table_global'%}
|
||||||
|
{% include 'crawler/crawler_splash/table_cookies.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<a class="btn btn-info my-4" href="{{url_for('crawler_splash.crawler_cookies_add')}}">
|
||||||
|
<i class="fas fa-plus-circle ml-auto"></i>
|
||||||
|
Add Cookies
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
$(document).ready(function(){
|
||||||
|
$("#page-crawler").addClass("active");
|
||||||
|
$("#nav_cookies_all").addClass("active");
|
||||||
|
$("#nav_title_cookies").removeClass("text-muted");
|
||||||
|
|
||||||
|
$('#table_user').DataTable({
|
||||||
|
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
|
||||||
|
"iDisplayLength": 10,
|
||||||
|
"order": [[ 0, "desc" ]]
|
||||||
|
});
|
||||||
|
$('#table_global').DataTable({
|
||||||
|
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
|
||||||
|
"iDisplayLength": 10,
|
||||||
|
"order": [[ 0, "desc" ]]
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
function toggle_sidebar(){
|
||||||
|
if($('#nav_menu').is(':visible')){
|
||||||
|
$('#nav_menu').hide();
|
||||||
|
$('#side_menu').removeClass('border-right')
|
||||||
|
$('#side_menu').removeClass('col-lg-2')
|
||||||
|
$('#core_content').removeClass('col-lg-10')
|
||||||
|
}else{
|
||||||
|
$('#nav_menu').show();
|
||||||
|
$('#side_menu').addClass('border-right')
|
||||||
|
$('#side_menu').addClass('col-lg-2')
|
||||||
|
$('#core_content').addClass('col-lg-10')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -108,8 +108,13 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
<select class="custom-select" name="cookies" id="cookies">
|
||||||
|
<option selected>None</option>
|
||||||
|
{%for cookie in l_cookies%}
|
||||||
|
<option value="{{cookie}}">{{cookie}}</option>
|
||||||
|
{%endfor%}
|
||||||
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<button class="btn btn-primary mt-2">
|
<button class="btn btn-primary mt-2">
|
148
var/www/templates/crawler/crawler_splash/edit_cookies.html
Normal file
148
var/www/templates/crawler/crawler_splash/edit_cookies.html
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
|
||||||
|
<title>AIL - Edit Cookies</title>
|
||||||
|
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
|
||||||
|
|
||||||
|
<!-- Core CSS -->
|
||||||
|
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
|
||||||
|
|
||||||
|
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
{% include 'nav_bar.html' %}
|
||||||
|
|
||||||
|
<div class="container-fluid">
|
||||||
|
<div class="row">
|
||||||
|
|
||||||
|
{% include 'crawler/menu_sidebar.html' %}
|
||||||
|
|
||||||
|
<div class="col-12 col-lg-10" id="core_content">
|
||||||
|
|
||||||
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header text-white bg-dark">
|
||||||
|
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Edit Cookies</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
{% with all_cookies=[cookie_metadata], table_id='table_metadata'%}
|
||||||
|
{% include 'crawler/crawler_splash/table_cookies.html' %}
|
||||||
|
{% endwith %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header text-white bg-dark">
|
||||||
|
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> JSON Cookies</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<p>
|
||||||
|
<pre class="border">{{cookies_json}}</pre>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header text-white bg-dark">
|
||||||
|
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Manual Cookies</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-5" for="obj_input_cookie_name"><b>Cookie Name</b></div>
|
||||||
|
<div class="col-6" for="obj_input_cookie_value"><b>Cookie Value</b></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="form-horizontal">
|
||||||
|
<div class="form-body">
|
||||||
|
<div class="form-group">
|
||||||
|
<div class="fields">
|
||||||
|
<div class="input-group mb-1">
|
||||||
|
<input type="text" class="form-control col-5" name="first_cookie" id="obj_input_cookie_name">
|
||||||
|
<input type="text" class="form-control col-6" name="first_cookie" id="obj_input_cookie_value">
|
||||||
|
<span class="btn btn-info input-group-addon add-field col-1"><i class="fas fa-plus"></i></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{%for manual_cookie in manual_cookies%}
|
||||||
|
<div class="input-group mb-1">
|
||||||
|
<input type="text" class="form-control col-5" name="{{manual_cookie}}" value="{{manual_cookie[0]}}">
|
||||||
|
<input type="text" class="form-control col-6" name="{{manual_cookie}}" value="{{manual_cookie[1]}}">
|
||||||
|
<span class="btn btn-danger input-group-addon delete-field col-1"><i class="fas fa-trash-alt"></i></span>
|
||||||
|
</div>
|
||||||
|
{%endfor%}
|
||||||
|
<span class="help-block" hidden>Manual Cookies></span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
$(document).ready(function(){
|
||||||
|
$("#page-crawler").addClass("active");
|
||||||
|
$("#nav_title_cookies").removeClass("text-muted");
|
||||||
|
|
||||||
|
$('#table_user').DataTable({
|
||||||
|
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
|
||||||
|
"iDisplayLength": 10,
|
||||||
|
"order": [[ 0, "desc" ]]
|
||||||
|
});
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
function toggle_sidebar(){
|
||||||
|
if($('#nav_menu').is(':visible')){
|
||||||
|
$('#nav_menu').hide();
|
||||||
|
$('#side_menu').removeClass('border-right')
|
||||||
|
$('#side_menu').removeClass('col-lg-2')
|
||||||
|
$('#core_content').removeClass('col-lg-10')
|
||||||
|
}else{
|
||||||
|
$('#nav_menu').show();
|
||||||
|
$('#side_menu').addClass('border-right')
|
||||||
|
$('#side_menu').addClass('col-lg-2')
|
||||||
|
$('#core_content').addClass('col-lg-10')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var input_1 = '<div class="input-group mb-1"><input type="text" class="form-control col-5" name="'
|
||||||
|
var input_2 = '"><input type="text" class="form-control col-6" name="'
|
||||||
|
var input_3 = '">';
|
||||||
|
var minusButton = '<span class="btn btn-danger input-group-addon delete-field col-1"><i class="fas fa-trash-alt"></i></span></div>';
|
||||||
|
|
||||||
|
$('.add-field').click(function() {
|
||||||
|
var new_uuid = uuidv4();
|
||||||
|
var template = input_1 + new_uuid + input_2 + new_uuid + input_3;
|
||||||
|
var temp = $(template).insertBefore('.help-block');
|
||||||
|
temp.append(minusButton);
|
||||||
|
});
|
||||||
|
|
||||||
|
$('.fields').on('click', '.delete-field', function(){
|
||||||
|
$(this).parent().remove();
|
||||||
|
});
|
||||||
|
|
||||||
|
function uuidv4() {
|
||||||
|
return ([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g, c =>
|
||||||
|
(c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
28
var/www/templates/crawler/crawler_splash/table_cookies.html
Normal file
28
var/www/templates/crawler/crawler_splash/table_cookies.html
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
<table id="{{table_id}}" class="table table-striped table-bordered">
|
||||||
|
<thead class="bg-dark text-white">
|
||||||
|
<tr>
|
||||||
|
<th class="bg-info text-white">Description</th>
|
||||||
|
<th class="bg-info text-white">Date</th>
|
||||||
|
<th class="bg-info text-white">UUID</th>
|
||||||
|
<th class="bg-info text-white">User</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody style="font-size: 15px;">
|
||||||
|
{% for dict_cookies in all_cookies %}
|
||||||
|
<tr>
|
||||||
|
<td>{{dict_cookies['description']}}</td>
|
||||||
|
<td>
|
||||||
|
{%if dict_cookies['date']%}
|
||||||
|
{{dict_cookies['date'][0:4]}}/{{dict_cookies['date'][4:6]}}/{{dict_cookies['date'][6:8]}}
|
||||||
|
{%endif%}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<a target="_blank" href="{{ url_for('crawler_splash.crawler_cookies_show') }}?cookies_uuid={{ dict_cookies['cookies_uuid'] }}">
|
||||||
|
{{ dict_cookies['cookies_uuid']}}
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td>{{dict_cookies['user_id']}}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
|
@ -8,7 +8,7 @@
|
||||||
<nav class="navbar navbar-expand navbar-light bg-light flex-md-column flex-row align-items-start py-2" id="nav_menu">
|
<nav class="navbar navbar-expand navbar-light bg-light flex-md-column flex-row align-items-start py-2" id="nav_menu">
|
||||||
<h5 class="d-flex text-muted w-100">
|
<h5 class="d-flex text-muted w-100">
|
||||||
<span>Splash Crawlers </span>
|
<span>Splash Crawlers </span>
|
||||||
<a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
|
<a class="ml-auto" href="{{url_for('crawler_splash.manual')}}">
|
||||||
<i class="fas fa-plus-circle ml-auto"></i>
|
<i class="fas fa-plus-circle ml-auto"></i>
|
||||||
</a>
|
</a>
|
||||||
</h5>
|
</h5>
|
||||||
|
@ -32,7 +32,7 @@
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a class="nav-link" href="{{url_for('hiddenServices.manual')}}" id="nav_manual_crawler">
|
<a class="nav-link" href="{{url_for('crawler_splash.manual')}}" id="nav_manual_crawler">
|
||||||
<i class="fas fa-spider"></i>
|
<i class="fas fa-spider"></i>
|
||||||
Manual Crawler
|
Manual Crawler
|
||||||
</a>
|
</a>
|
||||||
|
@ -77,7 +77,7 @@
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a class="nav-link" href="{{url_for('crawler_splash.domains_explorer_web')}}" id="nav_cookies_all">
|
<a class="nav-link" href="{{url_for('crawler_splash.crawler_cookies_all')}}" id="nav_cookies_all">
|
||||||
<i class="fas fa-cookie-bite"></i>
|
<i class="fas fa-cookie-bite"></i>
|
||||||
<span>All Cookies</span>
|
<span>All Cookies</span>
|
||||||
</a>
|
</a>
|
||||||
|
|
Loading…
Reference in a new issue