Mirror of https://github.com/ail-project/ail-framework.git (synced 2024-11-10 08:38:28 +00:00)

Merge pull request #486 from CIRCL/crawler_v2
Crawler v2 - Add cookiejar - use cookie to bypass login form

Commit d72f28fd53
26 changed files with 1,700 additions and 310 deletions
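The change set wires cookiejars into the whole crawling pipeline: a new bin/lib/crawlers.py stores cookies in ARDB, the Splash spider replays them through a Lua script, and new Flask routes manage the jars. A minimal end-to-end sketch of the intended workflow (assumptions: a configured AIL instance with the new module importable as crawlers; the user id, URL and cookie values are placeholders):

    import crawlers

    # create a private (level=0) cookiejar for the current user
    cookiejar_uuid = crawlers.create_cookiejar('admin@admin.test', level=0,
                                               description='forum session')
    crawlers.add_cookies_to_cookiejar(cookiejar_uuid,
                                      [{'name': 'session_id', 'value': 'deadbeef'}])

    # queue a crawl that replays the stored cookies to get past the login form
    crawlers.api_create_crawler_task('admin@admin.test', 'http://example.onion',
                                     cookiejar_uuid=cookiejar_uuid)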
@@ -351,23 +351,24 @@ if __name__ == '__main__':
     # get HAR files
     default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
     if default_crawler_har:
-        default_crawler_har = 1
+        default_crawler_har = True
     else:
-        default_crawler_har = 0
+        default_crawler_har = False

     # get PNG files
     default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
     if default_crawler_png:
-        default_crawler_png = 1
+        default_crawler_png = True
     else:
-        default_crawler_png = 0
+        default_crawler_png = False

     # Default crawler options
-    default_crawler_config = {'html': 1,
+    default_crawler_config = {'html': True,
                               'har': default_crawler_har,
                               'png': default_crawler_png,
                               'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
                               'closespider_pagecount': p.config.getint("Crawler", "default_crawler_closespider_pagecount"),
+                              'cookiejar_uuid': None,
                               'user_agent': p.config.get("Crawler", "default_crawler_user_agent")}

     # Track launched crawler
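Note that ConfigParser.getboolean() already returns a Python bool, so the if/else above only makes the normalization explicit; the functional change is that downstream code now receives True/False instead of 1/0. A quick standalone check:

    from configparser import ConfigParser

    config = ConfigParser()
    config.read_string("[Crawler]\ndefault_crawler_har = True\n")
    print(config.getboolean("Crawler", "default_crawler_har"))  # True (a bool, not 1)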
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3
 # -*-coding:UTF-8 -*

+import base64
 import os
 import sys
 import redis

+from hashlib import sha256
 from io import BytesIO

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
@@ -164,6 +166,25 @@ def get_screenshot_file_content(sha256_string):
         file_content = BytesIO(f.read())
     return file_content

+# if force save, ignore max_size
+def save_crawled_screeshot(b64_screenshot, max_size, f_save=False):
+    screenshot_size = (len(b64_screenshot)*3) /4
+    if screenshot_size < max_size or f_save:
+        image_content = base64.standard_b64decode(b64_screenshot.encode())
+        sha256_string = sha256(image_content).hexdigest()
+        filepath = get_screenshot_filepath(sha256_string)
+        if os.path.isfile(filepath):
+            #print('File already exist')
+            return sha256_string
+        # create dir
+        dirname = os.path.dirname(filepath)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        with open(filepath, 'wb') as f:
+            f.write(image_content)
+        return sha256_string
+    return False
+
 def save_screenshot_file(sha256_string, io_content):
     filepath = get_screenshot_filepath(sha256_string)
     if os.path.isfile(filepath):
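The (len(b64_screenshot)*3) / 4 expression in save_crawled_screeshot() estimates the decoded size from the base64 length (4 encoded characters carry 3 bytes), so the max_size check runs before anything is decoded. A standalone check of that arithmetic:

    import base64

    raw = b'\x00' * 3000
    b64 = base64.standard_b64encode(raw).decode()
    print(len(b64) * 3 / 4)  # 3000.0 - the decoded size, recovered without decoding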
bin/lib/crawlers.py (new executable file, +532 lines)
@@ -0,0 +1,532 @@
#!/usr/bin/python3

"""
API Helper
===================


"""
import base64
import gzip
import json
import os
import re
import redis
import sys
import uuid

from datetime import datetime, timedelta
from urllib.parse import urlparse

from pyfaup.faup import Faup

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader


config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None

faup = Faup()

def generate_uuid():
    return str(uuid.uuid4()).replace('-', '')

################################################################################

# # TODO: handle prefix cookies
# # TODO: fill empty fields
def create_cookie_crawler(cookie_dict, domain, crawler_type='regular'):
    # check cookie domain field
    if not 'domain' in cookie_dict:
        cookie_dict['domain'] = '.{}'.format(domain)

    # tor browser: disable secure cookie
    if crawler_type=='onion':
        cookie_dict['secure'] = False

    # force cookie domain
    # url = urlparse(browser_cookie['Host raw'])
    # domain = url.netloc.split(':', 1)[0]
    # cookie_dict['domain'] = '.{}'.format(domain)

    # change expire date
    cookie_dict['expires'] = (datetime.now() + timedelta(days=10)).strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
    return cookie_dict

def load_crawler_cookies(cookiejar_uuid, domain, crawler_type='regular'):
    cookies = get_cookiejar_cookies_list(cookiejar_uuid)
    all_cookies = []
    for cookie_dict in cookies:
        all_cookies.append(create_cookie_crawler(cookie_dict, domain, crawler_type=crawler_type))
    return all_cookies

################################################################################

def get_all_cookiejar():
    return r_serv_onion.smembers('cookiejar:all')

def get_global_cookiejar():
    res = r_serv_onion.smembers('cookiejar:global')
    if not res:
        res = []
    return res

def get_user_cookiejar(user_id):
    res = r_serv_onion.smembers('cookiejar:user:{}'.format(user_id))
    if not res:
        res = []
    return res

def exist_cookiejar(cookiejar_uuid):
    return r_serv_onion.exists('cookiejar_metadata:{}'.format(cookiejar_uuid))

def create_cookiejar(user_id, level=1, description=None):
    cookiejar_uuid = str(uuid.uuid4())

    r_serv_onion.sadd('cookiejar:all', cookiejar_uuid)
    if level==0:
        r_serv_onion.sadd('cookiejar:user:{}'.format(user_id), cookiejar_uuid)
    else:
        r_serv_onion.sadd('cookiejar:global', cookiejar_uuid)
    # metadata
    r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'user_id', user_id)
    r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'level', level)
    r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description', description)
    r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'date', datetime.now().strftime("%Y%m%d"))

    # if json_cookies:
    #     json_cookies = json.loads(json_cookies) # # TODO: catch Exception
    #     r_serv_onion.set('cookies:json_cookies:{}'.format(cookies_uuid), json.dumps(json_cookies))
    #
    # for cookie_dict in l_cookies:
    #     r_serv_onion.hset('cookies:manual_cookies:{}'.format(cookies_uuid), cookie_dict['name'], cookie_dict['value'])
    return cookiejar_uuid

def delete_cookie_jar(cookiejar_uuid):
    level = get_cookiejar_level(cookiejar_uuid)
    if level == 0:
        user_id = get_cookiejar_owner(cookiejar_uuid)
        r_serv_onion.srem('cookiejar:user:{}'.format(user_id), cookiejar_uuid)
    else:
        r_serv_onion.srem('cookiejar:global', cookiejar_uuid)

    r_serv_onion.delete('cookiejar_metadata:{}'.format(cookiejar_uuid))

def get_cookiejar_cookies_uuid(cookiejar_uuid):
    res = r_serv_onion.smembers('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid))
    if not res:
        res = []
    return res

def get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=False):
    l_cookiejar = []
    for cookie_uuid in get_cookiejar_cookies_uuid(cookiejar_uuid):
        if add_cookie_uuid:
            l_cookiejar.append((get_cookie_dict(cookie_uuid), cookie_uuid))
        else:
            l_cookiejar.append(get_cookie_dict(cookie_uuid))
    return l_cookiejar

## Cookiejar metadata ##
def get_cookiejar_description(cookiejar_uuid):
    return r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description')

def get_cookiejar_date(cookiejar_uuid):
    return r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'date')

def get_cookiejar_owner(cookiejar_uuid):
    return r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'user_id')

def get_cookiejar_level(cookiejar_uuid):
    res = r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'level')
    if not res:
        res = 1
    return int(res)

def get_cookiejar_metadata(cookiejar_uuid, level=False):
    dict_cookiejar = {}
    if exist_cookiejar(cookiejar_uuid):
        dict_cookiejar['cookiejar_uuid'] = cookiejar_uuid
        dict_cookiejar['description'] = get_cookiejar_description(cookiejar_uuid)
        dict_cookiejar['date'] = get_cookiejar_date(cookiejar_uuid)
        dict_cookiejar['user_id'] = get_cookiejar_owner(cookiejar_uuid)
        if level:
            dict_cookiejar['level'] = get_cookiejar_level(cookiejar_uuid)
    return dict_cookiejar

def get_cookiejar_metadata_by_iterator(iter_cookiejar_uuid):
    l_cookiejar_metadata = []
    for cookiejar_uuid in iter_cookiejar_uuid:
        l_cookiejar_metadata.append(get_cookiejar_metadata(cookiejar_uuid))
    return l_cookiejar_metadata

def edit_cookiejar_description(cookiejar_uuid, description):
    r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description', description)

# # # # # # # #
#             #
#   COOKIES   #
#             #
# # # # # # # #

# # # #
# Cookies Fields:
#   - name
#   - value
#   - path (optional)
#   - domain (optional)
#   - secure (optional)
#   - httpOnly (optional)
#   - text (optional)
# # # #
def get_cookie_all_keys_name():
    return ['name', 'value', 'domain', 'path', 'httpOnly', 'secure']

def exists_cookie(cookie_uuid):
    if int(r_serv_onion.scard('cookies:map:cookiejar:{}'.format(cookie_uuid))) > 0:
        return True
    return False

def get_cookie_value(cookie_uuid, name):
    return r_serv_onion.hget('cookiejar:cookie:{}'.format(cookie_uuid), name)

def set_cookie_value(cookie_uuid, name, value):
    r_serv_onion.hset('cookiejar:cookie:{}'.format(cookie_uuid), name, value)

def delete_cookie_value(cookie_uuid, name):
    r_serv_onion.hdel('cookiejar:cookie:{}'.format(cookie_uuid), name)

def get_cookie_dict(cookie_uuid):
    cookie_dict = {}
    for key_name in r_serv_onion.hkeys('cookiejar:cookie:{}'.format(cookie_uuid)):
        cookie_dict[key_name] = get_cookie_value(cookie_uuid, key_name)
    return cookie_dict

# name, value, path=None, httpOnly=None, secure=None, domain=None, text=None
def add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict):
    cookie_uuid = generate_uuid()
    r_serv_onion.sadd('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid), cookie_uuid)
    r_serv_onion.sadd('cookies:map:cookiejar:{}'.format(cookie_uuid), cookiejar_uuid)

    set_cookie_value(cookie_uuid, 'name', cookie_dict['name'])
    set_cookie_value(cookie_uuid, 'value', cookie_dict['value'])
    if 'path' in cookie_dict:
        set_cookie_value(cookie_uuid, 'path', cookie_dict['path'])
    if 'httpOnly' in cookie_dict:
        set_cookie_value(cookie_uuid, 'httpOnly', cookie_dict['httpOnly'])
    if 'secure' in cookie_dict:
        set_cookie_value(cookie_uuid, 'secure', cookie_dict['secure'])
    if 'domain' in cookie_dict:
        set_cookie_value(cookie_uuid, 'domain', cookie_dict['domain'])
    if 'text' in cookie_dict:
        set_cookie_value(cookie_uuid, 'text', cookie_dict['text'])
    return cookie_uuid

def add_cookies_to_cookiejar(cookiejar_uuid, l_cookies):
    for cookie_dict in l_cookies:
        add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict)

def delete_all_cookies_from_cookiejar(cookiejar_uuid):
    for cookie_uuid in get_cookiejar_cookies_uuid(cookiejar_uuid):
        delete_cookie_from_cookiejar(cookiejar_uuid, cookie_uuid)

def delete_cookie_from_cookiejar(cookiejar_uuid, cookie_uuid):
    r_serv_onion.srem('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid), cookie_uuid)
    r_serv_onion.srem('cookies:map:cookiejar:{}'.format(cookie_uuid), cookiejar_uuid)
    if not exists_cookie(cookie_uuid):
        r_serv_onion.delete('cookiejar:cookie:{}'.format(cookie_uuid))

def edit_cookie(cookiejar_uuid, cookie_uuid, cookie_dict):
    # delete old keys
    for key_name in r_serv_onion.hkeys('cookiejar:cookie:{}'.format(cookie_uuid)):
        if key_name not in cookie_dict:
            delete_cookie_value(cookie_uuid, key_name)
    # add new keys
    cookie_all_keys_name = get_cookie_all_keys_name()
    for key_name in cookie_dict:
        if key_name in cookie_all_keys_name:
            set_cookie_value(cookie_uuid, key_name, cookie_dict[key_name])

## - - ##
## Cookies import ## # TODO: add browser type ?
def import_cookies_from_json(json_cookies, cookiejar_uuid):
    for cookie in json_cookies:
        try:
            cookie_dict = unpack_imported_json_cookie(cookie)
            add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict)
        except KeyError:
            return {'error': 'Invalid cookie key, please submit a valid JSON', 'cookiejar_uuid': cookiejar_uuid}

# # TODO: add text field
def unpack_imported_json_cookie(json_cookie):
    cookie_dict = {'name': json_cookie['Name raw'], 'value': json_cookie['Content raw']}
    if 'Path raw' in json_cookie:
        cookie_dict['path'] = json_cookie['Path raw']
    if 'HTTP only raw' in json_cookie:
        cookie_dict['httpOnly'] = json_cookie['HTTP only raw'] == 'true'
    if 'Send for' in json_cookie:
        cookie_dict['secure'] = json_cookie['Send for'] == 'Encrypted connections only'
    if 'Host raw' in json_cookie:
        url = urlparse(json_cookie['Host raw'])
        cookie_dict['domain'] = url.netloc.split(':', 1)[0]
    return cookie_dict

def misp_cookie_import(misp_object, cookiejar_uuid):
    pass
## - - ##
#### COOKIEJAR API ####
def api_import_cookies_from_json(json_cookies_str, cookiejar_uuid): # # TODO: add catch
    json_cookies = json.loads(json_cookies_str)
    res = import_cookies_from_json(json_cookies, cookiejar_uuid)
    if res:
        return (res, 400)
#### ####

#### COOKIES API ####

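# Illustrative example (not part of the original file): the shape of the
# browser-export JSON accepted by api_import_cookies_from_json() above;
# the field names match the keys read by unpack_imported_json_cookie().
#
#   [{"Name raw": "session_id",
#     "Content raw": "deadbeef",
#     "Path raw": "/",
#     "HTTP only raw": "true",
#     "Send for": "Encrypted connections only",
#     "Host raw": "https://example.com/"}]
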
def api_verify_basic_cookiejar(cookiejar_uuid, user_id):
    if not exist_cookiejar(cookiejar_uuid):
        return ({'error': 'unknown cookiejar uuid', 'cookiejar_uuid': cookiejar_uuid}, 404)
    level = get_cookiejar_level(cookiejar_uuid)
    if level == 0: # # TODO: check if user is admin
        cookie_owner = get_cookiejar_owner(cookiejar_uuid)
        if cookie_owner != user_id:
            return ({'error': 'The access to this cookiejar is restricted'}, 403)

def api_get_cookiejar_cookies(cookiejar_uuid, user_id):
    res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
    if res:
        return res
    res = get_cookiejar_cookies_list(cookiejar_uuid)
    return (res, 200)

def api_edit_cookiejar_description(user_id, cookiejar_uuid, description):
    res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
    if res:
        return res
    edit_cookiejar_description(cookiejar_uuid, description)
    return ({'cookiejar_uuid': cookiejar_uuid}, 200)

def api_get_cookiejar_cookies_with_uuid(cookiejar_uuid, user_id):
    res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
    if res:
        return res
    res = get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True)
    return (res, 200)

def api_get_cookies_list_select(user_id):
    l_cookiejar = []
    for cookies_uuid in get_global_cookiejar():
        l_cookiejar.append('{} : {}'.format(get_cookiejar_description(cookies_uuid), cookies_uuid))
    for cookies_uuid in get_user_cookiejar(user_id):
        l_cookiejar.append('{} : {}'.format(get_cookiejar_description(cookies_uuid), cookies_uuid))
    return sorted(l_cookiejar)

def api_delete_cookie_from_cookiejar(user_id, cookiejar_uuid, cookie_uuid):
    res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
    if res:
        return res
    delete_cookie_from_cookiejar(cookiejar_uuid, cookie_uuid)
    return ({'cookiejar_uuid': cookiejar_uuid, 'cookie_uuid': cookie_uuid}, 200)

def api_delete_cookie_jar(user_id, cookiejar_uuid):
    res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
    if res:
        return res
    delete_cookie_jar(cookiejar_uuid)
    return ({'cookiejar_uuid': cookiejar_uuid}, 200)

def api_edit_cookie(user_id, cookiejar_uuid, cookie_uuid, cookie_dict):
    res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
    if res:
        return res
    if 'name' not in cookie_dict or 'value' not in cookie_dict or cookie_dict['name'] == '':
        return ({'error': 'cookie name or value not provided'}, 400)
    edit_cookie(cookiejar_uuid, cookie_uuid, cookie_dict)
    return (get_cookie_dict(cookie_uuid), 200)

def api_create_cookie(user_id, cookiejar_uuid, cookie_dict):
    res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
    if res:
        return res
    if 'name' not in cookie_dict or 'value' not in cookie_dict or cookie_dict['name'] == '':
        return ({'error': 'cookie name or value not provided'}, 400)
    res = add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict)
    return (res, 200)

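# Usage note (illustration, not in the original file): the api_* helpers
# above all follow the same convention - None on success where there is
# nothing to return, or a (json_serializable_data, http_status) tuple that
# the Flask layer turns into a response. Hypothetical example:
#
#   res = api_create_cookie('user@example.test', cookiejar_uuid,
#                           {'name': 'session_id', 'value': 'deadbeef'})
#   # -> (cookie_uuid, 200) on success, ({'error': ...}, 4xx) otherwise
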
#### ####

#### CRAWLER TASK ####
def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
    crawler_config = {}
    crawler_config['depth_limit'] = depth_limit
    crawler_config['closespider_pagecount'] = max_pages

    if screenshot:
        crawler_config['screenshot'] = True
    else:
        crawler_config['screenshot'] = False
    if har:
        crawler_config['har'] = True
    else:
        crawler_config['har'] = False

    if user_agent:
        crawler_config['user_agent'] = user_agent
    if cookiejar_uuid:
        crawler_config['cookiejar_uuid'] = cookiejar_uuid

    if auto_crawler:
        crawler_mode = 'auto'
    else:
        crawler_mode = 'manual'

    # get crawler_type
    faup.decode(url)
    unpack_url = faup.get()
    ## TODO: # FIXME: remove me
    try:
        domain = unpack_url['domain'].decode()
    except:
        domain = unpack_url['domain']

    ## TODO: # FIXME: remove me
    try:
        tld = unpack_url['tld'].decode()
    except:
        tld = unpack_url['tld']
    if tld == 'onion':
        crawler_type = 'onion'
    else:
        crawler_type = 'regular'

    save_crawler_config(crawler_mode, crawler_type, crawler_config, domain, url=url)
    send_url_to_crawl_in_queue(crawler_mode, crawler_type, url)

def save_crawler_config(crawler_mode, crawler_type, crawler_config, domain, url=None):
    if crawler_mode == 'manual':
        r_cache.set('crawler_config:{}:{}:{}'.format(crawler_mode, crawler_type, domain), json.dumps(crawler_config))
    elif crawler_mode == 'auto':
        r_serv_onion.set('crawler_config:{}:{}:{}:{}'.format(crawler_mode, crawler_type, domain, url), json.dumps(crawler_config))

def send_url_to_crawl_in_queue(crawler_mode, crawler_type, url):
    r_serv_onion.sadd('{}_crawler_priority_queue'.format(crawler_type), '{};{}'.format(url, crawler_mode))
    # add auto crawled url for user UI
    if crawler_mode == 'auto':
        r_serv_onion.sadd('auto_crawler_url:{}'.format(crawler_type), url)

#### ####
#### CRAWLER TASK API ####
def api_create_crawler_task(user_id, url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
    # validate url
    if url is None or url=='' or url=='\n':
        return ({'error': 'invalid url'}, 400)

    if depth_limit:
        try:
            depth_limit = int(depth_limit)
            if depth_limit < 0:
                depth_limit = 0
        except ValueError:
            return ({'error': 'invalid depth limit'}, 400)
    if max_pages:
        try:
            max_pages = int(max_pages)
            if max_pages < 1:
                max_pages = 1
        except ValueError:
            return ({'error': 'invalid max_pages limit'}, 400)

    if auto_crawler:
        try:
            crawler_delta = int(crawler_delta)
            if crawler_delta < 0:
                return ({'error': 'invalid delta between two passes of the crawler'}, 400)
        except ValueError:
            return ({'error': 'invalid delta between two passes of the crawler'}, 400)

    if cookiejar_uuid:
        if not exist_cookiejar(cookiejar_uuid):
            return ({'error': 'unknown cookiejar uuid', 'cookiejar_uuid': cookiejar_uuid}, 404)
        level = get_cookiejar_level(cookiejar_uuid)
        if level == 0: # # TODO: check if user is admin
            cookie_owner = get_cookiejar_owner(cookiejar_uuid)
            if cookie_owner != user_id:
                return ({'error': 'The access to this cookiejar is restricted'}, 403)

    create_crawler_task(url, screenshot=screenshot, har=har, depth_limit=depth_limit, max_pages=max_pages,
                        auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid, user_agent=user_agent)
    return None
#### ####

def is_redirection(domain, last_url):
    url = urlparse(last_url)
    last_domain = url.netloc
    last_domain = last_domain.split('.')
    last_domain = '{}.{}'.format(last_domain[-2], last_domain[-1])
    return domain != last_domain

# domain up
def create_domain_metadata(domain_type, domain, current_port, date, date_month):
    # Add to global set
    r_serv_onion.sadd('{}_up:{}'.format(domain_type, date), domain)
    r_serv_onion.sadd('full_{}_up'.format(domain_type), domain)
    r_serv_onion.sadd('month_{}_up:{}'.format(domain_type, date_month), domain)

    # create onion metadata
    if not r_serv_onion.exists('{}_metadata:{}'.format(domain_type, domain)):
        r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'first_seen', date)
    r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'last_check', date)

    # Update domain port number
    all_domain_ports = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'ports')
    if all_domain_ports:
        all_domain_ports = all_domain_ports.split(';')
    else:
        all_domain_ports = []
    if current_port not in all_domain_ports:
        all_domain_ports.append(current_port)
        r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'ports', ';'.join(all_domain_ports))

# add root_item to history
def add_domain_root_item(root_item, domain_type, domain, epoch_date, port):
    # Create/Update crawler history
    r_serv_onion.zadd('crawler_history_{}:{}:{}'.format(domain_type, domain, port), epoch_date, root_item)

def create_item_metadata(item_id, domain, url, port, item_father):
    r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', item_father)
    r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'domain', '{}:{}'.format(domain, port))
    r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'real_link', url)
    # add this item_id to his father
    r_serv_metadata.sadd('paste_children:{}'.format(item_father), item_id)

def create_item_id(item_dir, domain):
    if len(domain) > 215:
        UUID = domain[-215:]+str(uuid.uuid4())
    else:
        UUID = domain+str(uuid.uuid4())
    return os.path.join(item_dir, UUID)

def save_crawled_item(item_id, item_content):
    try:
        gzipencoded = gzip.compress(item_content.encode())
        gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
        return gzip64encoded
    except:
        print("file error: {}".format(item_id))
        return False

def save_har(har_dir, item_id, har_content):
    if not os.path.exists(har_dir):
        os.makedirs(har_dir)
    item_id = item_id.split('/')[-1]
    filename = os.path.join(har_dir, item_id + '.json')
    with open(filename, 'w') as f:
        f.write(json.dumps(har_content))
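A queued crawl therefore lands in two places: its options as a JSON blob under a crawler_config:* key, and the URL itself in a per-type priority queue serialized as 'url;mode'. A hedged peek at the queue (the host/port are assumptions; AIL normally reads the ARDB_Onion connection from its config):

    import redis

    r = redis.StrictRedis(host='localhost', port=6382, decode_responses=True)  # ARDB_Onion (assumed)
    print(r.smembers('onion_crawler_priority_queue'))
    # e.g. {'http://example.onion;manual'}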
@@ -3,11 +3,8 @@

 import os
 import sys
 import gzip
 import base64
-import uuid
-import datetime
-import base64
 import redis
 import json
 import time
@@ -23,15 +20,73 @@ from scrapy import Spider
 from scrapy.linkextractors import LinkExtractor
 from scrapy.crawler import CrawlerProcess, Crawler

-from scrapy_splash import SplashRequest
+from scrapy_splash import SplashRequest, SplashJsonResponse

 sys.path.append(os.environ['AIL_BIN'])
 from Helper import Process

+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
+#import ConfigLoader
+import Screenshot
+import crawlers
+
+script_cookie = """
+function main(splash, args)
+    -- Default values
+    splash.js_enabled = true
+    splash.private_mode_enabled = true
+    splash.images_enabled = true
+    splash.webgl_enabled = true
+    splash.media_source_enabled = true
+
+    -- Force enable things
+    splash.plugins_enabled = true
+    splash.request_body_enabled = true
+    splash.response_body_enabled = true
+
+    splash.indexeddb_enabled = true
+    splash.html5_media_enabled = true
+    splash.http2_enabled = true
+
+    -- User defined
+    splash.resource_timeout = args.resource_timeout
+    splash.timeout = args.timeout
+
+    -- Allow to pass cookies
+    splash:init_cookies(args.cookies)
+
+    -- Run
+    ok, reason = splash:go{args.url}
+    if not ok and not reason:find("http") then
+        return {
+            error = reason,
+            last_url = splash:url()
+        }
+    end
+    if reason == "http504" then
+        splash:set_result_status_code(504)
+        return ''
+    end
+
+    splash:wait{args.wait}
+    -- Page instrumentation
+    -- splash.scroll_position = {y=1000}
+    splash:wait{args.wait}
+    -- Response
+    return {
+        har = splash:har(),
+        html = splash:html(),
+        png = splash:png{render_all=true},
+        cookies = splash:get_cookies(),
+        last_url = splash:url()
+    }
+end
+"""

 class TorSplashCrawler():

     def __init__(self, splash_url, crawler_options):
-        self.process = CrawlerProcess({'LOG_ENABLED': False})
+        self.process = CrawlerProcess({'LOG_ENABLED': True})
         self.crawler = Crawler(self.TorSplashSpider, {
             'USER_AGENT': crawler_options['user_agent'],
             'SPLASH_URL': splash_url,
@@ -39,24 +94,26 @@ class TorSplashCrawler():
             'DOWNLOADER_MIDDLEWARES': {'scrapy_splash.SplashCookiesMiddleware': 723,
                                        'scrapy_splash.SplashMiddleware': 725,
                                        'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
+                                       'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
                                        },
             'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,},
             'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
             'HTTPERROR_ALLOW_ALL': True,
             'RETRY_TIMES': 2,
             'CLOSESPIDER_PAGECOUNT': crawler_options['closespider_pagecount'],
-            'DEPTH_LIMIT': crawler_options['depth_limit']
+            'DEPTH_LIMIT': crawler_options['depth_limit'],
+            'SPLASH_COOKIES_DEBUG': False
             })

-    def crawl(self, type, crawler_options, date, requested_mode, url, domain, port, original_item):
-        self.process.crawl(self.crawler, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, original_item=original_item)
+    def crawl(self, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item):
+        self.process.crawl(self.crawler, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, cookies=cookies, original_item=original_item)
         self.process.start()

     class TorSplashSpider(Spider):
         name = 'TorSplashSpider'

-        def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, original_item, *args, **kwargs):
-            self.type = type
+        def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item, *args, **kwargs):
+            self.domain_type = type
             self.requested_mode = requested_mode
             self.original_item = original_item
+            self.root_key = None
@@ -68,166 +125,101 @@ class TorSplashCrawler():
             self.date_month = date['date_month']
             self.date_epoch = int(date['epoch'])

-            # # TODO: timeout in config
-            self.arg_crawler = { 'html': crawler_options['html'],
-                                 'wait': 10,
-                                 'render_all': 1,
-                                 'timeout': 30,
-                                 'har': crawler_options['har'],
-                                 'png': crawler_options['png']}
+            self.png = crawler_options['png']
+            self.har = crawler_options['har']
+            self.cookies = cookies

             config_section = 'Crawler'
             self.p = Process(config_section)

             self.r_cache = redis.StrictRedis(
                 host=self.p.config.get("Redis_Cache", "host"),
                 port=self.p.config.getint("Redis_Cache", "port"),
                 db=self.p.config.getint("Redis_Cache", "db"),
                 decode_responses=True)

+            self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str )
+            self.har_dir = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str )
+
             self.r_serv_log_submit = redis.StrictRedis(
                 host=self.p.config.get("Redis_Log_submit", "host"),
                 port=self.p.config.getint("Redis_Log_submit", "port"),
                 db=self.p.config.getint("Redis_Log_submit", "db"),
                 decode_responses=True)

-            self.r_serv_metadata = redis.StrictRedis(
-                host=self.p.config.get("ARDB_Metadata", "host"),
-                port=self.p.config.getint("ARDB_Metadata", "port"),
-                db=self.p.config.getint("ARDB_Metadata", "db"),
-                decode_responses=True)
-            self.root_key = None
-
-            self.r_serv_onion = redis.StrictRedis(
-                host=self.p.config.get("ARDB_Onion", "host"),
-                port=self.p.config.getint("ARDB_Onion", "port"),
-                db=self.p.config.getint("ARDB_Onion", "db"),
-                decode_responses=True)
-
-            self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date_str )
-
-            self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
-                                self.p.config.get("Directories", "crawled"), date_str )
-
-            self.crawled_har = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str )
-            self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot") )
+        def build_request_arg(self, cookies):
+            return {'wait': 10,
+                    'resource_timeout': 30, # /!\ Weird behaviour if timeout < resource_timeout /!\
+                    'timeout': 30,
+                    'cookies': cookies,
+                    'lua_source': script_cookie
+            }

         def start_requests(self):
+            l_cookies = self.build_request_arg(self.cookies)
             yield SplashRequest(
                 self.start_urls,
                 self.parse,
                 errback=self.errback_catcher,
-                endpoint='render.json',
-                meta={'father': self.original_item, 'root_key': None},
-                args=self.arg_crawler
+                endpoint='execute',
+                meta={'father': self.original_item},
+                args=l_cookies
             )

+        # # TODO: remove duplicate and anchor
         def parse(self,response):
             #print(response.headers)
             #print(response.status)
             if response.status == 504:
-                # down ?
-                print('504 detected')
+                # no response
+                #print('504 detected')
+                pass

+            # LUA ERROR # # TODO: print/display errors
+            elif 'error' in response.data:
+                if(response.data['error'] == 'network99'):
+                    print('Connection to proxy refused')
+                else:
+                    print(response.data['error'])

             elif response.status != 200:
                 print('other response: {}'.format(response.status))
-                #print(error_log)
-                #detect connection to proxy refused
+                # detect connection to proxy refused
                 error_log = (json.loads(response.body.decode()))
                 if(error_log['info']['text'] == 'Connection to proxy refused'):
                     print('Connection to proxy refused')
                 print(error_log)
+            #elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
+            #    pass # ignore response
             else:

-                #avoid filename too big
-                if len(self.domains[0]) > 215:
-                    UUID = self.domains[0][-215:]+str(uuid.uuid4())
-                else:
-                    UUID = self.domains[0]+str(uuid.uuid4())
-                filename_paste_full = os.path.join(self.crawled_paste_filemame, UUID)
-                relative_filename_paste = os.path.join(self.crawler_path, UUID)
-                filename_har = os.path.join(self.crawled_har, UUID)
+                item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
+                self.save_crawled_item(item_id, response.data['html'])
+                crawlers.create_item_metadata(item_id, self.domains[0], response.data['last_url'], self.port, response.meta['father'])
+
+                if self.root_key is None:
+                    self.root_key = item_id
+                    crawlers.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
+                    crawlers.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
+
+                if 'cookies' in response.data:
+                    all_cookies = response.data['cookies']
+                else:
+                    all_cookies = []

-                # # TODO: modify me
-                # save new paste on disk
-                if self.save_crawled_paste(relative_filename_paste, response.data['html']):
+                # SCREENSHOT
+                if 'png' in response.data:
+                    sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
+                    if sha256_string:
+                        Screenshot.save_item_relationship(sha256_string, item_id)
+                        Screenshot.save_domain_relationship(sha256_string, self.domains[0])
+                # HAR
+                if 'har' in response.data:
+                    crawlers.save_har(self.har_dir, item_id, response.data['har'])

-                    # add this paste to the domain crawled set # TODO: # FIXME: put this on cache ?
-                    #self.r_serv_onion.sadd('temp:crawled_domain_pastes:{}'.format(self.domains[0]), filename_paste)
-
-                    self.r_serv_onion.sadd('{}_up:{}'.format(self.type, self.full_date), self.domains[0])
-                    self.r_serv_onion.sadd('full_{}_up'.format(self.type), self.domains[0])
-                    self.r_serv_onion.sadd('month_{}_up:{}'.format(self.type, self.date_month), self.domains[0])
-
-                    # create onion metadata
-                    if not self.r_serv_onion.exists('{}_metadata:{}'.format(self.type, self.domains[0])):
-                        self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'first_seen', self.full_date)
-
-                    # create root_key
-                    if self.root_key is None:
-                        self.root_key = relative_filename_paste
-                        # Create/Update crawler history
-                        self.r_serv_onion.zadd('crawler_history_{}:{}:{}'.format(self.type, self.domains[0], self.port), self.date_epoch, self.root_key)
-                        # Update domain port number
-                        all_domain_ports = self.r_serv_onion.hget('{}_metadata:{}'.format(self.type, self.domains[0]), 'ports')
-                        if all_domain_ports:
-                            all_domain_ports = all_domain_ports.split(';')
-                        else:
-                            all_domain_ports = []
-                        if self.port not in all_domain_ports:
-                            all_domain_ports.append(self.port)
-                            self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'ports', ';'.join(all_domain_ports))
-
-                    #create paste metadata
-                    self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'super_father', self.root_key)
-                    self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'father', response.meta['father'])
-                    self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'domain', '{}:{}'.format(self.domains[0], self.port))
-                    self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'real_link', response.url)
-
-                    self.r_serv_metadata.sadd('paste_children:'+response.meta['father'], relative_filename_paste)
-
-                    if 'png' in response.data:
-                        size_screenshot = (len(response.data['png'])*3) /4
-
-                        if size_screenshot < 5000000 or self.requested_mode: #bytes or manual/auto
-                            image_content = base64.standard_b64decode(response.data['png'].encode())
-                            hash = sha256(image_content).hexdigest()
-                            img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12])
-                            filename_img = os.path.join(self.crawled_screenshot, 'screenshot', img_dir_path, hash[12:] +'.png')
-                            dirname = os.path.dirname(filename_img)
-                            if not os.path.exists(dirname):
-                                os.makedirs(dirname)
-                            if not os.path.exists(filename_img):
-                                with open(filename_img, 'wb') as f:
-                                    f.write(image_content)
-                            # add item metadata
-                            self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'screenshot', hash)
-                            # add sha256 metadata
-                            self.r_serv_onion.sadd('screenshot:{}'.format(hash), relative_filename_paste)
-                            # domain map
-                            self.r_serv_onion.sadd('domain_screenshot:{}'.format(self.domains[0]), hash)
-                            self.r_serv_onion.sadd('screenshot_domain:{}'.format(hash), self.domains[0])
-
-                    if 'har' in response.data:
-                        dirname = os.path.dirname(filename_har)
-                        if not os.path.exists(dirname):
-                            os.makedirs(dirname)
-                        with open(filename_har+'.json', 'wb') as f:
-                            f.write(json.dumps(response.data['har']).encode())
-
-                    # save external links in set
-                    #lext = LinkExtractor(deny_domains=self.domains, unique=True)
-                    #for link in lext.extract_links(response):
-                    #    self.r_serv_onion.sadd('domain_{}_external_links:{}'.format(self.type, self.domains[0]), link.url)
-                    #    self.r_serv_metadata.sadd('paste_{}_external_links:{}'.format(self.type, filename_paste), link.url)
-
-                    le = LinkExtractor(allow_domains=self.domains, unique=True)
-                    for link in le.extract_links(response):
-                        yield SplashRequest(
-                            link.url,
-                            self.parse,
-                            errback=self.errback_catcher,
-                            endpoint='render.json',
-                            meta={'father': relative_filename_paste, 'root_key': response.meta['root_key']},
-                            args=self.arg_crawler
-                        )
+                le = LinkExtractor(allow_domains=self.domains, unique=True)
+                for link in le.extract_links(response):
+                    l_cookies = self.build_request_arg(all_cookies)
+                    yield SplashRequest(
+                        link.url,
+                        self.parse,
+                        errback=self.errback_catcher,
+                        endpoint='execute',
+                        meta={'father': item_id},
+                        args=l_cookies
+                    )

         def errback_catcher(self, failure):
             # catch all errback failures,
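The move from the render.json endpoint to execute is what makes cookie replay possible: scrapy-splash ships the lua_source above to Splash, and splash:init_cookies(args.cookies) seeds the browser session before splash:go runs. A self-contained sketch of the same pattern outside AIL (assumes scrapy-splash and a running Splash instance; the URL and cookie are placeholders):

    import scrapy
    from scrapy_splash import SplashRequest

    LUA = """
    function main(splash, args)
        splash:init_cookies(args.cookies)
        splash:go(args.url)
        return {html = splash:html(), cookies = splash:get_cookies()}
    end
    """

    class CookieSpider(scrapy.Spider):
        name = 'cookie_sketch'

        def start_requests(self):
            # replay a stored session cookie through Splash's execute endpoint
            yield SplashRequest('http://example.com', self.parse,
                                endpoint='execute',
                                args={'lua_source': LUA,
                                      'cookies': [{'name': 'session_id',
                                                   'value': 'deadbeef',
                                                   'domain': '.example.com'}]})

        def parse(self, response):
            self.logger.info('fetched %d bytes', len(response.body))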
@@ -235,7 +227,7 @@ class TorSplashCrawler():

             if failure.check(ResponseNeverReceived):
                 request = failure.request
-                url = request.meta['splash']['args']['url']
+                url = response.data['last_url']
                 father = request.meta['father']

                 self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
@@ -248,62 +240,28 @@ class TorSplashCrawler():
                     url,
                     self.parse,
                     errback=self.errback_catcher,
-                    endpoint='render.json',
-                    meta={'father': father, 'root_key': response.meta['root_key']},
-                    args=self.arg_crawler
+                    endpoint='execute',
+                    cache_args=['lua_source'],
+                    meta={'father': father},
+                    args=self.build_request_arg(response.cookiejar)
                 )

             else:
                 print('failure')
                 #print(failure)
                 print(failure.type)
                 #print(failure.request.meta['item'])

-            '''
-            #if isinstance(failure.value, HttpError):
-            elif failure.check(HttpError):
-                # you can get the response
-                response = failure.value.response
-                print('HttpError')
-                self.logger.error('HttpError on %s', response.url)
-
-            #elif isinstance(failure.value, DNSLookupError):
-            elif failure.check(DNSLookupError):
-                # this is the original request
-                request = failure.request
-                print(DNSLookupError)
-                print('DNSLookupError')
-                self.logger.error('DNSLookupError on %s', request.url)
-
-            #elif isinstance(failure.value, TimeoutError):
-            elif failure.check(TimeoutError):
-                request = failure.request
-                print('TimeoutError')
-                print(TimeoutError)
-                self.logger.error('TimeoutError on %s', request.url)
-            '''
-
-        def save_crawled_paste(self, filename, content):
-
-            if os.path.isfile(filename):
-                print('File: {} already exist in submitted pastes'.format(filename))
-                return False
-
-            try:
-                gzipencoded = gzip.compress(content.encode())
-                gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
-            except:
-                print("file error: {}".format(filename))
-                return False
+        def save_crawled_item(self, item_id, item_content):
+            gzip64encoded = crawlers.save_crawled_item(item_id, item_content)

-            # Send item to queue
             # send paste to Global
-            relay_message = "{0} {1}".format(filename, gzip64encoded)
+            relay_message = "{0} {1}".format(item_id, gzip64encoded)
             self.p.populate_set_out(relay_message, 'Mixer')

             # increase nb of paste by feeder name
             self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", "crawler", 1)

             # tag crawled paste
-            msg = 'infoleak:submission="crawler";{}'.format(filename)
+            msg = 'infoleak:submission="crawler";{}'.format(item_id)
             self.p.populate_set_out(msg, 'Tags')
-            return True
@@ -9,6 +9,7 @@ from TorSplashCrawler import TorSplashCrawler

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
+import crawlers

 if __name__ == '__main__':

@@ -37,7 +38,12 @@ if __name__ == '__main__':
     date = crawler_json['date']
     requested_mode = crawler_json['requested']

+    if crawler_options['cookiejar_uuid']:
+        cookies = crawlers.load_crawler_cookies(crawler_options['cookiejar_uuid'], domain, crawler_type=service_type)
+    else:
+        cookies = []
+
     redis_cache.delete('crawler_request:{}'.format(uuid))

     crawler = TorSplashCrawler(splash_url, crawler_options)
-    crawler.crawl(service_type, crawler_options, date, requested_mode, url, domain, port, original_item)
+    crawler.crawl(service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item)
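Before the spider starts, the launcher above resolves the cookiejar into concrete cookies via load_crawler_cookies(), which normalizes each one through create_cookie_crawler() (fills a missing domain, drops the Secure flag for onion crawls, pushes the expiry about ten days out). A worked example of that transformation (assuming the crawlers module is importable; values are placeholders):

    import crawlers

    cookie = {'name': 'session_id', 'value': 'deadbeef'}
    out = crawlers.create_cookie_crawler(dict(cookie), 'example.onion', crawler_type='onion')
    print(out['domain'])   # '.example.onion' - filled in from the crawled domain
    print(out['secure'])   # False - Tor Browser-style crawls skip the Secure flag
    print(out['expires'])  # e.g. '2020-05-20T10:00:00Z' - always ~10 days ahead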
New binary files added under doc/screenshots/ (not shown):
 doc/screenshots/crawler-cookie-edit.png               31 KiB
 doc/screenshots/crawler-cookiejar-all.png            110 KiB
 doc/screenshots/crawler-cookiejar-create.png          39 KiB
 doc/screenshots/crawler-cookiejar-domain-crawled.png 190 KiB
 doc/screenshots/crawler-cookiejar-edit.png            78 KiB
 doc/screenshots/crawler-manual-crawler.png            47 KiB
@@ -10,7 +10,7 @@ import sys
 import json
 import random

-from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
+from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response
 from flask_login import login_required, current_user, login_user, logout_user

 sys.path.append('modules')
@@ -25,6 +25,7 @@ import Tag

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
 import Domain
+import crawlers

 r_cache = Flask_config.r_cache
 r_serv_db = Flask_config.r_serv_db
@@ -43,7 +44,47 @@ def api_validator(api_response):
     if api_response:
         return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1]

+def create_json_response(data, status_code):
+    return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
+
 # ============= ROUTES ==============
+@crawler_splash.route("/crawlers/manual", methods=['GET'])
+@login_required
+@login_read_only
+def manual():
+    user_id = current_user.get_id()
+    l_cookiejar = crawlers.api_get_cookies_list_select(user_id)
+    return render_template("crawler_manual.html", crawler_enabled=True, l_cookiejar=l_cookiejar)
+
+@crawler_splash.route("/crawlers/send_to_spider", methods=['POST'])
+@login_required
+@login_analyst
+def send_to_spider():
+    user_id = current_user.get_id()
+
+    # POST val
+    url = request.form.get('url_to_crawl')
+    auto_crawler = request.form.get('crawler_type')
+    crawler_delta = request.form.get('crawler_epoch')
+    screenshot = request.form.get('screenshot')
+    har = request.form.get('har')
+    depth_limit = request.form.get('depth_limit')
+    max_pages = request.form.get('max_pages')
+    cookiejar_uuid = request.form.get('cookiejar')
+
+    if cookiejar_uuid:
+        if cookiejar_uuid == 'None':
+            cookiejar_uuid = None
+        else:
+            cookiejar_uuid = cookiejar_uuid.rsplit(':')
+            cookiejar_uuid = cookiejar_uuid[-1].replace(' ', '')
+
+    res = crawlers.api_create_crawler_task(user_id, url, screenshot=screenshot, har=har, depth_limit=depth_limit, max_pages=max_pages,
+                                           auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid)
+    if res:
+        return create_json_response(res[0], res[1])
+    return redirect(url_for('crawler_splash.manual'))
+
 # add route : /crawlers/show_domain
 @crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
 @login_required
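send_to_spider() receives the cookiejar form value in the 'description : uuid' format produced by crawlers.api_get_cookies_list_select(), and keeps only the part after the last colon. The round trip, with a hypothetical uuid:

    select_value = 'forum session : 7f3ea8e4c8d34f32a3a7de8ae79b702d'
    cookiejar_uuid = select_value.rsplit(':')[-1].replace(' ', '')
    # -> '7f3ea8e4c8d34f32a3a7de8ae79b702d'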
@@ -156,3 +197,210 @@ def domains_explorer_web():

     dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
     return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
+
+## Cookiejar ##
+@crawler_splash.route('/crawler/cookiejar/add', methods=['GET'])
+@login_required
+@login_analyst
+def crawler_cookiejar_add():
+    return render_template("add_cookiejar.html")
+
+@crawler_splash.route('/crawler/cookiejar/add_post', methods=['POST'])
+@login_required
+@login_analyst
+def crawler_cookiejar_add_post():
+    user_id = current_user.get_id()
+
+    description = request.form.get('description')
+    level = request.form.get('level')
+    if level:
+        level = 1
+    else:
+        level = 0
+
+    if 'file' in request.files:
+        file = request.files['file']
+        json_cookies = file.read().decode()
+    else:
+        json_cookies = None
+
+    # Get cookies to add
+    l_manual_cookie = []
+    l_invalid_cookie = []
+    for obj_tuple in list(request.form):
+        l_input = request.form.getlist(obj_tuple)
+        if len(l_input) == 2:
+            if l_input[0]: # cookie_name
+                cookie_dict = {'name': l_input[0], 'value': l_input[1]}
+                l_manual_cookie.append(cookie_dict)
+            elif l_input[1]: # cookie_value
+                l_invalid_cookie.append({'name': '', 'value': l_input[1]})
+    if l_invalid_cookie:
+        return create_json_response({'error': 'invalid cookie', 'invalid fields': l_invalid_cookie}, 400)
+
+    cookiejar_uuid = crawlers.create_cookiejar(user_id, level=level, description=description)
+    if json_cookies:
+        res = crawlers.api_import_cookies_from_json(json_cookies, cookiejar_uuid)
+        if res:
+            return create_json_response(res[0], res[1])
+    if l_manual_cookie:
+        crawlers.add_cookies_to_cookiejar(cookiejar_uuid, l_manual_cookie)
+
+    return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
+
+@crawler_splash.route('/crawler/cookiejar/all', methods=['GET'])
+#@login_required
+#@login_read_only
+def crawler_cookiejar_all():
+    user_id = current_user.get_id()
+    user_cookiejar = crawlers.get_cookiejar_metadata_by_iterator(crawlers.get_user_cookiejar(user_id))
+    global_cookiejar = crawlers.get_cookiejar_metadata_by_iterator(crawlers.get_global_cookiejar())
+    return render_template("all_cookiejar.html", user_cookiejar=user_cookiejar, global_cookiejar=global_cookiejar)
+
+@crawler_splash.route('/crawler/cookiejar/show', methods=['GET'])
+#@login_required
+#@login_read_only
+def crawler_cookiejar_show():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.args.get('cookiejar_uuid')
+
+    res = crawlers.api_get_cookiejar_cookies_with_uuid(cookiejar_uuid, user_id)
+    if res[1] != 200:
+        return create_json_response(res[0], res[1])
+
+    cookiejar_metadata = crawlers.get_cookiejar_metadata(cookiejar_uuid, level=False)
+
+    l_cookies = []
+    l_cookie_uuid = []
+    for cookie in res[0]:
+        l_cookies.append(json.dumps(cookie[0], indent=4, sort_keys=True))
+        l_cookie_uuid.append(cookie[1])
+    return render_template("show_cookiejar.html", cookiejar_uuid=cookiejar_uuid, cookiejar_metadata=cookiejar_metadata,
+                           l_cookies=l_cookies, l_cookie_uuid=l_cookie_uuid)
+
+@crawler_splash.route('/crawler/cookiejar/cookie/delete', methods=['GET'])
+#@login_required
+#@login_read_only
+def crawler_cookiejar_cookie_delete():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.args.get('cookiejar_uuid')
+    cookie_uuid = request.args.get('cookie_uuid')
+
+    res = crawlers.api_delete_cookie_from_cookiejar(user_id, cookiejar_uuid, cookie_uuid)
+    if res[1] != 200:
+        return create_json_response(res[0], res[1])
+    return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
+
+@crawler_splash.route('/crawler/cookiejar/delete', methods=['GET'])
+#@login_required
+#@login_read_only
+def crawler_cookiejar_delete():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.args.get('cookiejar_uuid')
+
+    res = crawlers.api_delete_cookie_jar(user_id, cookiejar_uuid)
+    if res[1] != 200:
+        return create_json_response(res[0], res[1])
+    return redirect(url_for('crawler_splash.crawler_cookiejar_all'))
+
+@crawler_splash.route('/crawler/cookiejar/edit', methods=['GET'])
+@login_required
+@login_read_only
+def crawler_cookiejar_edit():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.args.get('cookiejar_uuid')
+    description = request.args.get('description')
+
+    res = crawlers.api_edit_cookiejar_description(user_id, cookiejar_uuid, description)
+    return create_json_response(res[0], res[1])
+
+@crawler_splash.route('/crawler/cookiejar/cookie/edit', methods=['GET'])
+@login_required
+@login_read_only
+def crawler_cookiejar_cookie_edit():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.args.get('cookiejar_uuid')
+    cookie_uuid = request.args.get('cookie_uuid')
+
+    cookie_dict = crawlers.get_cookie_dict(cookie_uuid)
+    return render_template("edit_cookie.html", cookiejar_uuid=cookiejar_uuid, cookie_uuid=cookie_uuid, cookie_dict=cookie_dict)
+
+@crawler_splash.route('/crawler/cookiejar/cookie/edit_post', methods=['POST'])
+@login_required
+@login_read_only
+def crawler_cookiejar_cookie_edit_post():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.form.get('cookiejar_uuid')
+    cookie_uuid = request.form.get('cookie_uuid')
+    name = request.form.get('name')
+    value = request.form.get('value')
+    domain = request.form.get('domain')
+    path = request.form.get('path')
+    httpOnly = request.form.get('httpOnly')
+    secure = request.form.get('secure')
+
+    cookie_dict = {'name': name, 'value': value}
+    if domain:
+        cookie_dict['domain'] = domain
+    if path:
+        cookie_dict['path'] = path
+    if httpOnly:
+        cookie_dict['httpOnly'] = True
+    if secure:
+        cookie_dict['secure'] = True
+
+    res = crawlers.api_edit_cookie(user_id, cookiejar_uuid, cookie_uuid, cookie_dict)
+    if res[1] != 200:
+        return create_json_response(res[0], res[1])
+    return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
+
+@crawler_splash.route('/crawler/cookiejar/cookie/add', methods=['GET'])
+@login_required
+@login_read_only
+def crawler_cookiejar_cookie_add():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.args.get('cookiejar_uuid')
+    return render_template("add_cookie.html", cookiejar_uuid=cookiejar_uuid)
+
+@crawler_splash.route('/crawler/cookiejar/cookie/manual_add_post', methods=['POST'])
+@login_required
+@login_read_only
+def crawler_cookiejar_cookie_manual_add_post():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.form.get('cookiejar_uuid')
+    name = request.form.get('name')
+    value = request.form.get('value')
+    domain = request.form.get('domain')
+    path = request.form.get('path')
+    httpOnly = request.form.get('httpOnly')
+    secure = request.form.get('secure')
+
+    cookie_dict = {'name': name, 'value': value}
+    if domain:
+        cookie_dict['domain'] = domain
+    if path:
+        cookie_dict['path'] = path
+    if httpOnly:
+        cookie_dict['httpOnly'] = True
+    if secure:
+        cookie_dict['secure'] = True
+
+    # assumed fix: the submitted cookie was built but never stored; persist it
+    crawlers.api_create_cookie(user_id, cookiejar_uuid, cookie_dict)
+
+    return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
+
+@crawler_splash.route('/crawler/cookiejar/cookie/json_add_post', methods=['POST'])
+@login_required
+@login_read_only
+def crawler_cookiejar_cookie_json_add_post():
+    user_id = current_user.get_id()
+    cookiejar_uuid = request.form.get('cookiejar_uuid')
+
+    if 'file' in request.files:
+        file = request.files['file']
+        json_cookies = file.read().decode()
+        if json_cookies:
+            res = crawlers.api_import_cookies_from_json(json_cookies, cookiejar_uuid)
+            return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
+
+    return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid))
+
+## - - ##
@@ -30,6 +30,9 @@ r_serv_metadata = Flask_config.r_serv_metadata
 crawler_enabled = Flask_config.crawler_enabled
 bootstrap_label = Flask_config.bootstrap_label

+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
+import crawlers
+
 hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')

 faup = Faup()
@@ -214,18 +217,6 @@ def get_crawler_splash_status(type):
    return crawler_metadata

def create_crawler_config(mode, service_type, crawler_config, domain, url=None):
    if mode == 'manual':
        r_cache.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config))
    elif mode == 'auto':
        r_serv_onion.set('crawler_config:{}:{}:{}:{}'.format(mode, service_type, domain, url), json.dumps(crawler_config))

def send_url_to_crawl_in_queue(mode, service_type, url):
    r_serv_onion.sadd('{}_crawler_priority_queue'.format(service_type), '{};{}'.format(url, mode))
    # add auto crawled url for user UI
    if mode == 'auto':
        r_serv_onion.sadd('auto_crawler_url:{}'.format(service_type), url)

def delete_auto_crawler(url):
    domain = get_domain_from_url(url)
    type = get_type_domain(domain)

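For illustration, calling these helpers with mode='auto', service_type='onion', domain='example.onion' and url='http://example.onion' (hypothetical values) writes:

    # r_serv_onion key crawler_config:auto:onion:example.onion:http://example.onion -> JSON-encoded config
    # set onion_crawler_priority_queue gains member 'http://example.onion;auto'
    # set auto_crawler_url:onion gains member 'http://example.onion'
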
@@ -257,12 +248,6 @@ def dashboard():
                           crawler_metadata_regular=crawler_metadata_regular,
                           statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)

@hiddenServices.route("/crawlers/manual", methods=['GET'])
@login_required
@login_read_only
def manual():
    return render_template("Crawler_Splash_manual.html", crawler_enabled=crawler_enabled)

@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
@login_required
@login_read_only

@@ -389,94 +374,6 @@ def unblacklist_domain():
    else:
        return 'Incorrect type'

@hiddenServices.route("/crawlers/create_spider_splash", methods=['POST'])
@login_required
@login_analyst
def create_spider_splash():
    url = request.form.get('url_to_crawl')
    automatic = request.form.get('crawler_type')
    crawler_time = request.form.get('crawler_epoch')
    #html = request.form.get('html_content_id')
    screenshot = request.form.get('screenshot')
    har = request.form.get('har')
    depth_limit = request.form.get('depth_limit')
    max_pages = request.form.get('max_pages')

    # validate url
    if url is None or url == '' or url == '\n':
        return 'incorrect url'

    crawler_config = {}

    # verify user input
    if automatic:
        automatic = True
    else:
        automatic = False
    if not screenshot:
        crawler_config['png'] = 0
    if not har:
        crawler_config['har'] = 0

    # verify user input
    if depth_limit:
        try:
            depth_limit = int(depth_limit)
            if depth_limit < 0:
                return 'incorrect depth_limit'
            else:
                crawler_config['depth_limit'] = depth_limit
        except:
            return 'incorrect depth_limit'
    if max_pages:
        try:
            max_pages = int(max_pages)
            if max_pages < 1:
                return 'incorrect max_pages'
            else:
                crawler_config['closespider_pagecount'] = max_pages
        except:
            return 'incorrect max_pages'

    # get service_type
    faup.decode(url)
    unpack_url = faup.get()
    ## TODO: # FIXME: remove me
    try:
        domain = unpack_url['domain'].decode()
    except:
        domain = unpack_url['domain']

    ## TODO: # FIXME: remove me
    try:
        tld = unpack_url['tld'].decode()
    except:
        tld = unpack_url['tld']

    if tld == 'onion':
        service_type = 'onion'
    else:
        service_type = 'regular'

    if automatic:
        mode = 'auto'
        try:
            crawler_time = int(crawler_time)
            if crawler_time < 0:
                return 'incorrect epoch'
            else:
                crawler_config['time'] = crawler_time
        except:
            return 'incorrect epoch'
    else:
        mode = 'manual'
        epoch = None

    create_crawler_config(mode, service_type, crawler_config, domain, url=url)
    send_url_to_crawl_in_queue(mode, service_type, url)

    return redirect(url_for('hiddenServices.manual'))

@hiddenServices.route("/crawlers/auto_crawler", methods=['GET'])
@login_required
@login_read_only

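For illustration: submitting an automatic crawl of a hypothetical http://example.onion with screenshots disabled, depth_limit=1 and a 3600-second period would leave this route with:

    service_type = 'onion'
    mode = 'auto'
    crawler_config = {'png': 0, 'depth_limit': 1, 'time': 3600}
    # then: create_crawler_config('auto', 'onion', crawler_config, 'example.onion', url='http://example.onion')
    #       send_url_to_crawl_in_queue('auto', 'onion', 'http://example.onion')
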

116  var/www/templates/crawler/crawler_splash/add_cookie.html  Normal file

@@ -0,0 +1,116 @@
<!DOCTYPE html>

<html>
<head>
  <title>AIL - Add Cookies</title>
  <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
  <!-- Core CSS -->
  <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">

  <!-- JS -->
  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
  <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
  <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>

</head>

<body>

  {% include 'nav_bar.html' %}

  <div class="container-fluid">
    <div class="row">

      {% include 'crawler/menu_sidebar.html' %}

      <div class="col-12 col-lg-10" id="core_content">

        <div class="card mb-3 mt-1">
          <div class="card-header text-white bg-dark">
            <div class="row">
              <div class="col-8">
                <h5 class="card-title"><i class="fas fa-cookie-bite"></i> Add Cookie to cookiejar: {{cookiejar_uuid}}</h5>
              </div>
              <div class="col-4">
                <a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{cookie_uuid}}">
                  <i class="fas fa-trash-alt"></i>
                </a>
              </div>
            </div>
          </div>
          <div class="card-body">

            <form action="{{ url_for('crawler_splash.crawler_cookiejar_cookie_manual_add_post') }}" method="post" enctype="multipart/form-data">
              <input type="text" name="cookiejar_uuid" value="{{cookiejar_uuid}}" hidden>
              {% include 'crawler/crawler_splash/cookie_edit_block.html' %}
              <div class="form-group">
                <button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie"></i> Create Cookie</button>
              </div>
            </form>

            <hr>

            <form action="{{ url_for('crawler_splash.crawler_cookiejar_cookie_json_add_post') }}" method="post" enctype="multipart/form-data">
              <input type="text" name="cookiejar_uuid" value="{{cookiejar_uuid}}" hidden>
              <h5>Import cookies from file:</h5>
              <div class="form-group">
                <label for="file"><b>JSON File</b></label>
                <input type="file" class="form-control-file btn btn-outline-secondary" id="file" name="file">
              </div>

              <div class="form-group">
                <button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie"></i> Import Cookies</button>
              </div>
            </form>

          </div>
        </div>

      </div>
    </div>
  </div>

</body>

<script>
  $(document).ready(function(){
    $('#description-edit-block').hide();
    $("#page-crawler").addClass("active");
    $("#nav_title_cookiejar").removeClass("text-muted");
  });

  function toggle_sidebar(){
    if($('#nav_menu').is(':visible')){
      $('#nav_menu').hide();
      $('#side_menu').removeClass('border-right')
      $('#side_menu').removeClass('col-lg-2')
      $('#core_content').removeClass('col-lg-10')
    }else{
      $('#nav_menu').show();
      $('#side_menu').addClass('border-right')
      $('#side_menu').addClass('col-lg-2')
      $('#core_content').addClass('col-lg-10')
    }
  }

  function show_edit_description(){
    console.log('edit');
    $('#description-edit-block').show();
  }

  function edit_description(){
    var new_description = $('#input-description').val()
    var data_to_send = { cookiejar_uuid: "{{cookiejar_uuid}}", "description": new_description}

    $.get("{{ url_for('crawler_splash.crawler_cookiejar_edit') }}", data_to_send, function(data, status){
      if(status == "success") {
        $('#description-text').text(new_description)
        $('#description-edit-block').hide();
      }
    });
  }

</script>

99  var/www/templates/crawler/crawler_splash/add_cookiejar.html  Normal file

@@ -0,0 +1,99 @@
<!DOCTYPE html>

<html>
<head>
  <title>AIL - Add Cookies</title>
  <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
  <!-- Core CSS -->
  <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">

  <!-- JS -->
  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
  <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
  <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>

</head>

<body>

  {% include 'nav_bar.html' %}

  <div class="container-fluid">
    <div class="row">

      {% include 'crawler/menu_sidebar.html' %}

      <div class="col-12 col-lg-10" id="core_content">

        <div class="card mb-3 mt-1">
          <div class="card-header text-white bg-dark">
            <h5 class="card-title"><i class="fas fa-box"></i> Create Cookiejar <i class="fas fa-cookie"></i></h5>
          </div>
          <div class="card-body">

            <form action="{{ url_for('crawler_splash.crawler_cookiejar_add_post') }}" method="post" enctype="multipart/form-data">

              <div class="row">
                <div class="col-12 col-md-9">
                  <div class="input-group mb-2 mr-sm-2">
                    <div class="input-group-prepend">
                      <div class="input-group-text"><i class="fas fa-tag"></i></div>
                    </div>
                    <input id="description" name="description" class="form-control" placeholder="cookies description - (optional)" type="text">
                  </div>
                </div>
                <div class="col-12 col-md-3">
                  <div class="custom-control custom-switch mt-1">
                    <input class="custom-control-input" type="checkbox" name="level" id="id_level" checked="">
                    <label class="custom-control-label" for="id_level">
                      <i class="fas fa-users"></i> Show cookiejar to all Users
                    </label>
                  </div>
                </div>
              </div>

              <hr>

              {% include 'crawler/crawler_splash/add_cookies_block.html' %}

              <div class="form-group">
                <button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie-bite"></i> Create Cookiejar</button>
              </div>

            </form>

          </div>
        </div>

      </div>
    </div>
  </div>

</body>

<script>
  var chart = {};
  $(document).ready(function(){
    $("#page-crawler").addClass("active");
    $("#nav_cookiejar_add").addClass("active");
    $("#nav_title_cookiejar").removeClass("text-muted");
  });

  function toggle_sidebar(){
    if($('#nav_menu').is(':visible')){
      $('#nav_menu').hide();
      $('#side_menu').removeClass('border-right')
      $('#side_menu').removeClass('col-lg-2')
      $('#core_content').removeClass('col-lg-10')
    }else{
      $('#nav_menu').show();
      $('#side_menu').addClass('border-right')
      $('#side_menu').addClass('col-lg-2')
      $('#core_content').addClass('col-lg-10')
    }
  }

</script>


58  var/www/templates/crawler/crawler_splash/add_cookies_block.html  Normal file

@@ -0,0 +1,58 @@
<h5>Import cookies:</h5>
<div class="form-group">
  <label for="file"><b>JSON File</b></label>
  <input type="file" class="form-control-file btn btn-outline-secondary" id="file" name="file">
</div>

<hr>

<div>
  <h5>Create cookies:</h5>

  <div class="row">
    <div class="col-5" for="obj_input_cookie_name"><b>Cookie Name</b></div>
    <div class="col-6" for="obj_input_cookie_value"><b>Cookie Value</b></div>
  </div>

  <div class="form-horizontal">
    <div class="form-body">
      <div class="form-group">
        <div class="fields">
          <div class="input-group mb-1">
            <input type="text" class="form-control col-5" name="first_cookie" id="obj_input_cookie_name">
            <input type="text" class="form-control col-6" name="first_cookie" id="obj_input_cookie_value">
            <span class="btn btn-info input-group-addon add-field col-1"><i class="fas fa-plus"></i></span>
          </div>

          <br>

          <span class="help-block" hidden>Manual Cookies</span>
        </div>
      </div>
    </div>
  </div>
</div>

<script>
  var input_1 = '<div class="input-group mb-1"><input type="text" class="form-control col-5" name="'
  var input_2 = '"><input type="text" class="form-control col-6" name="'
  var input_3 = '">';
  var minusButton = '<span class="btn btn-danger input-group-addon delete-field col-1"><i class="fas fa-trash-alt"></i></span></div>';

  $('.add-field').click(function() {
    var new_uuid = uuidv4();
    var template = input_1 + new_uuid + input_2 + new_uuid + input_3;
    var temp = $(template).insertBefore('.help-block');
    temp.append(minusButton);
  });

  $('.fields').on('click', '.delete-field', function(){
    $(this).parent().remove();
  });

  function uuidv4() {
    return ([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g, c =>
      (c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
    );
  }
</script>

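For illustration (an assumed server-side counterpart, not shown in this diff): each dynamically added row reuses one UUID as the name of both of its inputs, so the receiving handler could recover name/value pairs with request.form.getlist:

    for field in request.form:
        if field in ('description', 'level'):
            continue
        pair = request.form.getlist(field)  # [cookie name, cookie value]
        if len(pair) == 2 and pair[0]:
            cookie_dict = {'name': pair[0], 'value': pair[1]}
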
99  var/www/templates/crawler/crawler_splash/all_cookiejar.html  Normal file

@@ -0,0 +1,99 @@
<!DOCTYPE html>
<html>

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>AIL - Cookies</title>
  <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">

  <!-- Core CSS -->
  <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">

  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
  <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
  <script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
  <script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
</head>
<body>

  {% include 'nav_bar.html' %}

  <div class="container-fluid">
    <div class="row">

      {% include 'crawler/menu_sidebar.html' %}

      <div class="col-12 col-lg-10" id="core_content">

        <div class="card mb-3 mt-1">
          <div class="card-header text-white bg-dark">
            <h5 class="card-title"><i class="fas fa-cookie-bite"></i> Your Cookiejar</h5>
          </div>
          <div class="card-body">
            {% with all_cookiejar=user_cookiejar, table_id='table_user'%}
              {% include 'crawler/crawler_splash/table_cookiejar.html' %}
            {% endwith %}
          </div>
        </div>

        <div class="card mb-3 mt-1">
          <div class="card-header text-white bg-dark">
            <h5 class="card-title"><i class="fas fa-cookie-bite"></i> Global Cookiejar</h5>
          </div>
          <div class="card-body">
            {% with all_cookiejar=global_cookiejar, table_id='table_global'%}
              {% include 'crawler/crawler_splash/table_cookiejar.html' %}
            {% endwith %}
          </div>
        </div>

        <a class="btn btn-info my-4" href="{{url_for('crawler_splash.crawler_cookiejar_add')}}">
          <i class="fas fa-plus-circle ml-auto"></i>
          Create Cookiejar
        </a>

      </div>
    </div>
  </div>

  <script>
    $(document).ready(function(){
      $("#page-crawler").addClass("active");
      $("#nav_cookiejar_all").addClass("active");
      $("#nav_title_cookiejar").removeClass("text-muted");

      $('#table_user').DataTable({
        "aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
        "iDisplayLength": 10,
        "order": [[ 0, "desc" ]]
      });
      $('#table_global').DataTable({
        "aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
        "iDisplayLength": 10,
        "order": [[ 0, "desc" ]]
      });

    });

    function toggle_sidebar(){
      if($('#nav_menu').is(':visible')){
        $('#nav_menu').hide();
        $('#side_menu').removeClass('border-right')
        $('#side_menu').removeClass('col-lg-2')
        $('#core_content').removeClass('col-lg-10')
      }else{
        $('#nav_menu').show();
        $('#side_menu').addClass('border-right')
        $('#side_menu').addClass('col-lg-2')
        $('#core_content').addClass('col-lg-10')
      }
    }
  </script>

</body>
</html>

37  var/www/templates/crawler/crawler_splash/cookie_edit_block.html  Normal file

@@ -0,0 +1,37 @@
<div class="form-group row">
  <label for="name" class="col-sm-2 col-form-label">name</label>
  <div class="col-sm-10">
    <input type="text" class="form-control" id="name" name="name" placeholder="cookie name" value="{%if 'name' in cookie_dict%}{{cookie_dict['name']}}{%endif%}" required>
  </div>
</div>
<div class="form-group row">
  <label for="value" class="col-sm-2 col-form-label">value</label>
  <div class="col-sm-10">
    <input type="text" class="form-control" id="value" name="value" placeholder="cookie value" value="{%if 'value' in cookie_dict%}{{cookie_dict['value']}}{%endif%}" required>
  </div>
</div>
<div class="form-group row">
  <label for="domain" class="col-sm-2 col-form-label">domain</label>
  <div class="col-sm-10">
    <input type="text" class="form-control" id="domain" name="domain" placeholder=".domain - optional" value="{%if 'domain' in cookie_dict%}{{cookie_dict['domain']}}{%endif%}">
  </div>
</div>
<div class="form-group row">
  <label for="path" class="col-sm-2 col-form-label">path</label>
  <div class="col-sm-10">
    <input type="text" class="form-control" id="path" name="path" placeholder="cookie path - optional" value="{%if 'path' in cookie_dict%}{{cookie_dict['path']}}{%endif%}">
  </div>
</div>

<div class="custom-control custom-switch mt-1">
  <input class="custom-control-input" type="checkbox" name="httpOnly" id="httpOnly" {%if 'httpOnly' in cookie_dict%}{%if cookie_dict['httpOnly']%}checked=""{%endif%}{%endif%}>
  <label class="custom-control-label" for="httpOnly">
    httpOnly
  </label>
</div>
<div class="custom-control custom-switch mt-1">
  <input class="custom-control-input" type="checkbox" name="secure" id="secure" {%if 'secure' in cookie_dict%}{%if cookie_dict['secure']%}checked=""{%endif%}{%endif%}>
  <label class="custom-control-label" for="secure">
    secure
  </label>
</div>

36  var/www/templates/crawler/crawler_splash/cookies_card_block.html  Normal file

@@ -0,0 +1,36 @@
{% for dict_cookie in l_elem %}

  {% if loop.index0 % 4 == 0 %}
  <div class="card-deck mt-3">
  {% endif %}

  <div class="card">
    <div class="card-header py-0">
      <div class="d-flex flex-row-reverse">
        <div>
          <a class="btn btn-light" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_edit') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{l_cookie_uuid[loop.index0]}}" style="font-size: 15px">
            <i class="text-secondary fas fa-pencil-alt"></i>
          </a>
        </div>
        <div>
          <a class="btn btn-light" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{l_cookie_uuid[loop.index0]}}" style="font-size: 15px">
            <i class="text-danger fas fa-trash-alt"></i>
          </a>
        </div>
      </div>

    </div>
    <div class="card-body">
      <pre>{{dict_cookie}}</pre>
    </div>
  </div>

  {% if loop.index0 % 4 == 3 %}
  </div>
  {% endif %}

{% endfor %}

{% if l_elem|length % 4 != 0 %}
</div>
{% endif %}


@@ -38,7 +38,7 @@
        </div>
        <div class="card-body">
          <p class="card-text">Enter a domain and choose what kind of data you want.</p>
          <form action="{{ url_for('hiddenServices.create_spider_splash') }}" method='post'>
          <form action="{{ url_for('crawler_splash.send_to_spider') }}" method='post'>
          <div class="row">
            <div class="col-12 col-lg-6">
              <div class="input-group" id="date-range-from">

@@ -108,8 +108,18 @@
              </div>
            </div>
          </div>

        </div>

        <div class="mt-1">
          <i class="mt-2 text-white fas fa-cookie-bite"></i> Cookiejar:
          <select class="custom-select form-control mt-1" name="cookiejar" id="cookiejar">
            <option value="None" selected>Don't use any cookiejar</option>
            {%for cookiejar in l_cookiejar%}
            <option value="{{cookiejar}}">{{cookiejar}}</option>
            {%endfor%}
          </select>
        </div>

      </div>
    </div>
    <button class="btn btn-primary mt-2">

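For illustration (an assumed server-side counterpart): the placeholder option submits the literal string 'None', which the receiving route presumably has to map back to "no cookiejar":

    cookiejar_uuid = request.form.get('cookiejar')
    if cookiejar_uuid == 'None':  # string sent by the default <option>
        cookiejar_uuid = None
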
102  var/www/templates/crawler/crawler_splash/edit_cookie.html  Normal file

@@ -0,0 +1,102 @@
<!DOCTYPE html>

<html>
<head>
  <title>AIL - Add Cookies</title>
  <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
  <!-- Core CSS -->
  <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">

  <!-- JS -->
  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
  <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
  <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>

</head>

<body>

  {% include 'nav_bar.html' %}

  <div class="container-fluid">
    <div class="row">

      {% include 'crawler/menu_sidebar.html' %}

      <div class="col-12 col-lg-10" id="core_content">

        <div class="card mb-3 mt-1">
          <div class="card-header text-white bg-dark">
            <div class="row">
              <div class="col-8">
                <h5 class="card-title"><i class="fas fa-cookie-bite"></i> Edit Cookie: {{cookie_uuid}}</h5>
              </div>
              <div class="col-4">
                <a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{cookie_uuid}}">
                  <i class="fas fa-trash-alt"></i>
                </a>
              </div>
            </div>
          </div>
          <div class="card-body">

            <form action="{{ url_for('crawler_splash.crawler_cookiejar_cookie_edit_post') }}" method="post" enctype="multipart/form-data">
              <input type="text" name="cookiejar_uuid" value="{{cookiejar_uuid}}" hidden>
              <input type="text" name="cookie_uuid" value="{{cookie_uuid}}" hidden>
              {% include 'crawler/crawler_splash/cookie_edit_block.html' %}
              <div class="form-group">
                <button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie-bite"></i> Edit Cookie</button>
              </div>

            </form>

          </div>
        </div>

      </div>
    </div>
  </div>

</body>

<script>
  $(document).ready(function(){
    $('#description-edit-block').hide();
    $("#page-crawler").addClass("active");
    $("#nav_title_cookiejar").removeClass("text-muted");
  });

  function toggle_sidebar(){
    if($('#nav_menu').is(':visible')){
      $('#nav_menu').hide();
      $('#side_menu').removeClass('border-right')
      $('#side_menu').removeClass('col-lg-2')
      $('#core_content').removeClass('col-lg-10')
    }else{
      $('#nav_menu').show();
      $('#side_menu').addClass('border-right')
      $('#side_menu').addClass('col-lg-2')
      $('#core_content').addClass('col-lg-10')
    }
  }

  function show_edit_description(){
    console.log('edit');
    $('#description-edit-block').show();
  }

  function edit_description(){
    var new_description = $('#input-description').val()
    var data_to_send = { cookiejar_uuid: "{{cookiejar_uuid}}", "description": new_description}

    $.get("{{ url_for('crawler_splash.crawler_cookiejar_edit') }}", data_to_send, function(data, status){
      if(status == "success") {
        $('#description-text').text(new_description)
        $('#description-edit-block').hide();
      }
    });
  }

</script>


@@ -445,7 +445,7 @@
          <div class="text-center">
            <small class="text-info" style="line-height:0.9;">
              <a target="_blank" href="" id="screenshot_link"></a>
            <small>
            </small>
          </div>
        </div>

@@ -519,11 +519,11 @@ var draw_img = false;
    $("#screenshot_link").attr("href", screenshot_href + "{{dict_domain['crawler_history']['random_item']['id']}}");
    $("#screenshot_link").text("{{dict_domain['crawler_history']['random_item']['link']}}");
  {%else%}
    var screenshot = "";
    var screenshot = "";
  {%endif%}
{%endif%}
{%else%}
  var screenshot = "";
  var screenshot = "";
{%endif%}

img.src = base_url + screenshot;

@@ -561,7 +561,9 @@ function img_error() {
}

function reload_image(new_screenshot, link, item_id) {
  $("#"+screenshot.replace(/\//g, "")).removeClass("icon_selected").addClass("icon_img");
  if (screenshot) {
    $("#"+screenshot.replace(/\//g, "")).removeClass("icon_selected").addClass("icon_img");
  }
  screenshot = new_screenshot;

  img.src=base_url + screenshot;

122  var/www/templates/crawler/crawler_splash/show_cookiejar.html  Normal file

@@ -0,0 +1,122 @@
<!DOCTYPE html>

<html>
<head>
  <title>AIL - Add Cookies</title>
  <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
  <!-- Core CSS -->
  <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
  <link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">

  <!-- JS -->
  <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
  <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
  <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>

</head>

<body>

  {% include 'nav_bar.html' %}

  <div class="container-fluid">
    <div class="row">

      {% include 'crawler/menu_sidebar.html' %}

      <div class="col-12 col-lg-10" id="core_content">

        <div class="card mb-3 mt-1">
          <div class="card-header text-white bg-dark">
            <div class="row">
              <div class="col-8">
                <h5 class="card-title"><i class="fas fa-cookie-bite"></i> Edit Cookiejar</h5>
              </div>
              <div class="col-4">
                <a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_delete') }}?cookiejar_uuid={{cookiejar_uuid}}">
                  <i class="fas fa-trash-alt"></i>
                </a>
              </div>
            </div>
          </div>
          <div class="card-body">
            {% with all_cookiejar=[cookiejar_metadata], table_id='table_cookiejar'%}
              {% include 'crawler/crawler_splash/table_cookiejar.html' %}
            {% endwith %}

            <button class="btn btn-info" onclick="show_edit_description();">
              Edit Description <i class="fas fa-pencil-alt"></i>
            </button>

            <a href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_add')}}?cookiejar_uuid={{cookiejar_uuid}}">
              <button class="btn btn-primary">
                Add Cookies <i class="fas fa-cookie"></i>
              </button>
            </a>

            <span class="mt-1" id="description-edit-block">
              <div class="input-group">
                <input class="form-control" type="text" id="input-description" value="{{cookiejar_metadata['description']}}"></input>
                <div class="input-group-append">
                  <button class="btn btn-info" onclick="edit_description();">
                    <i class="fas fa-pencil-alt"></i> Edit
                  </button>
                </div>
              </div>
            </span>

          </div>
        </div>

        {% with l_elem=l_cookies, l_cookie_uuid=l_cookie_uuid, cookiejar_uuid=cookiejar_uuid %}
          {% include 'crawler/crawler_splash/cookies_card_block.html' %}
        {% endwith %}

      </div>
    </div>
  </div>

</body>

<script>
  var chart = {};
  $(document).ready(function(){
    $('#description-edit-block').hide();
    $("#page-crawler").addClass("active");
    $("#nav_title_cookiejar").removeClass("text-muted");
  });

  function toggle_sidebar(){
    if($('#nav_menu').is(':visible')){
      $('#nav_menu').hide();
      $('#side_menu').removeClass('border-right')
      $('#side_menu').removeClass('col-lg-2')
      $('#core_content').removeClass('col-lg-10')
    }else{
      $('#nav_menu').show();
      $('#side_menu').addClass('border-right')
      $('#side_menu').addClass('col-lg-2')
      $('#core_content').addClass('col-lg-10')
    }
  }

  function show_edit_description(){
    console.log('edit');
    $('#description-edit-block').show();
  }

  function edit_description(){
    var new_description = $('#input-description').val()
    var data_to_send = { cookiejar_uuid: "{{cookiejar_uuid}}", "description": new_description}

    $.get("{{ url_for('crawler_splash.crawler_cookiejar_edit') }}", data_to_send, function(data, status){
      if(status == "success") {
        $('#description-text').text(new_description)
        $('#description-edit-block').hide();
      }
    });
  }

</script>

28  var/www/templates/crawler/crawler_splash/table_cookiejar.html  Normal file

@@ -0,0 +1,28 @@
<table id="{{table_id}}" class="table table-striped table-bordered">
  <thead class="bg-dark text-white">
    <tr>
      <th class="bg-info text-white">Description</th>
      <th class="bg-info text-white">Date</th>
      <th class="bg-info text-white">UUID</th>
      <th class="bg-info text-white">User</th>
    </tr>
  </thead>
  <tbody style="font-size: 15px;">
    {% for dict_cookiejar in all_cookiejar %}
    <tr>
      <td id="description-text">{{dict_cookiejar['description']}}</td>
      <td>
        {%if dict_cookiejar['date']%}
          {{dict_cookiejar['date'][0:4]}}/{{dict_cookiejar['date'][4:6]}}/{{dict_cookiejar['date'][6:8]}}
        {%endif%}
      </td>
      <td>
        <a target="_blank" href="{{ url_for('crawler_splash.crawler_cookiejar_show') }}?cookiejar_uuid={{ dict_cookiejar['cookiejar_uuid'] }}">
          {{ dict_cookiejar['cookiejar_uuid']}}
        </a>
      </td>
      <td>{{dict_cookiejar['user_id']}}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>

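For illustration: the template assumes cookiejar dates are stored as compact YYYYMMDD strings and slices them for display:

    date = '20200520'                                   # hypothetical stored value
    '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])  # -> '2020/05/20'
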
@@ -8,7 +8,7 @@
<nav class="navbar navbar-expand navbar-light bg-light flex-md-column flex-row align-items-start py-2" id="nav_menu">
  <h5 class="d-flex text-muted w-100">
    <span>Splash Crawlers </span>
    <a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
    <a class="ml-auto" href="{{url_for('crawler_splash.manual')}}">
      <i class="fas fa-plus-circle ml-auto"></i>
    </a>
  </h5>

@@ -32,7 +32,7 @@
      </a>
    </li>
    <li class="nav-item">
      <a class="nav-link" href="{{url_for('hiddenServices.manual')}}" id="nav_manual_crawler">
      <a class="nav-link" href="{{url_for('crawler_splash.manual')}}" id="nav_manual_crawler">
        <i class="fas fa-spider"></i>
        Manual Crawler
      </a>

@@ -47,9 +47,6 @@

  <h5 class="d-flex text-muted w-100" id="nav_title_domains_explorer">
    <span>Domain Explorer </span>
    <a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
      <i class="fas fa-plus-circle ml-auto"></i>
    </a>
  </h5>
  <ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
    <li class="nav-item">

@@ -64,5 +61,27 @@
        <span>Web Domain</span>
      </a>
    </li>
</nav>
  </ul>

  <h5 class="d-flex text-muted w-100" id="nav_title_cookiejar">
    <span>Cookiejar </span>
    <a class="ml-auto" href="{{url_for('crawler_splash.crawler_cookiejar_add')}}">
      <i class="fas fa-plus-circle ml-auto"></i>
    </a>
  </h5>
  <ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100">
    <li class="nav-item">
      <a class="nav-link" href="{{url_for('crawler_splash.crawler_cookiejar_add')}}" id="nav_cookiejar_add">
        <i class="fas fa-cookie"></i>
        <span>Add Cookiejar</span>
      </a>
    </li>
    <li class="nav-item">
      <a class="nav-link" href="{{url_for('crawler_splash.crawler_cookiejar_all')}}" id="nav_cookiejar_all">
        <i class="fas fa-cookie-bite"></i>
        <span>All Cookiejar</span>
      </a>
    </li>
  </ul>
</nav>
</div>


@@ -165,7 +165,6 @@ $('.add-field').click(function() {
});

$('.fields').on('click', '.delete-field', function(){
  console.log($(this).parent());
  $(this).parent().remove();
  //$.get( "#")
});