mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [crawler] add cookies list by user/global, save cookies from file + dict(name, value), TODO: API + handle errors
This commit is contained in:
parent
db634e8866
commit
1c45571042
8 changed files with 313 additions and 20 deletions
|
@ -351,19 +351,19 @@ if __name__ == '__main__':
|
|||
# get HAR files
|
||||
default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
|
||||
if default_crawler_har:
|
||||
default_crawler_har = 1
|
||||
default_crawler_har = True
|
||||
else:
|
||||
default_crawler_har = 0
|
||||
default_crawler_har = False
|
||||
|
||||
# get PNG files
|
||||
default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
|
||||
if default_crawler_png:
|
||||
default_crawler_png = 1
|
||||
default_crawler_png = True
|
||||
else:
|
||||
default_crawler_png = 0
|
||||
default_crawler_png = False
|
||||
|
||||
# Default crawler options
|
||||
default_crawler_config = {'html': 1,
|
||||
default_crawler_config = {'html': True,
|
||||
'har': default_crawler_har,
|
||||
'png': default_crawler_png,
|
||||
'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
|
||||
|
|
|
@ -27,6 +27,12 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
|||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
||||
config_loader = None
|
||||
|
||||
# # # # # # # #
|
||||
# #
|
||||
# COOKIES #
|
||||
# #
|
||||
# # # # # # # #
|
||||
|
||||
# # # #
|
||||
# Cookies Fields:
|
||||
# - name
|
||||
|
@ -69,17 +75,76 @@ def create_cookie_dict_from_browser(browser_cookie):
|
|||
}
|
||||
return dict_cookie
|
||||
|
||||
def load_cookies(cookies_uuid, domain=None, crawler_type='regular'):
    """Build the list of cookie dicts for one saved cookie set.

    The cookie set is fetched with get_cookies(cookies_uuid), which yields
    the cookies imported from a JSON file and the manually entered
    (name, value) pairs. Every entry is converted with create_cookie_dict.

    :param cookies_uuid: identifier of the saved cookie set
    :param domain: domain forwarded to create_cookie_dict for manual cookies
    :param crawler_type: crawler type forwarded to create_cookie_dict
    :return: list of the values returned by create_cookie_dict
    """
    cookies_json, l_cookies = get_cookies(cookies_uuid)
    all_cookies = []

    # cookies imported from a JSON file: passed through as browser cookies
    for cookie_dict in cookies_json:
        all_cookies.append(create_cookie_dict(browser_cookie=cookie_dict, crawler_type=crawler_type))

    # manual cookies: only a name and a value are known
    for cookie_name, cookie_value in l_cookies:
        all_cookies.append(create_cookie_dict(cookie_name=cookie_name, cookie_value=cookie_value, domain=domain, crawler_type=crawler_type))
    return all_cookies
|
||||
|
||||
def get_cookies():
    """Return a new, empty list of cookies."""
    return []
|
||||
def get_all_cookies():
    """Return the members of the Redis set 'cookies:all'.

    save_cookies adds the uuid of every saved cookie set to this set.
    """
    return r_serv_onion.smembers('cookies:all')
|
||||
|
||||
def get_all_global_cookies():
    """Return the members of the Redis set 'cookies:global'.

    save_cookies adds a cookie set uuid to this set when its level is not 0.
    """
    return r_serv_onion.smembers('cookies:global')
|
||||
|
||||
def get_user_cookies(user_id):
    """Return the members of the Redis set 'cookies:user:<user_id>'.

    save_cookies adds a cookie set uuid to this set when its level is 0.

    :param user_id: id of the user whose cookie sets are requested
    """
    return r_serv_onion.smembers('cookies:user:{}'.format(user_id))
|
||||
|
||||
def exist_cookies_uuid(cookies_uuid):
    """Tell whether a 'cookie_metadata:<cookies_uuid>' key exists in Redis.

    :param cookies_uuid: identifier of the cookie set to look up
    :return: the result of the Redis EXISTS call on the metadata key
    """
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    return r_serv_onion.exists(metadata_key)
|
||||
|
||||
def get_manual_cookies_keys(cookies_uuid):
    """Fetch the manual cookies hash of a cookie set.

    :param cookies_uuid: identifier of the cookie set
    :return: the result of HGETALL on 'cookies:manual_cookies:<cookies_uuid>'
    """
    manual_cookies_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hgetall(manual_cookies_key)
|
||||
|
||||
def get_manual_cookie_val(cookies_uuid, cookie_name):
    """Fetch the value of one manual cookie of a cookie set.

    :param cookies_uuid: identifier of the cookie set
    :param cookie_name: field to read in 'cookies:manual_cookies:<cookies_uuid>'
    :return: the result of HGET for that field
    """
    manual_cookies_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hget(manual_cookies_key, cookie_name)
|
||||
|
||||
def get_cookies(cookies_uuid):
    """Load both kinds of cookies stored for a cookie set.

    :param cookies_uuid: identifier of the cookie set
    :return: tuple (cookies_json, l_cookies) where cookies_json is the decoded
             content of 'cookies:json_cookies:<cookies_uuid>' (an empty list
             when the key is missing or empty) and l_cookies is a list of
             (cookie_name, cookie_value) tuples for the manual cookies
    """
    raw_json = r_serv_onion.get('cookies:json_cookies:{}'.format(cookies_uuid))
    cookies_json = json.loads(raw_json) if raw_json else []

    # get_manual_cookies_keys wraps HGETALL, so the values are already in the
    # returned mapping: no need for one extra HGET round trip per cookie name.
    l_cookies = list(get_manual_cookies_keys(cookies_uuid).items())
    return (cookies_json, l_cookies)
|
||||
|
||||
# # TODO: handle errors + add api handler
|
||||
def save_cookies(user_id, json_cookies=None, l_cookies=None, cookies_uuid=None, level=1, description=None):
    """Store a cookie set and its metadata in Redis.

    :param user_id: id of the user saving the cookies
    :param json_cookies: JSON document (string) of cookies to import; skipped
                         when empty or None
    :param l_cookies: iterable of dicts with a 'name' and a 'value' key
                      (manual cookies); defaults to no manual cookie
    :param cookies_uuid: existing cookie set to update; a new uuid4 is
                         generated when it is None or unknown
    :param level: 0 registers the set in the user's own list, any other value
                  registers it in the global list
    :param description: free text stored in the metadata
    :return: the uuid under which the cookies were saved
    :raises ValueError: from json.loads when json_cookies is not valid JSON
    """
    if l_cookies is None:
        l_cookies = []

    if cookies_uuid is None or not exist_cookies_uuid(cookies_uuid):
        cookies_uuid = str(uuid.uuid4())

    if json_cookies:
        # decode then re-encode so that only valid JSON is ever stored
        json_cookies = json.loads(json_cookies) # # TODO: catch Exception
        r_serv_onion.set('cookies:json_cookies:{}'.format(cookies_uuid), json.dumps(json_cookies))

    manual_cookies_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    for cookie_dict in l_cookies:
        r_serv_onion.hset(manual_cookies_key, cookie_dict['name'], cookie_dict['value'])

    # cookies level # # TODO: edit level set on edit
    r_serv_onion.sadd('cookies:all', cookies_uuid)
    if level == 0:
        r_serv_onion.sadd('cookies:user:{}'.format(user_id), cookies_uuid)
    else:
        r_serv_onion.sadd('cookies:global', cookies_uuid)

    # metadata: keyed by the cookie set uuid (this is the key that
    # exist_cookies_uuid checks), not by the `id` builtin
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    r_serv_onion.hset(metadata_key, 'user_id', user_id)
    r_serv_onion.hset(metadata_key, 'level', level)
    r_serv_onion.hset(metadata_key, 'description', description)
    r_serv_onion.hset(metadata_key, 'date', datetime.date.today().strftime("%Y%m%d"))
    return cookies_uuid
|
||||
|
||||
#### ####
|
||||
|
||||
def is_redirection(domain, last_url):
    """Tell whether last_url belongs to another domain than the one crawled.

    Only the last two dot-separated labels of the URL's network location are
    compared with domain, so 'www.example.com' matches 'example.com'.

    :param domain: domain that was requested
    :param last_url: last URL reached by the crawler
    :return: True when the last URL's domain differs from domain
    """
    last_netloc = urlparse(last_url).netloc
    # Slicing (instead of indexing [-2]) keeps single-label hosts such as
    # 'localhost' and empty network locations from raising IndexError.
    last_domain = '.'.join(last_netloc.split('.')[-2:])
    return domain != last_domain
|
||||
|
||||
# domain up
|
||||
def create_domain_metadata(domain_type, domain, current_port, date, date_month):
|
||||
|
|
|
@ -121,8 +121,8 @@ class TorSplashCrawler():
|
|||
self.date_month = date['date_month']
|
||||
self.date_epoch = int(date['epoch'])
|
||||
|
||||
self.png = True
|
||||
self.har = True
|
||||
self.png = crawler_options['png']
|
||||
self.har = crawler_options['har']
|
||||
self.cookies = cookies
|
||||
|
||||
config_section = 'Crawler'
|
||||
|
@ -176,6 +176,8 @@ class TorSplashCrawler():
|
|||
# detect connection to proxy refused
|
||||
error_log = (json.loads(response.body.decode()))
|
||||
print(error_log)
|
||||
elif crawler_splash.is_redirection(self.domains[0], response.data['last_url']):
|
||||
pass # ignore response
|
||||
else:
|
||||
|
||||
item_id = crawler_splash.create_item_id(self.item_dir, self.domains[0])
|
||||
|
|
|
@ -37,8 +37,7 @@ if __name__ == '__main__':
|
|||
crawler_options = crawler_json['crawler_options']
|
||||
date = crawler_json['date']
|
||||
requested_mode = crawler_json['requested']
|
||||
cookies = crawler_splash.load_cookies(crawler_splash.get_cookies(), domain, crawler_type='onion')
|
||||
print(cookies)
|
||||
cookies = crawler_splash.load_cookies('ccad0090-bdcb-4ba5-875b-3dae8f936216', domain, crawler_type=service_type)
|
||||
|
||||
redis_cache.delete('crawler_request:{}'.format(uuid))
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import Tag
|
|||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import Domain
|
||||
import crawler_splash
|
||||
|
||||
r_cache = Flask_config.r_cache
|
||||
r_serv_db = Flask_config.r_serv_db
|
||||
|
@ -156,3 +157,55 @@ def domains_explorer_web():
|
|||
|
||||
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
|
||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
|
||||
|
||||
@crawler_splash.route('/crawler/cookies/add', methods=['GET'])
#@login_required
#@login_analyst
def crawler_cookies_add():
    """Render the form used to add a new set of cookies."""
    # NOTE(review): both authentication decorators above are commented out,
    # confirm whether this route is meant to be reachable without login.
    template_name = "add_cookies.html"
    return render_template(template_name)
|
||||
|
||||
@crawler_splash.route('/crawler/cookies/add_post', methods=['POST'])
#@login_required
#@login_analyst
def crawler_cookies_add_post():
    """Save the cookies submitted through the "Add Cookies" form.

    Reads the description, the level switch, the optional uploaded JSON file
    and the manual cookie fields, passes them to save_cookies, then renders
    the form again.
    """
    # NOTE(review): `crawler_splash` is used here both as the object providing
    # the route decorator and as the module providing save_cookies, confirm
    # that one name does not shadow the other.
    user_id = current_user.get_id()

    description = request.form.get('description')
    # any non-empty 'level' value is stored as 1, anything else as 0
    level = 1 if request.form.get('level') else 0

    if 'file' in request.files:
        json_file = request.files['file'].read().decode()
    else:
        json_file = '[]'

    # Get cookies to add
    l_manual_cookie = []
    l_invalid_cookie = []
    for field_name in list(request.form):
        l_input = request.form.getlist(field_name)
        # only fields submitted exactly twice under the same name are treated
        # as a (cookie name, cookie value) pair
        if len(l_input) != 2:
            continue
        cookie_name, cookie_value = l_input
        if cookie_name:
            l_manual_cookie.append({'name': cookie_name, 'value': cookie_value})
        elif cookie_value:
            # a value without a name: collected here, not saved
            l_invalid_cookie.append({'name': '', 'value': cookie_value})

    cookie_uuid = crawler_splash.save_cookies(user_id, json_cookies=json_file, l_cookies=l_manual_cookie, level=level, description=description)
    return render_template("add_cookies.html")
|
||||
|
||||
@crawler_splash.route('/crawler/cookies/all', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookies_all():
    """Render the cookies page with the user's and the global cookie sets.

    The results of crawler_splash.get_user_cookies(user_id) and
    crawler_splash.get_all_global_cookies() are passed to the template as
    user_cookies and global_cookies.
    """
    # get_id() takes no argument; passing the not-yet-assigned user_id made
    # every request fail before anything was rendered
    user_id = current_user.get_id()
    user_cookies = crawler_splash.get_user_cookies(user_id)
    global_cookies = crawler_splash.get_all_global_cookies()
    return render_template("add_cookies.html", user_cookies=user_cookies, global_cookies=global_cookies)
|
||||
|
|
156
var/www/templates/crawler/crawler_splash/add_cookies.html
Normal file
156
var/www/templates/crawler/crawler_splash/add_cookies.html
Normal file
|
@ -0,0 +1,156 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<title>AIL-Framework</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
|
||||
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
{% include 'nav_bar.html' %}
|
||||
|
||||
<div class="container-fluid">
|
||||
<div class="row">
|
||||
|
||||
{% include 'crawler/menu_sidebar.html' %}
|
||||
|
||||
<div class="col-12 col-lg-10" id="core_content">
|
||||
|
||||
<div class="card mb-3 mt-1">
|
||||
<div class="card-header text-white bg-dark">
|
||||
<h5 class="card-title"><i class="fas fa-cookie"></i> Add Cookies</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
|
||||
<form action="{{ url_for('crawler_splash.crawler_cookies_add_post') }}" method="post" enctype="multipart/form-data">
|
||||
|
||||
<div class="row">
|
||||
<div class="col-12 col-md-9">
|
||||
<div class="input-group mb-2 mr-sm-2">
|
||||
<div class="input-group-prepend">
|
||||
<div class="input-group-text"><i class="fas fa-tag"></i></div>
|
||||
</div>
|
||||
<input id="description" name="description" class="form-control" placeholder="cookies description - (optional)" type="text">
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-12 col-md-3">
|
||||
<div class="custom-control custom-switch mt-1">
|
||||
<input class="custom-control-input" type="checkbox" name="level" id="id_level" checked="">
|
||||
<label class="custom-control-label" for="id_level">
|
||||
<i class="fas fa-users"></i> Show cookies to all Users
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="file"><b>JSON File</b> Cookies to import:</label>
|
||||
<input type="file" class="form-control-file btn btn-outline-secondary" id="file" name="file">
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
|
||||
<div>
|
||||
|
||||
<h5>Add manual cookies:</h5>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-5" for="obj_input_cookie_name"><b>Cookie Name</b></div>
|
||||
<div class="col-6" for="obj_input_cookie_value"><b>Cookie Value</b></div>
|
||||
</div>
|
||||
|
||||
<div class="form-horizontal">
|
||||
<div class="form-body">
|
||||
<div class="form-group">
|
||||
<div class="fields">
|
||||
<div class="input-group mb-1">
|
||||
<input type="text" class="form-control col-5" name="first_cookie" id="obj_input_cookie_name">
|
||||
<input type="text" class="form-control col-6" name="first_cookie" id="obj_input_cookie_value">
|
||||
<span class="btn btn-info input-group-addon add-field col-1"><i class="fas fa-plus"></i></span>
|
||||
</div>
|
||||
|
||||
<br>
|
||||
|
||||
<span class="help-block" hidden>Manual Cookies</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie-bite"></i> Add Cookies</button>
|
||||
</div>
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
|
||||
<script>
|
||||
var chart = {};

// Once the DOM is ready, mark the crawler page and the "Add Cookies" entry
// as active and un-mute the cookies section title.
$(document).ready(function(){
    $("#page-crawler").addClass("active");
    $("#nav_cookies_add").addClass("active");
    $("#nav_title_cookies").removeClass("text-muted");
});

// HTML pieces of one manual cookie row: the same generated name is inserted
// after input_1 and after input_2, so both inputs of a row share that name.
var input_1 = '<div class="input-group mb-1"><input type="text" class="form-control col-5" name="';
var input_2 = '"><input type="text" class="form-control col-6" name="';
var input_3 = '">';
var minusButton = '<span class="btn btn-danger input-group-addon delete-field col-1"><i class="fas fa-trash-alt"></i></span></div>';

// Insert a new row before the help block and give it a delete button.
$('.add-field').click(function() {
    var field_name = uuidv4();
    var row_html = input_1 + field_name + input_2 + field_name + input_3;
    $(row_html).insertBefore('.help-block').append(minusButton);
});

// Delegated handler: remove the parent element of the clicked delete button.
$('.fields').on('click', '.delete-field', function(){
    $(this).parent().remove();
});
|
||||
|
||||
// Show or hide the navigation menu and adjust the layout classes of the
// side menu and of the main content column accordingly.
function toggle_sidebar(){
    var nav_menu = $('#nav_menu');
    var side_menu = $('#side_menu');
    var core_content = $('#core_content');

    if(nav_menu.is(':visible')){
        nav_menu.hide();
        side_menu.removeClass('border-right col-lg-2');
        core_content.removeClass('col-lg-10');
    }else{
        nav_menu.show();
        side_menu.addClass('border-right col-lg-2');
        core_content.addClass('col-lg-10');
    }
}
|
||||
|
||||
function uuidv4() {
|
||||
return ([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g, c =>
|
||||
(c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
|
||||
);
|
||||
}
|
||||
|
||||
</script>
|
|
@ -47,9 +47,6 @@
|
|||
|
||||
<h5 class="d-flex text-muted w-100" id="nav_title_domains_explorer">
|
||||
<span>Domain Explorer </span>
|
||||
<a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
|
||||
<i class="fas fa-plus-circle ml-auto"></i>
|
||||
</a>
|
||||
</h5>
|
||||
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
|
||||
<li class="nav-item">
|
||||
|
@ -64,5 +61,27 @@
|
|||
<span>Web Domain</span>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h5 class="d-flex text-muted w-100" id="nav_title_cookies">
|
||||
<span>Cookies </span>
|
||||
<a class="ml-auto" href="{{url_for('crawler_splash.crawler_cookies_add')}}">
|
||||
<i class="fas fa-plus-circle ml-auto"></i>
|
||||
</a>
|
||||
</h5>
|
||||
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="{{url_for('crawler_splash.crawler_cookies_add')}}" id="nav_cookies_add">
|
||||
<i class="fas fa-cookie"></i>
|
||||
<span>Add Cookies</span>
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="{{url_for('crawler_splash.domains_explorer_web')}}" id="nav_cookies_all">
|
||||
<i class="fas fa-cookie-bite"></i>
|
||||
<span>All Cookies</span>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
|
|
|
@ -165,7 +165,6 @@ $('.add-field').click(function() {
|
|||
});
|
||||
|
||||
// Delegated handler: remove the parent element of the clicked delete button.
$('.fields').on('click', '.delete-field', function(){
    $(this).parent().remove();
    //$.get( "#")
});
|
||||
|
|
Loading…
Reference in a new issue