chg: [crawler] add cookies list by user/global, save cookies from file + dict(name, value), TODO: API + handle errors

This commit is contained in:
Terrtia 2020-03-23 18:00:09 +01:00
parent db634e8866
commit 1c45571042
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
8 changed files with 313 additions and 20 deletions

View file

@ -351,19 +351,19 @@ if __name__ == '__main__':
# get HAR files
default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
if default_crawler_har:
default_crawler_har = 1
default_crawler_har = True
else:
default_crawler_har = 0
default_crawler_har = False
# get PNG files
default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
if default_crawler_png:
default_crawler_png = 1
default_crawler_png = True
else:
default_crawler_png = 0
default_crawler_png = False
# Default crawler options
default_crawler_config = {'html': 1,
default_crawler_config = {'html': True,
'har': default_crawler_har,
'png': default_crawler_png,
'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),

View file

@ -27,6 +27,12 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
# # # # # # # #
# #
# COOKIES #
# #
# # # # # # # #
# # # #
# Cookies Fields:
# - name
@ -69,17 +75,76 @@ def create_cookie_dict_from_browser(browser_cookie):
}
return dict_cookie
def load_cookies(cookies_uuid, domain=None, crawler_type='regular'):
    """Build the list of crawler cookies for a stored cookie set.

    The JSON-imported cookies and the manual (name, value) pairs saved under
    ``cookies_uuid`` are each converted with ``create_cookie_dict``.
    ``domain`` is only forwarded for the manual pairs; ``crawler_type`` is
    forwarded for both kinds.

    Returns:
        list: converted imported cookies first, then converted manual cookies.
    """
    imported_cookies, manual_pairs = get_cookies(cookies_uuid)
    crawler_cookies = [
        create_cookie_dict(browser_cookie=entry, crawler_type=crawler_type)
        for entry in imported_cookies
    ]
    crawler_cookies.extend(
        create_cookie_dict(cookie_name=name, cookie_value=value, domain=domain, crawler_type=crawler_type)
        for name, value in manual_pairs
    )
    return crawler_cookies
def get_cookies():
    """Return a new, empty list of cookies."""
    return []
def get_all_cookies():
    """Return the members of the ``cookies:all`` Redis set.

    ``save_cookies`` adds every cookie set UUID to this set.
    """
    return r_serv_onion.smembers('cookies:all')
def get_all_global_cookies():
    """Return the members of the ``cookies:global`` Redis set.

    ``save_cookies`` registers a cookie set here when its level is not 0.
    """
    return r_serv_onion.smembers('cookies:global')
def get_user_cookies(user_id):
    """Return the members of the ``cookies:user:<user_id>`` Redis set.

    ``save_cookies`` registers a cookie set here when its level is 0.

    Args:
        user_id: identifier used to build the per-user set key.
    """
    return r_serv_onion.smembers('cookies:user:{}'.format(user_id))
def exist_cookies_uuid(cookies_uuid):
    """Check whether a ``cookie_metadata:<cookies_uuid>`` key exists in Redis."""
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    return r_serv_onion.exists(metadata_key)
def get_manual_cookies_keys(cookies_uuid):
    """Fetch the manual cookies hash stored for a cookie set.

    Despite the name, this issues HGETALL on
    ``cookies:manual_cookies:<cookies_uuid>``, so the whole hash is returned
    rather than only its field names.
    """
    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hgetall(manual_key)
def get_manual_cookie_val(cookies_uuid, cookie_name):
    """Return the value stored for ``cookie_name`` in a cookie set's manual hash."""
    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hget(manual_key, cookie_name)
def get_cookies(cookies_uuid):
    """Load both kinds of cookies stored for a cookie set.

    Args:
        cookies_uuid: identifier of the cookie set.

    Returns:
        tuple: ``(cookies_json, l_cookies)`` where ``cookies_json`` is the
        decoded content of ``cookies:json_cookies:<cookies_uuid>`` (an empty
        list when nothing is stored) and ``l_cookies`` is a list of
        ``(cookie_name, cookie_value)`` tuples from the manual cookies hash.
    """
    raw_json = r_serv_onion.get('cookies:json_cookies:{}'.format(cookies_uuid))
    cookies_json = json.loads(raw_json) if raw_json else []
    # The hash fetched by get_manual_cookies_keys (HGETALL) already carries the
    # values, so pair them up directly instead of issuing one HGET per cookie.
    l_cookies = list(get_manual_cookies_keys(cookies_uuid).items())
    return (cookies_json, l_cookies)
# # TODO: handle errors + add api handler
def save_cookies(user_id, json_cookies=None, l_cookies=None, cookies_uuid=None, level=1, description=None):
    """Store a cookie set in Redis and register it in the cookie indexes.

    Args:
        user_id: owner of the cookie set; saved in the metadata and used for
            the per-user index when ``level`` is 0.
        json_cookies: JSON text of imported cookies (optional).
        l_cookies: iterable of dicts with ``'name'`` and ``'value'`` keys for
            manually entered cookies (optional).
        cookies_uuid: existing cookie set to write to; a new UUID is generated
            when it is ``None`` or has no stored metadata.
        level: 0 registers the set in ``cookies:user:<user_id>``, any other
            value registers it in ``cookies:global``.
        description: free text saved in the metadata.

    Returns:
        str: the UUID the cookie set was stored under.

    Raises:
        ValueError: if ``json_cookies`` is not valid JSON
            (``json.JSONDecodeError``).
    """
    # TODO: handle errors + add api handler
    if cookies_uuid is None or not exist_cookies_uuid(cookies_uuid):
        cookies_uuid = str(uuid.uuid4())

    if json_cookies:
        # Round-trip through the parser so only well-formed JSON gets stored.
        json_cookies = json.loads(json_cookies)  # TODO: catch Exception
        r_serv_onion.set('cookies:json_cookies:{}'.format(cookies_uuid), json.dumps(json_cookies))

    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    for cookie_dict in l_cookies or ():
        r_serv_onion.hset(manual_key, cookie_dict['name'], cookie_dict['value'])

    # cookies level  # TODO: edit level set on edit
    r_serv_onion.sadd('cookies:all', cookies_uuid)
    if level == 0:
        r_serv_onion.sadd('cookies:user:{}'.format(user_id), cookies_uuid)
    else:
        r_serv_onion.sadd('cookies:global', cookies_uuid)

    # Metadata must be keyed by the cookie set UUID: this is the key that
    # exist_cookies_uuid() looks up.
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    r_serv_onion.hset(metadata_key, 'user_id', user_id)
    r_serv_onion.hset(metadata_key, 'level', level)
    # NOTE(review): description may be None here - confirm the Redis client in
    # use accepts None as a hash value.
    r_serv_onion.hset(metadata_key, 'description', description)
    r_serv_onion.hset(metadata_key, 'date', datetime.date.today().strftime("%Y%m%d"))
    return cookies_uuid
#### ####
def is_redirection(domain, last_url):
    """Tell whether ``last_url`` points outside of ``domain``.

    The host of ``last_url`` is reduced to its last two dot-separated labels
    (or to the whole host when it has fewer than two) and compared with
    ``domain``.

    Args:
        domain: the domain the crawl was started on.
        last_url: the URL the crawler ended up on.

    Returns:
        bool: True when the reduced host differs from ``domain``.
    """
    # .hostname drops any port / userinfo and lower-cases the host, so
    # "example.com:8080" is not mistaken for a different domain; it is None
    # when the URL carries no host at all.
    hostname = urlparse(last_url).hostname or ''
    # Slicing instead of indexing [-2] keeps single-label hosts (e.g.
    # "localhost") and empty hosts from raising IndexError.
    last_domain = '.'.join(hostname.split('.')[-2:])
    return domain != last_domain
# domain up
def create_domain_metadata(domain_type, domain, current_port, date, date_month):

View file

@ -121,8 +121,8 @@ class TorSplashCrawler():
self.date_month = date['date_month']
self.date_epoch = int(date['epoch'])
self.png = True
self.har = True
self.png = crawler_options['png']
self.har = crawler_options['har']
self.cookies = cookies
config_section = 'Crawler'
@ -176,6 +176,8 @@ class TorSplashCrawler():
# detect connection to proxy refused
error_log = (json.loads(response.body.decode()))
print(error_log)
elif crawler_splash.is_redirection(self.domains[0], response.data['last_url']):
pass # ignore response
else:
item_id = crawler_splash.create_item_id(self.item_dir, self.domains[0])

View file

@ -37,8 +37,7 @@ if __name__ == '__main__':
crawler_options = crawler_json['crawler_options']
date = crawler_json['date']
requested_mode = crawler_json['requested']
cookies = crawler_splash.load_cookies(crawler_splash.get_cookies(), domain, crawler_type='onion')
print(cookies)
cookies = crawler_splash.load_cookies('ccad0090-bdcb-4ba5-875b-3dae8f936216', domain, crawler_type=service_type)
redis_cache.delete('crawler_request:{}'.format(uuid))

View file

@ -25,6 +25,7 @@ import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Domain
import crawler_splash
r_cache = Flask_config.r_cache
r_serv_db = Flask_config.r_serv_db
@ -156,3 +157,55 @@ def domains_explorer_web():
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
@crawler_splash.route('/crawler/cookies/add', methods=['GET'])
#@login_required
#@login_analyst
def crawler_cookies_add():
    """Serve the ``add_cookies.html`` form page."""
    template_name = "add_cookies.html"
    return render_template(template_name)
@crawler_splash.route('/crawler/cookies/add_post', methods=['POST'])
#@login_required
#@login_analyst
def crawler_cookies_add_post():
    """Handle the add-cookies form: collect the submitted cookies and save them.

    Reads ``description`` and ``level`` from the form, the optional uploaded
    ``file`` (decoded as JSON text, ``'[]'`` when absent) and every form field
    submitted with exactly two values, interpreted as (cookie name, cookie
    value). The result is passed to ``save_cookies`` and the form page is
    rendered again.
    """
    # NOTE(review): `crawler_splash` is used here both as the object providing
    # `.route` and as the lib exposing `save_cookies` - confirm one binding
    # does not shadow the other in this module.
    user_id = current_user.get_id()
    description = request.form.get('description')
    # Any non-empty submitted value for the checkbox maps to level 1.
    level = 1 if request.form.get('level') else 0

    if 'file' in request.files:
        json_file = request.files['file'].read().decode()
    else:
        json_file = '[]'

    # Get cookies to add
    l_manual_cookie = []
    l_invalid_cookie = []  # values submitted without a name; collected but not reported yet
    for field_name in list(request.form):
        values = request.form.getlist(field_name)
        if len(values) != 2:
            # single-valued fields (e.g. description, level) are not cookies
            continue
        cookie_name, cookie_value = values
        if cookie_name:
            l_manual_cookie.append({'name': cookie_name, 'value': cookie_value})
        elif cookie_value:
            l_invalid_cookie.append({'name': '', 'value': cookie_value})

    cookie_uuid = crawler_splash.save_cookies(user_id, json_cookies=json_file, l_cookies=l_manual_cookie, level=level, description=description)
    return render_template("add_cookies.html")
@crawler_splash.route('/crawler/cookies/all', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookies_all():
    """Render the cookie sets visible to the current user.

    Passes the current user's own cookie sets and the global cookie sets to
    the template as ``user_cookies`` and ``global_cookies``.
    """
    # NOTE(review): `crawler_splash` is used here both as the object providing
    # `.route` and as the lib exposing the cookie getters - confirm one binding
    # does not shadow the other in this module.
    # get_id() takes no argument; passing the not-yet-assigned `user_id` to it
    # raised UnboundLocalError on every request.
    user_id = current_user.get_id()
    user_cookies = crawler_splash.get_user_cookies(user_id)
    global_cookies = crawler_splash.get_all_global_cookies()
    return render_template("add_cookies.html", user_cookies=user_cookies, global_cookies=global_cookies)

View file

@ -0,0 +1,156 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL-Framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card mb-3 mt-1">
<div class="card-header text-white bg-dark">
<h5 class="card-title"><i class="fas fa-cookie"></i> Add Cookies</h5>
</div>
<div class="card-body">
<form action="{{ url_for('crawler_splash.crawler_cookies_add_post') }}" method="post" enctype="multipart/form-data">
<div class="row">
<div class="col-12 col-md-9">
<div class="input-group mb-2 mr-sm-2">
<div class="input-group-prepend">
<div class="input-group-text"><i class="fas fa-tag"></i></div>
</div>
<input id="description" name="description" class="form-control" placeholder="cookies description - (optional)" type="text">
</div>
</div>
<div class="col-12 col-md-3">
<div class="custom-control custom-switch mt-1">
<input class="custom-control-input" type="checkbox" name="level" id="id_level" checked="">
<label class="custom-control-label" for="id_level">
<i class="fas fa-users"></i>&nbsp;Show cookies to all Users
</label>
</div>
</div>
</div>
<hr>
<div class="form-group">
<label for="file"><b>JSON File</b> Cookies to import:</label>
<input type="file" class="form-control-file btn btn-outline-secondary" id="file" name="file">
</div>
<hr>
<div>
<h5>Add manual cookies:</h5>
<div class="row">
<div class="col-5" for="obj_input_cookie_name"><b>Cookie Name</b></div>
<div class="col-6" for="obj_input_cookie_value"><b>Cookie Value</b></div>
</div>
<div class="form-horizontal">
<div class="form-body">
<div class="form-group">
<div class="fields">
  <!-- First cookie row: both inputs share one name so the server receives them as a (name, value) pair. -->
  <div class="input-group mb-1">
    <input type="text" class="form-control col-5" name="first_cookie" id="obj_input_cookie_name">
    <input type="text" class="form-control col-6" name="first_cookie" id="obj_input_cookie_value">
    <span class="btn btn-info input-group-addon add-field col-1"><i class="fas fa-plus"></i></span>
  </div>
  <br>
  <!-- Hidden anchor: the page script inserts new cookie rows before this element. -->
  <span class="help-block" hidden>Manual Cookies</span>
</div>
</div>
</div>
</div>
</div>
<div class="form-group">
<button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie-bite"></i> Add Cookies</button>
</div>
</form>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
var chart = {};
// Once the DOM is ready, highlight the crawler page and the "Add Cookies"
// entry in the navigation, and un-mute the "Cookies" sidebar title.
$(document).ready(function(){
$("#page-crawler").addClass("active");
$("#nav_cookies_add").addClass("active");
$("#nav_title_cookies").removeClass("text-muted");
});
// Markup fragments for an extra cookie row. The gaps between the fragments are
// filled with the same generated name, so both inputs of a row share one form
// field name and are submitted together as a two-value (name, value) pair.
var input_1 = '<div class="input-group mb-1"><input type="text" class="form-control col-5" name="'
var input_2 = '"><input type="text" class="form-control col-6" name="'
var input_3 = '">';
// Delete button appended to each added row; it also carries the closing </div>.
var minusButton = '<span class="btn btn-danger input-group-addon delete-field col-1"><i class="fas fa-trash-alt"></i></span></div>';
// "+" button: insert a new cookie row, named with a fresh UUID, before the
// hidden .help-block anchor.
$('.add-field').click(function() {
var new_uuid = uuidv4();
var template = input_1 + new_uuid + input_2 + new_uuid + input_3;
var temp = $(template).insertBefore('.help-block');
temp.append(minusButton);
});
// Delegated handler so rows added after page load can be removed too:
// clicking a delete button removes its parent row.
$('.fields').on('click', '.delete-field', function(){
$(this).parent().remove();
});
// Collapse the side menu when it is visible, expand it otherwise, and adjust
// the layout classes of the side column and main content to match.
function toggle_sidebar(){
    var navMenu = $('#nav_menu');
    var sideMenu = $('#side_menu');
    var coreContent = $('#core_content');

    if (navMenu.is(':visible')) {
        navMenu.hide();
        sideMenu.removeClass('border-right').removeClass('col-lg-2');
        coreContent.removeClass('col-lg-10');
        return;
    }

    navMenu.show();
    sideMenu.addClass('border-right').addClass('col-lg-2');
    coreContent.addClass('col-lg-10');
}
// Generate a random version-4 style UUID string.
// The expression ([1e7]+-1e3+-4e3+-8e3+-1e11) concatenates to the template
// "10000000-1000-4000-8000-100000000000". Every 0, 1 and 8 in it is replaced
// by a hex digit derived from crypto.getRandomValues; the literal "4" is not
// matched by the regex and stays in place as the version digit.
function uuidv4() {
return ([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g, c =>
// For c = 0 or 1 the mask is 15 (any hex digit); for c = 8 the mask is
// 15 >> 2 = 3, so that position becomes 8, 9, a or b.
(c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
);
}
</script>

View file

@ -47,9 +47,6 @@
<h5 class="d-flex text-muted w-100" id="nav_title_domains_explorer">
<span>Domain Explorer </span>
<a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
<i class="fas fa-plus-circle ml-auto"></i>
</a>
</h5>
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
<li class="nav-item">
@ -64,5 +61,27 @@
<span>Web Domain</span>
</a>
</li>
</ul>
<h5 class="d-flex text-muted w-100" id="nav_title_cookies">
<span>Cookies </span>
<a class="ml-auto" href="{{url_for('crawler_splash.crawler_cookies_add')}}">
<i class="fas fa-plus-circle ml-auto"></i>
</a>
</h5>
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
<li class="nav-item">
<a class="nav-link" href="{{url_for('crawler_splash.crawler_cookies_add')}}" id="nav_cookies_add">
<i class="fas fa-cookie"></i>
<span>Add Cookies</span>
</a>
</li>
<li class="nav-item">
  {# "All Cookies" entry: must target the cookie list route, not the web domain explorer #}
  <a class="nav-link" href="{{url_for('crawler_splash.crawler_cookies_all')}}" id="nav_cookies_all">
    <i class="fas fa-cookie-bite"></i>
    <span>All Cookies</span>
  </a>
</li>
</ul>
</nav>
</div>

View file

@ -165,7 +165,6 @@ $('.add-field').click(function() {
});
$('.fields').on('click', '.delete-field', function(){
console.log($(this).parent());
$(this).parent().remove();
//$.get( "#")
});