From 1254c1c9c01170b8d65a1bcde2d1ded6cbecace5 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Wed, 14 Sep 2022 10:02:38 +0200
Subject: [PATCH] chg: [api] send url to crawler

---
 bin/lib/crawlers.py                      | 47 ++++++++++++++++++++++--
 var/www/modules/restApi/Flask_restApi.py | 13 +++++++
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index a2201c74..932938b2 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -640,7 +640,6 @@ def update_auto_crawler_queue():
         print(mess)
         r_serv_onion.sadd(f'{domain_type}_crawler_priority_queue', mess)
-
 
 ##-- AUTOMATIC CRAWLER --##
 
 #### CRAWLER TASK ####
@@ -707,14 +706,54 @@ def save_crawler_config(crawler_mode, crawler_type, crawler_config, domain, url=
     r_serv_onion.set('crawler_config:{}:{}:{}:{}'.format(crawler_mode, crawler_type, domain, url), json.dumps(crawler_config))
 
 def send_url_to_crawl_in_queue(crawler_mode, crawler_type, url):
-    print('{}_crawler_priority_queue'.format(crawler_type), '{};{}'.format(url, crawler_mode))
-    r_serv_onion.sadd('{}_crawler_priority_queue'.format(crawler_type), '{};{}'.format(url, crawler_mode))
+    print(f'{crawler_type}_crawler_priority_queue', f'{url};{crawler_mode}')
+    r_serv_onion.sadd(f'{crawler_type}_crawler_priority_queue', f'{url};{crawler_mode}')
     # add auto crawled url for user UI
     if crawler_mode == 'auto':
-        r_serv_onion.sadd('auto_crawler_url:{}'.format(crawler_type), url)
+        r_serv_onion.sadd(f'auto_crawler_url:{crawler_type}', url)
+
+def add_url_to_crawl_in_queue(url, crawler_type, crawler_mode='manual'):
+    print(f'{crawler_type}_crawler_priority_queue', f'{url};{crawler_mode}')
+    r_serv_onion.sadd(f'{crawler_type}_crawler_priority_queue', f'{url};{crawler_mode}')
+    # CURRENTLY DISABLED
+    # # add auto crawled url for user UI
+    # if crawler_mode == 'auto':
+    #     r_serv_onion.sadd(f'auto_crawler_url:{crawler_type}', url)
 
 #### ####
 #### CRAWLER TASK API ####
+
+# # TODO: ADD RESULT JSON Response
+
+# # TODO: ADD user agent
+# # TODO: sanitize URL
+def api_add_crawler_task(data, user_id=None):
+    url = data.get('url', None)
+    if not url or url=='\n':
+        return ({'status': 'error', 'reason': 'No url supplied'}, 400)
+
+    screenshot = data.get('screenshot', False)
+    if screenshot:
+        screenshot = True
+    else:
+        screenshot = False
+    har = data.get('har', False)
+    if har:
+        har = True
+    else:
+        har = False
+    depth_limit = data.get('depth_limit', 1)
+    if depth_limit:
+        try:
+            depth_limit = int(depth_limit)
+            if depth_limit < 0:
+                depth_limit = 0
+        except ValueError:
+            return ({'status': 'error', 'reason': 'invalid depth limit'}, 400)
+    print(url, screenshot, har, depth_limit)
+    return create_crawler_task(url, screenshot=screenshot, har=har, depth_limit=depth_limit, crawler_type='onion')
+
+
 def api_create_crawler_task(user_id, url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, crawler_type=None, cookiejar_uuid=None, user_agent=None):
     # validate url
     if url is None or url=='' or url=='\n':
diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py
index cd74d963..fb9dda5b 100644
--- a/var/www/modules/restApi/Flask_restApi.py
+++ b/var/www/modules/restApi/Flask_restApi.py
@@ -556,6 +556,19 @@ def get_crawled_domain_list():
     dict_res['domain_type'] = domain_type
     return create_json_response(dict_res, res[1])
 
+# # TODO: ADD RESULT JSON Response
+@restApi.route("api/v1/add/crawler/task", methods=['POST'])
+@token_required('analyst')
+def add_crawler_task():
+    data = request.get_json()
+    user_id = get_user_from_token(get_auth_from_header())
+    res = crawlers.api_add_crawler_task(data, user_id=user_id)
+    if res:
+        return create_json_response(res[0], res[1])
+
+    dict_res = {'url': data['url']}
+    return create_json_response(dict_res, 200)
+
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 # # # # # # # # # # # # # # # # # # #    IMPORT    # # # # # # # # # # # # # # # # # # # # # # #
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
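
Note: a minimal client-side sketch of how the new api/v1/add/crawler/task endpoint could be
called once this patch is applied. The host, port and token below are placeholders (a default
AIL install typically serves the Flask API over HTTPS on port 7000 with a self-signed
certificate), and the JSON fields mirror the ones read by api_add_crawler_task() above.

    import requests

    # Placeholder values: point AIL_HOST at your instance and use a real API token
    # generated for an 'analyst' (or higher) user in the AIL web interface.
    AIL_HOST = 'https://127.0.0.1:7000'
    API_TOKEN = '<your-api-token>'

    payload = {
        'url': 'http://example.onion',  # placeholder address to crawl
        'screenshot': True,             # optional, defaults to False server-side
        'har': True,                    # optional, defaults to False server-side
        'depth_limit': 1,               # optional, defaults to 1 server-side
    }

    r = requests.post(f'{AIL_HOST}/api/v1/add/crawler/task',
                      headers={'Authorization': API_TOKEN},
                      json=payload,
                      verify=False)  # self-signed certificate on a default install
    print(r.status_code, r.json())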