fix: [crawler] fix capture start time

2024-11-26 15:57:16 +00:00 · 2023-12-11 09:30:09 +01:00 · 2023-12-11 09:30:09 +01:00 · 235539ea42
commit 235539ea42
parent 5fc9b1403f
2 changed files with 12 additions and 4 deletions
--- a/bin/crawlers/Crawler.py
+++ b/bin/crawlers/Crawler.py
@@ -121,7 +121,9 @@ class Crawler(AbstractModule):
        if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures():
            task_row = crawlers.add_task_to_lacus_queue()
            if task_row:
-                task_uuid, priority = task_row
+                task, priority = task_row
+                task.start()
+                task_uuid = task.uuid
                try:
                    self.enqueue_capture(task_uuid, priority)
                except ConnectionError:
@@ -195,10 +197,17 @@ class Crawler(AbstractModule):
        print(task.uuid, capture_uuid, 'launched')
        if self.ail_to_push_discovery:
            if task.get_depth() == 1 and priority < 10 and task.get_domain().endswith('.onion'):
                har = task.get_har()
                screenshot = task.get_screenshot()
-                self.ail_to_push_discovery.add_crawler_capture(task_uuid, capture_uuid, url, har=har,
+                # parent_id = task.get_parent()
+                # if parent_id != 'manual' and parent_id != 'auto':
+                #     parent = parent_id[19:-36]
+                # else:
+                #     parent = 'AIL_capture'
+                self.ail_to_push_discovery.add_crawler_capture(task_uuid, capture_uuid, url, har=har,  # parent=parent,
                                                               screenshot=screenshot, depth_limit=1, proxy='force_tor')
                print(task.uuid, capture_uuid, 'Added to ail_to_push_discovery')
        return capture_uuid
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -1642,8 +1642,7 @@ def add_task_to_lacus_queue():
        return None
    task_uuid, priority = task_uuid[0]
    task = CrawlerTask(task_uuid)
-    task.start()
+    return task, priority
-    return task.uuid, priority
 # PRIORITY:  discovery = 0/10, feeder = 10, manual = 50, auto = 40, test = 100
 def create_task(url, depth=1, har=True, screenshot=True, header=None, cookiejar=None, proxy=None,