fix: [crawler] debug crawler queued capture loop

This commit is contained in:
terrtia 2025-01-07 15:23:06 +01:00
parent 80c7410cb1
commit e6e48c69f5
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
2 changed files with 7 additions and 3 deletions

View file

@@ -147,6 +147,7 @@ class Crawler(AbstractModule):
if capture:
try:
status = self.lacus.get_capture_status(capture.uuid)
print(status)
if status == crawlers.CaptureStatus.DONE:
return capture
elif status == crawlers.CaptureStatus.UNKNOWN:
@@ -164,7 +165,10 @@ class Crawler(AbstractModule):
self.logger.warning(f'capture UNKNOWN Timeout, {task.uuid} Send back in queue')
else:
capture.update(status)
elif status == crawlers.CaptureStatus.QUEUED or status == crawlers.CaptureStatus.ONGOING:
elif status == crawlers.CaptureStatus.QUEUED:
capture.update(status, delta=30)
print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time() + 30))
elif status == crawlers.CaptureStatus.ONGOING:
capture.update(status)
print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time()))
# Invalid State

View file

@@ -1531,13 +1531,13 @@ class CrawlerCapture:
r_crawler.zadd('crawler:captures', {self.uuid: launch_time})
r_cache.zadd('crawler:captures', {self.uuid: launch_time})
def update(self, status):
def update(self, status, delta=0):
# Error or Reload
if not status:
r_cache.hset(f'crawler:capture:{self.uuid}', 'status', CaptureStatus.UNKNOWN.value)
r_cache.zadd('crawler:captures', {self.uuid: 0})
else:
last_check = int(time.time())
last_check = int(time.time() + delta)
r_cache.hset(f'crawler:capture:{self.uuid}', 'status', status)
r_cache.zadd('crawler:captures', {self.uuid: last_check})