mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-26 07:47:17 +00:00
fix: [crawler] fix crawler queue stats
This commit is contained in:
parent
cc7e67d5ed
commit
a20b6054e8
2 changed files with 12 additions and 0 deletions
|
@ -61,6 +61,8 @@ class Crawler(AbstractModule):
|
||||||
crawlers.load_blacklist()
|
crawlers.load_blacklist()
|
||||||
# update captures cache
|
# update captures cache
|
||||||
crawlers.reload_crawler_captures()
|
crawlers.reload_crawler_captures()
|
||||||
|
# update crawler queue stats
|
||||||
|
crawlers.reload_crawlers_stats()
|
||||||
|
|
||||||
self.crawler_scheduler = crawlers.CrawlerScheduler()
|
self.crawler_scheduler = crawlers.CrawlerScheduler()
|
||||||
|
|
||||||
|
|
|
@ -1018,6 +1018,16 @@ def get_crawlers_stats(domain_type=None):
|
||||||
stats[domain_type] = {'queue': queue, 'up': up, 'down': down, 'crawled': crawled}
|
stats[domain_type] = {'queue': queue, 'up': up, 'down': down, 'crawled': crawled}
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
def reload_crawlers_stats():
    """Drop queue entries whose crawler task no longer exists.

    For every domain type returned by ``get_crawler_all_types()``, read the
    members of the ``crawler:queue:type:<domain_type>`` set and remove each
    task UUID for which ``CrawlerTask(task_uuid).exists()`` is false.
    """
    for domain_type in get_crawler_all_types():
        queue_key = f'crawler:queue:type:{domain_type}'
        # NOTE(review): presumably the size of this set is what the crawler
        # queue stats report - confirm against get_crawlers_stats().
        # Collect first, remove afterwards, so the set is not modified while
        # its members are still being inspected.
        stale_uuids = [
            queued_uuid
            for queued_uuid in r_crawler.smembers(queue_key)
            if not CrawlerTask(queued_uuid).exists()
        ]
        for stale_uuid in stale_uuids:
            r_crawler.srem(queue_key, stale_uuid)
|
||||||
|
|
||||||
#### Blocklist ####
|
#### Blocklist ####
|
||||||
|
|
||||||
def get_blacklist():
|
def get_blacklist():
|
||||||
|
|
Loading…
Reference in a new issue