mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
fix: [favicon] crawler favicon
This commit is contained in:
parent
c219febd71
commit
81c4dde7b0
1 changed file with 9 additions and 0 deletions
|
@@ -20,6 +20,7 @@ from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects import CookiesNames
|
from lib.objects import CookiesNames
|
||||||
from lib.objects import Etags
|
from lib.objects import Etags
|
||||||
from lib.objects.Domains import Domain
|
from lib.objects.Domains import Domain
|
||||||
|
from lib.objects import Favicons
|
||||||
from lib.objects.Items import Item
|
from lib.objects.Items import Item
|
||||||
from lib.objects import Screenshots
|
from lib.objects import Screenshots
|
||||||
from lib.objects import Titles
|
from lib.objects import Titles
|
||||||
|
@@ -198,6 +199,7 @@ class Crawler(AbstractModule):
|
||||||
user_agent=task.get_user_agent(),
|
user_agent=task.get_user_agent(),
|
||||||
proxy=task.get_proxy(),
|
proxy=task.get_proxy(),
|
||||||
cookies=task.get_cookies(),
|
cookies=task.get_cookies(),
|
||||||
|
with_favicon=True,
|
||||||
force=force,
|
force=force,
|
||||||
general_timeout_in_sec=90) # TODO increase timeout if onion ????
|
general_timeout_in_sec=90) # TODO increase timeout if onion ????
|
||||||
|
|
||||||
|
@@ -245,6 +247,7 @@ class Crawler(AbstractModule):
|
||||||
parent_id = task.get_parent()
|
parent_id = task.get_parent()
|
||||||
|
|
||||||
entries = self.lacus.get_capture(capture.uuid)
|
entries = self.lacus.get_capture(capture.uuid)
|
||||||
|
|
||||||
print(entries.get('status'))
|
print(entries.get('status'))
|
||||||
self.har = task.get_har()
|
self.har = task.get_har()
|
||||||
self.screenshot = task.get_screenshot()
|
self.screenshot = task.get_screenshot()
|
||||||
|
@@ -369,6 +372,12 @@ class Crawler(AbstractModule):
|
||||||
etag.add(self.date.replace('/', ''), self.domain)
|
etag.add(self.date.replace('/', ''), self.domain)
|
||||||
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
|
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
|
||||||
|
|
||||||
|
# FAVICON
|
||||||
|
if entries.get('potential_favicons'):
|
||||||
|
for favicon in entries['potential_favicons']:
|
||||||
|
fav = Favicons.create(favicon)
|
||||||
|
fav.add(item.get_date(), item)
|
||||||
|
|
||||||
# Next Children
|
# Next Children
|
||||||
entries_children = entries.get('children')
|
entries_children = entries.get('children')
|
||||||
if entries_children:
|
if entries_children:
|
||||||
|
|
Loading…
Reference in a new issue