mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-14 02:28:23 +00:00
chg: [crawler] tag domain by vanity
This commit is contained in:
parent
72f4733242
commit
2ead8c21aa
3 changed files with 30 additions and 0 deletions
|
@ -17,6 +17,7 @@ from modules.abstract_module import AbstractModule
|
||||||
from lib import ail_logger
|
from lib import ail_logger
|
||||||
from lib import crawlers
|
from lib import crawlers
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
from lib.Tag import get_domain_vanity_tags
|
||||||
from lib.objects import CookiesNames
|
from lib.objects import CookiesNames
|
||||||
from lib.objects import Etags
|
from lib.objects import Etags
|
||||||
from lib.objects.Domains import Domain
|
from lib.objects.Domains import Domain
|
||||||
|
@ -40,6 +41,9 @@ class Crawler(AbstractModule):
|
||||||
|
|
||||||
self.tracker_yara = Tracker_Yara(queue=False)
|
self.tracker_yara = Tracker_Yara(queue=False)
|
||||||
|
|
||||||
|
self.vanity_tags = get_domain_vanity_tags()
|
||||||
|
print('vanity tags:', self.vanity_tags)
|
||||||
|
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
|
|
||||||
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
||||||
|
@ -271,7 +275,12 @@ class Crawler(AbstractModule):
|
||||||
# Origin + History + tags
|
# Origin + History + tags
|
||||||
if self.root_item:
|
if self.root_item:
|
||||||
self.domain.set_last_origin(parent_id)
|
self.domain.set_last_origin(parent_id)
|
||||||
|
# Vanity
|
||||||
self.domain.update_vanity_cluster()
|
self.domain.update_vanity_cluster()
|
||||||
|
domain_vanity = self.domain.get_vanity()
|
||||||
|
if domain_vanity in self.vanity_tags:
|
||||||
|
for tag in self.vanity_tags[domain_vanity]:
|
||||||
|
self.domain.add_tag(tag)
|
||||||
# Tags
|
# Tags
|
||||||
for tag in task.get_tags():
|
for tag in task.get_tags():
|
||||||
self.domain.add_tag(tag)
|
self.domain.add_tag(tag)
|
||||||
|
|
|
@ -1521,6 +1521,24 @@ def refresh_auto_push():
|
||||||
|
|
||||||
# --- TAG AUTO PUSH --- #
|
# --- TAG AUTO PUSH --- #
|
||||||
|
|
||||||
|
def get_domain_vanity_tags():
    """Load the vanity -> tags mapping from ``$AIL_HOME/files/vanity_tags``.

    The file is expected to contain a JSON object whose keys are tags and
    whose values are iterables of vanity strings. Only tags accepted by
    ``is_taxonomie_tag`` or ``is_galaxy_tag`` are kept, and the mapping is
    inverted so that callers can look tags up by vanity.

    Returns:
        dict: ``{vanity: [tag, ...]}``. Empty when the file is missing,
        empty, not valid JSON, or not a JSON object.

    Raises:
        KeyError: if the ``AIL_HOME`` environment variable is not set.
    """
    vanity = {}
    path = os.path.join(os.environ['AIL_HOME'], 'files/vanity_tags')
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding='utf-8') as f:
            ltags = json.load(f)
    except FileNotFoundError:
        # The file is optional: no file simply means no vanity tags.
        return vanity
    except (json.decoder.JSONDecodeError, UnicodeDecodeError):
        print('Error files/vanity_tags, Invalid JSON')
        return vanity

    if not ltags:
        return vanity
    if not isinstance(ltags, dict):
        # Valid JSON of the wrong shape (list, number, string) used to raise
        # TypeError below; report it and fall back to "no vanity tags".
        print('Error files/vanity_tags, expected a JSON object mapping tags to vanities')
        return vanity

    for tag, tag_vanities in ltags.items():
        if is_taxonomie_tag(tag) or is_galaxy_tag(tag):
            for s_vanity in tag_vanities:
                vanity.setdefault(s_vanity, []).append(tag)
    return vanity
|
||||||
|
|
||||||
###################################################################################
|
###################################################################################
|
||||||
###################################################################################
|
###################################################################################
|
||||||
###################################################################################
|
###################################################################################
|
||||||
|
|
|
@ -411,6 +411,9 @@ class Domain(AbstractObject):
|
||||||
r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
|
r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
|
||||||
r_crawler.sadd(f'domain:language:{self.id}', language)
|
r_crawler.sadd(f'domain:language:{self.id}', language)
|
||||||
|
|
||||||
|
def get_vanity(self, len_vanity=4):
    """Return this domain's vanity, as computed by ``get_domain_vanity``.

    :param len_vanity: forwarded unchanged to ``get_domain_vanity``
        (presumably the number of leading characters kept; confirm against
        that helper).
    """
    domain_vanity = get_domain_vanity(self.id, len_vanity=len_vanity)
    return domain_vanity
|
||||||
|
|
||||||
def update_vanity_cluster(self):
|
def update_vanity_cluster(self):
|
||||||
if self.get_domain_type() == 'onion':
|
if self.get_domain_type() == 'onion':
|
||||||
update_vanity_cluster(self.id)
|
update_vanity_cluster(self.id)
|
||||||
|
|
Loading…
Reference in a new issue