mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-26 07:47:17 +00:00
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# -*-coding:UTF-8 -*
|
||
|
|
||
|
import gzip
|
||
|
import os
|
||
|
import sys
|
||
|
|
||
|
sys.path.append(os.environ['AIL_BIN'])
|
||
|
##################################
|
||
|
# Import Project packages
|
||
|
##################################
|
||
|
from lib import ail_updates
|
||
|
from lib.objects import ail_objects
|
||
|
from lib.objects import DomHashs
|
||
|
from lib.objects.Domains import Domain
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
update = ail_updates.AILBackgroundUpdate('v5.9')
|
||
|
n = 0
|
||
|
nb_items = ail_objects.card_obj_iterator('item', filters={'sources': ['crawled']})
|
||
|
update.set_nb_to_update(nb_items)
|
||
|
|
||
|
for item in ail_objects.obj_iterator('item', filters={'sources': ['crawled']}):
|
||
|
dom = item.get_domain()
|
||
|
domain = Domain(dom)
|
||
|
i_content = item.get_content()
|
||
|
if domain.exists() and i_content:
|
||
|
date = item.get_date()
|
||
|
# DOM-HASH
|
||
|
dom_hash = DomHashs.create(i_content)
|
||
|
dom_hash.add(date, item)
|
||
|
dom_hash.add_correlation('domain', '', domain.id)
|
||
|
|
||
|
print(domain.id, item.id, dom_hash.id)
|
||
|
|
||
|
update.inc_nb_updated()
|
||
|
n += 1
|
||
|
if n % 100 == 0:
|
||
|
update.update_progress()
|