ail-framework/bin/lib/objects/abstract_daterange_object.py

230 lines
6.9 KiB
Python
Executable file

# -*-coding:UTF-8 -*
"""
Base Class for AIL Objects
"""
##################################
# Import External packages
##################################
import os
import re
import sys
from abc import abstractmethod, ABC
# from flask import url_for
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib.item_basic import is_crawled, get_item_domain
from lib.data_retention_engine import update_obj_date
from packages import Date
# LOAD CONFIG
config_loader = ConfigLoader()
r_object = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
class AbstractDaterangeObject(AbstractObject, ABC):
"""
Abstract Subtype Object
"""
def __init__(self, obj_type, id):
""" Abstract for all the AIL object
:param obj_type: object type (item, ...)
:param id: Object ID
"""
super().__init__(obj_type, id)
def exists(self):
return r_object.exists(f'meta:{self.type}:{self.id}')
def _get_field(self, field):
return r_object.hget(f'meta:{self.type}:{self.id}', field)
def _set_field(self, field, value):
return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
def get_first_seen(self, r_int=False):
first_seen = self._get_field('first_seen')
if r_int:
if first_seen:
return int(first_seen)
else:
return 99999999
else:
return first_seen
def get_last_seen(self, r_int=False):
last_seen = self._get_field('last_seen')
if r_int:
if last_seen:
return int(last_seen)
else:
return 0
else:
return last_seen
def get_nb_seen(self):
return self.get_nb_correlation('item')
def get_nb_seen_by_date(self, date):
nb = r_object.zscore(f'{self.type}:date:{date}', self.id)
if nb is None:
return 0
else:
return int(nb)
def _get_meta(self, options=[]):
meta_dict = {'first_seen': self.get_first_seen(),
'last_seen': self.get_last_seen(),
'nb_seen': self.get_nb_seen()}
if 'sparkline' in options:
meta_dict['sparkline'] = self.get_sparkline()
return meta_dict
def set_first_seen(self, first_seen):
self._set_field('first_seen', first_seen)
def set_last_seen(self, last_seen):
self._set_field('last_seen', last_seen)
def update_daterange(self, date):
date = int(date)
# obj don't exit
if not self.exists():
self.set_first_seen(date)
self.set_last_seen(date)
else:
first_seen = self.get_first_seen(r_int=True)
last_seen = self.get_last_seen(r_int=True)
if date < first_seen:
self.set_first_seen(date)
if date > last_seen:
self.set_last_seen(date)
def get_sparkline(self):
sparkline = []
for date in Date.get_previous_date_list(6):
sparkline.append(self.get_nb_seen_by_date(date))
return sparkline
def get_content(self, r_type='str'):
if r_type == 'str':
return self.id
elif r_type == 'bytes':
return self.id.encode()
def _add_create(self):
r_object.sadd(f'{self.type}:all', self.id)
# TODO don't increase nb if same hash in item with different encoding
# if hash already in item
def _add(self, date, item_id):
if not self.exists():
self._add_create()
self.set_first_seen(date)
self.set_last_seen(date)
else:
self.update_daterange(date)
update_obj_date(date, self.type)
# NB Object seen by day
if not self.is_correlated('item', '', item_id): # if decoded not already in object
r_object.zincrby(f'{self.type}:date:{date}', 1, self.id)
# Correlations
self.add_correlation('item', '', item_id)
if is_crawled(item_id): # Domain
domain = get_item_domain(item_id)
self.add_correlation('domain', '', domain)
# TODO:ADD objects + Stats
def _create(self, first_seen=None, last_seen=None):
if first_seen:
self.set_first_seen(first_seen)
if last_seen:
self.set_last_seen(last_seen)
r_object.sadd(f'{self.type}:all', self.id)
# TODO
def _delete(self):
pass
class AbstractDaterangeObjects(ABC):
"""
Abstract Daterange Objects
"""
def __init__(self, obj_type):
""" Abstract for Daterange Objects
:param obj_type: object type (item, ...)
"""
self.type = obj_type
def get_all(self):
return r_object.smembers(f'{self.type}:all')
def get_by_date(self, date):
return r_object.zrange(f'{self.type}:date:{date}', 0, -1)
def get_nb_by_date(self, date):
return r_object.zcard(f'{self.type}:date:{date}')
def get_by_daterange(self, date_from, date_to):
obj_ids = set()
for date in Date.substract_date(date_from, date_to):
obj_ids = obj_ids | set(self.get_by_date(date))
return obj_ids
@abstractmethod
def get_metas(self, obj_ids, options=set()):
pass
def _get_metas(self, obj_class_ref, obj_ids, options=set()):
dict_obj = {}
for obj_id in obj_ids:
obj = obj_class_ref(obj_id)
dict_obj[obj_id] = obj.get_meta(options=options)
return dict_obj
@abstractmethod
def sanitize_name_to_search(self, name_to_search):
return name_to_search
def search_by_name(self, name_to_search, r_pos=False):
objs = {}
# for subtype in subtypes:
r_name = self.sanitize_name_to_search(name_to_search)
if not name_to_search or isinstance(r_name, dict):
return objs
r_name = re.compile(r_name)
for title_name in self.get_all():
res = re.search(r_name, title_name)
if res:
objs[title_name] = {}
if r_pos:
objs[title_name]['hl-start'] = res.start()
objs[title_name]['hl-end'] = res.end()
return objs
def api_get_chart_nb_by_daterange(self, date_from, date_to):
date_type = []
for date in Date.substract_date(date_from, date_to):
d = {'date': f'{date[0:4]}-{date[4:6]}-{date[6:8]}',
self.type: self.get_nb_by_date(date)}
date_type.append(d)
return date_type
def api_get_meta_by_daterange(self, date_from, date_to):
date = Date.sanitise_date_range(date_from, date_to)
return self.get_metas(self.get_by_daterange(date['date_from'], date['date_to']), options={'sparkline'})