# ail-framework/bin/lib/Statistics.py

#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import datetime
import os
import redis
import sys

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader

# The loader is only needed to obtain the Redis handle used by the functions below.
config_loader = ConfigLoader.ConfigLoader()
# Handle returned for the "ARDB_Statistics" entry; all reads/writes below go through it.
r_statistics = config_loader.get_redis_conn("ARDB_Statistics")
#r_serv_trend = ConfigLoader().get_redis_conn("ARDB_Trending")
# Drop the module-level reference to the loader once the connection is obtained.
config_loader = None

# Cardinality threshold that the update_* functions compare against zcard()
# before adding a new member to one of the per-day "top" sorted sets.
PIE_CHART_MAX_CARDINALITY = 8

def incr_module_timeout_statistic(module_name):
    """Increment by one today's timeout counter of *module_name*.

    The counter is the field ``paste_by_modules_timeout:<module_name>`` of
    the hash whose key is today's date formatted as ``YYYYMMDD``.
    """
    day_key = datetime.date.today().strftime("%Y%m%d")
    counter_field = 'paste_by_modules_timeout:{}'.format(module_name)
    r_statistics.hincrby(day_key, counter_field, 1)

def create_item_statistics(item_id, source, size):
    """Placeholder: accepts the arguments, does nothing and returns ``None``."""
    return None

def get_item_sources():
    """Return all members of the ``all_provider_set`` set."""
    sources = r_statistics.smembers('all_provider_set')
    return sources

def get_nb_items_processed_by_day_and_source(date, source):
    """Return the item count stored for *source* on *date* as an ``int``.

    Reads field *date* of the hash ``<source>_num``; a missing or empty
    value yields ``0``.
    """
    raw_count = r_statistics.hget(f'{source}_num', date)
    return int(raw_count) if raw_count else 0

def get_items_total_size_by_day_and_source(date, source):
    """Return the cumulated item size stored for *source* on *date* as a ``float``.

    Reads field *date* of the hash ``<source>_size``; a missing or empty
    value yields ``0.0``.
    """
    raw_size = r_statistics.hget(f'{source}_size', date)
    return float(raw_size) if raw_size else 0.0

def get_items_av_size_by_day_and_source(date, source):
    """Return the average item size stored for *source* on *date* as a ``float``.

    Reads field *date* of the hash ``<source>_avg``; a missing or empty
    value yields ``0.0``.
    """
    raw_avg = r_statistics.hget(f'{source}_avg', date)
    return float(raw_avg) if raw_avg else 0.0

def _create_item_stats_size_nb(date, source, num, size, avg):
    """Overwrite the stored count, total size and average size of *source* for *date*.

    Each value goes into field *date* of the hash ``<source>_num``,
    ``<source>_size`` and ``<source>_avg`` respectively.
    """
    for suffix, value in (('num', num), ('size', size), ('avg', avg)):
        r_statistics.hset(f'{source}_{suffix}', date, value)

def get_item_stats_size_avg_by_date(date=None):
    """Return the per-source average-size ranking stored for one day.

    Args:
        date: suffix of the ``top_avg_size_set_<date>`` sorted-set key.
            When omitted, today's date formatted as ``YYYYMMDD`` is used
            (the format ``incr_module_timeout_statistic`` uses for its
            daily key -- NOTE(review): confirm callers store dates in
            that format).

    Returns:
        The result of ``zrange(key, 0, -1, withscores=True)``: every member
        of the sorted set together with its score.
    """
    # The previous body read an undefined name `date`, so every call raised
    # NameError. The day is now a parameter, optional so that existing
    # zero-argument calls keep working.
    if date is None:
        date = datetime.date.today().strftime("%Y%m%d")
    return r_statistics.zrange(f'top_avg_size_set_{date}', 0, -1, withscores=True)

def get_item_stats_nb_by_date(date=None):
    """Return the per-source item-count ranking stored for one day.

    Args:
        date: suffix of the ``providers_set_<date>`` sorted-set key.
            When omitted, today's date formatted as ``YYYYMMDD`` is used
            (the format ``incr_module_timeout_statistic`` uses for its
            daily key -- NOTE(review): confirm callers store dates in
            that format).

    Returns:
        The result of ``zrange(key, 0, -1, withscores=True)``: every member
        of the sorted set together with its score.
    """
    # The previous body read an undefined name `date`, so every call raised
    # NameError. The day is now a parameter, optional so that existing
    # zero-argument calls keep working.
    if date is None:
        date = datetime.date.today().strftime("%Y%m%d")
    return r_statistics.zrange(f'providers_set_{date}', 0, -1, withscores=True)

def _set_item_stats_nb_by_date(date, source):
    # NOTE(review): this helper looks unfinished.
    #  - `zrange` is called with the key only, while the other zrange calls
    #    in this file pass a start/end range (0, -1); with redis-py this
    #    presumably raises TypeError for the missing arguments -- confirm.
    #  - `source` is never used.
    #  - Despite the `_set_` prefix, nothing is written to Redis here.
    # Confirm the intended behaviour before relying on this function.
    return r_statistics.zrange(f'providers_set_{date}', )


# # TODO: load ZSET IN CACHE => FAST UPDATE
def update_item_stats_size_nb(item_id, source, size, date):
    """Record one more item of *size* for *source* on *date* and refresh rankings.

    Args:
        item_id: identifier of the item (not used by this function).
        source: name of the item source / provider.
        size: size of the item, added to the running total of the day.
        date: day identifier used as hash field and as sorted-set key suffix.

    Side effects (all on ``r_statistics``):
        * adds *source* to ``all_provider_set``;
        * increments ``<source>_num[date]`` by 1 and ``<source>_size[date]``
          by *size*, then stores their ratio in ``<source>_avg[date]``;
        * updates the capped rankings ``top_avg_size_set_<date>`` (score:
          average size) and ``providers_set_<date>`` (score: item count).
    """
    # Add/Update in Redis
    r_statistics.sadd('all_provider_set', source)

    # Both increment commands return the value after the increment.
    nb_items = int(r_statistics.hincrby(f'{source}_num', date, 1))
    sum_size = float(r_statistics.hincrbyfloat(f'{source}_size', date, size))
    new_avg = sum_size / nb_items
    r_statistics.hset(f'{source}_avg', date, new_avg)

    # TOP Items Size
    # (the cardinality used to be read from 'top_size_set_<date>', a key that
    # is never written, so the cap was never enforced)
    _update_top_zset(f'top_avg_size_set_{date}', source, new_avg)

    # TOP Nb Items
    # (the eviction used to call zrem() without the sorted-set key)
    _update_top_zset(f'providers_set_{date}', source, float(nb_items))


def _update_top_zset(key, member, score):
    """Insert/update *member* in the sorted set *key*, capped at PIE_CHART_MAX_CARDINALITY.

    A member already present always gets its score refreshed. A new member is
    added freely while the set is below the cap; once the cap is reached it
    replaces the lowest-scored member only if its own score is strictly higher.
    """
    # NOTE(review): zadd keeps the positional (score, member) form used
    # throughout this file; redis-py >= 3.0 expects a {member: score}
    # mapping instead -- confirm the client version in use.
    has_room = r_statistics.zcard(key) < PIE_CHART_MAX_CARDINALITY
    if has_room or r_statistics.zscore(key, member) is not None:
        r_statistics.zadd(key, score, member)
        return

    # zset at full capacity: fetch the single lowest-scored entry.
    lowest = r_statistics.zrangebyscore(key, '-inf', '+inf', withscores=True, start=0, num=1)
    # `lowest` is a list of (value, score) pairs
    if lowest and float(lowest[0][1]) < score:
        # remove min from set and add the new one
        r_statistics.zrem(key, lowest[0][0])
        r_statistics.zadd(key, score, member)

# keyword  num

def _add_module_stats(module_name, total_sum, keyword, date):
    """Store *keyword* with score ``float(total_sum)`` in ``top_<module_name>_set_<date>``."""
    zset_key = f'top_{module_name}_set_{date}'
    score = float(total_sum)
    r_statistics.zadd(zset_key, score, keyword)

# # TODO: ONE HSET BY MODULE / CUSTOM STATS
def update_module_stats(module_name, num, keyword, date):
    """Add *num* to the daily counter of *keyword* for a module and refresh its ranking.

    Args:
        module_name: name of the module the statistic belongs to.
        num: amount to add to the counter (converted with ``int``).
        keyword: the counted keyword.
        date: day identifier; used as hash key and as sorted-set key suffix.

    Side effects (all on ``r_statistics``):
        * increments field ``<module_name>-<keyword>`` of the hash *date*;
        * updates the capped ranking ``top_<module_name>_set_<date>`` with
          the new total as score.
    """
    # Add/Update in Redis
    # hincrby returns the value after the increment, so no second read is
    # needed. (The previous hget used the undefined name `module` and raised
    # NameError on every call.)
    # TODO: RENAME ME (hash field naming)
    keyword_total_sum = int(r_statistics.hincrby(date, f'{module_name}-{keyword}', int(num)))

    # Compute Most Posted
    top_key = f'top_{module_name}_set_{date}'
    has_room = r_statistics.zcard(top_key) < PIE_CHART_MAX_CARDINALITY
    # A keyword already ranked only needs its score refreshed; evicting the
    # minimum in that case would shrink the set by one member.
    if has_room or r_statistics.zscore(top_key, keyword) is not None:
        r_statistics.zadd(top_key, float(keyword_total_sum), keyword)
        return

    # zset at full capacity: fetch the single lowest-scored entry.
    lowest = r_statistics.zrangebyscore(top_key, '-inf', '+inf', withscores=True, start=0, num=1)
    # `lowest` is a list of (value, score) pairs
    if lowest and float(lowest[0][1]) < keyword_total_sum:
        # remove min from set and add the new one
        r_statistics.zrem(top_key, lowest[0][0])
        r_statistics.zadd(top_key, float(keyword_total_sum), keyword)

def get_module_tld_stats_by_tld_date(date, tld, module='credential'):
    """Return the counter stored for *tld* on *date* for one module.

    Args:
        date: day identifier, suffix of the ``<module>_by_tld:<date>`` hash key.
        tld: hash field to read.
        module: key prefix; defaults to ``'credential'``, the value that
            used to be hard-coded, so existing two-argument calls are
            unaffected.

    Returns:
        The stored value as ``int``, or ``0`` when it is missing or empty.
    """
    raw_count = r_statistics.hget(f'{module}_by_tld:{date}', tld)
    return int(raw_count) if raw_count else 0

def get_module_tld_stats_by_date(module, date):
    """Return every field/value pair of the hash ``<module>_by_tld:<date>``."""
    hash_key = f'{module}_by_tld:{date}'
    return r_statistics.hgetall(hash_key)

def add_module_tld_stats_by_date(module, date, tld, nb):
    """Add ``int(nb)`` to field *tld* of the hash ``<module>_by_tld:<date>``."""
    hash_key = f'{module}_by_tld:{date}'
    increment = int(nb)
    r_statistics.hincrby(hash_key, tld, increment)

# r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
# r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)