From 39f54d44202cd57a79e82c2605c88f6dfe0dd278 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 30 Jul 2018 09:21:22 +0200 Subject: [PATCH] chg: [Mail] add country statistic --- .gitignore | 1 + OVERVIEW.md | 12 +++++++++++- bin/Mail.py | 18 ++++++++++++++---- bin/packages/lib_refine.py | 21 +++++++++++++-------- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index e74906ae..11c01083 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ var/www/submitted # Local config bin/packages/config.cfg +bin/packages/config.cfg.backup configs/keys # installed files diff --git a/OVERVIEW.md b/OVERVIEW.md index aa417ff7..cf40bad9 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -20,4 +20,14 @@ Redis and ARDB overview - DB 0 - Lines duplicate - DB 1 - Hashes -To be updated +ARDB overview +--------------------------- +ARDB_DB +* DB 1 - Curve +* DB 2 - TermFreq +* DB 3 - Trending +* DB 4 - Sentiment +* DB 5 - TermCred +* DB 6 - Tags +* DB 7 - Metadata +* DB 8 - Statistics diff --git a/bin/Mail.py b/bin/Mail.py index c1d8cf70..13a675ea 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -14,6 +14,7 @@ It apply mail regexes on paste content and warn if above a threshold. import redis import pprint import time +import datetime import dns.exception from packages import Paste from packages import lib_refine @@ -36,6 +37,12 @@ if __name__ == "__main__": port=p.config.getint("Redis_Cache", "port"), db=p.config.getint("Redis_Cache", "db"), decode_responses=True) + # ARDB # + server_statistics = redis.StrictRedis( + host=p.config.get("ARDB_Statistics", "host"), + port=p.config.getint("ARDB_Statistics", "port"), + db=p.config.getint("ARDB_Statistics", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("Suscribed to channel mails_categ") @@ -66,7 +73,6 @@ if __name__ == "__main__": PST.save_attribute_redis(channel, (MX_values[0], list(MX_values[1]))) - pprint.pprint(MX_values) to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ format(PST.p_source, PST.p_date, PST.p_name, MX_values[0], PST.p_path) @@ -81,10 +87,14 @@ if __name__ == "__main__": else: publisher.info(to_print) - #Send to ModuleStats + #Send to ModuleStats and create country statistics + date = datetime.datetime.now().strftime("%Y%m") for mail in MX_values[1]: - print('mail;{};{};{}'.format(1, mail, PST.p_date)) - p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') + print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) + p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') + + country = mail.split('.')[-1] + server_statistics.hincrby('mail_by_country:'+date, country, MX_values[1][mail]) prec_filename = filename diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py index 83511e40..5d2af0a9 100644 --- a/bin/packages/lib_refine.py +++ b/bin/packages/lib_refine.py @@ -38,6 +38,7 @@ def checking_MX_record(r_serv, adress_set, addr_dns): score = 0 num = len(adress_set) WalidMX = set([]) + validMX = {} # Transforming the set into a string MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) resolver = dns.resolver.Resolver() @@ -46,20 +47,23 @@ def checking_MX_record(r_serv, adress_set, addr_dns): resolver.lifetime = 2 if MXdomains != []: - for MXdomain in set(MXdomains): + for MXdomain in MXdomains: try: + MXdomain = MXdomain[1:] # Already in Redis living. - if r_serv.exists(MXdomain[1:]): + if r_serv.exists(MXdomain): score += 1 - WalidMX.add(MXdomain[1:]) + WalidMX.add(MXdomain) + validMX[MXdomain] = validMX.get(MXdomain, 0) + 1 # Not already in Redis else: # If I'm Walid MX domain - if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX): + if resolver.query(MXdomain, rdtype=dns.rdatatype.MX): # Gonna be added in redis. - r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) + r_serv.setex(MXdomain, 1, timedelta(days=1)) score += 1 - WalidMX.add(MXdomain[1:]) + WalidMX.add(MXdomain) + validMX[MXdomain] = validMX.get(MXdomain, 0) + 1 else: pass @@ -86,13 +90,14 @@ def checking_MX_record(r_serv, adress_set, addr_dns): except dns.resolver.Timeout: print('timeout') - r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) + r_serv.setex(MXdomain, 1, timedelta(days=1)) except Exception as e: print(e) publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) - return (num, WalidMX) + #return (num, WalidMX) + return (num, validMX) def checking_A_record(r_serv, domains_set):