#!/usr/bin/env python3
# -*-coding:UTF-8 -*

"""
The Web Module
============================

This module tries to parse URLs and warns if some defined contry code are present.

"""

##################################
# Import External packages
##################################
import redis
import pprint
import time
import os
import dns.exception
from pyfaup.faup import Faup
import re
# Country and ASN lookup
from cymru.ip2asn.dns import DNSClient as ip2asn
import socket
import pycountry
import ipaddress

##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
from Helper import Process


class Web(AbstractModule):
    """
    Web module for AIL framework
    """

    # Used to prevent concat with empty fields due to url parsing
    def avoidNone(self, a_string):
        if a_string is None:
            return ""
        else:
            return a_string

    def __init__(self):
        """
        Init Web
        """
        super(Web, self).__init__()

        # REDIS Cache
        self.r_serv2 = redis.StrictRedis(
            host=self.process.config.get("Redis_Cache", "host"),
            port=self.process.config.getint("Redis_Cache", "port"),
            db=self.process.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        # Country to log as critical
        self.cc_critical = self.process.config.get("Url", "cc_critical")

        # FUNCTIONS #
        self.redis_logger.info("Script URL subscribed to channel web_categ")

        # FIXME For retro compatibility
        self.channel = 'web_categ'

        self.faup = Faup()

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))
        # Get all uri from protocolsfile (Used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1]+"|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        self.prec_filename = None

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        filename, score = message.split()

        if self.prec_filename is None or filename != self.prec_filename:
            domains_list = set()
            PST = Paste.Paste(filename)
            client = ip2asn()

            detected_urls = PST.get_regex(self.url_regex)
            if len(detected_urls) > 0:
                to_print = 'Web;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                self.redis_logger.info('{}Detected {} URL;{}'.format(
                    to_print, len(detected_urls), PST.p_rel_path))

            for url in detected_urls:
                self.redis_logger.debug("match regex: %s" % (url))

                # self.redis_logger.debug("match regex search: %s"%(url))

                to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
                self.process.populate_set_out(to_send, 'Url')
                self.redis_logger.debug("url_parsed: %s" % (to_send))

                self.faup.decode(url)
                domain = self.faup.get_domain()
                subdomain = self.faup.get_subdomain()

                self.redis_logger.debug('{} Published'.format(url))

                if subdomain is not None:
                    # TODO: # FIXME: remove me
                    try:
                        subdomain = subdomain.decode()
                    except:
                        pass

                if domain is not None:
                    # TODO: # FIXME: remove me
                    try:
                        domain = domain.decode()
                    except:
                        pass
                    domains_list.add(domain)

                hostl = self.avoidNone(subdomain) + self.avoidNone(domain)

                try:
                    socket.setdefaulttimeout(1)
                    ip = socket.gethostbyname(hostl)
                    # If the resolver is not giving any IPv4 address,
                    # ASN/CC lookup is skip.
                    l = client.lookup(ip, qType='IP')
                except ipaddress.AddressValueError:
                    self.redis_logger.error(
                        'ASN/CC lookup failed for IP {}'.format(ip))
                    continue
                except:
                    self.redis_logger.error(
                        'Resolver IPv4 address failed for host {}'.format(hostl))
                    continue

                cc = getattr(l, 'cc')
                asn = ''
                if getattr(l, 'asn') is not None:
                    asn = getattr(l, 'asn')[2:]  # remobe b'

                # EU is not an official ISO 3166 code (but used by RIPE
                # IP allocation)
                if cc is not None and cc != "EU":
                    self.redis_logger.debug('{};{};{};{}'.format(hostl, asn, cc,
                                                                 pycountry.countries.get(alpha_2=cc).name))
                    if cc == self.cc_critical:
                        to_print = 'Url;{};{};{};Detected {} {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            hostl, cc)
                        self.redis_logger.info(to_print)
                else:
                    self.redis_logger.debug('{};{};{}'.format(hostl, asn, cc))

            A_values = lib_refine.checking_A_record(self.r_serv2,
                                                    domains_list)

            if A_values[0] >= 1:
                PST.__setattr__(self.channel, A_values)
                PST.save_attribute_redis(self.channel, (A_values[0],
                                                        list(A_values[1])))

                pprint.pprint(A_values)
                # self.redis_logger.info('Url;{};{};{};Checked {} URL;{}'.format(
                #     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))

        self.prec_filename = filename


if __name__ == '__main__':

    module = Web()
    module.run()