diff --git a/bin/Onion.py b/bin/Onion.py
index 3637de10..e26bec7b 100755
--- a/bin/Onion.py
+++ b/bin/Onion.py
@@ -41,7 +41,7 @@ def fetch(p, r_cache, urls, domains, path):
     for url, domain in zip(urls, domains):
         if r_cache.exists(url) or url in failed:
             continue
-        to_fetch = base64.standard_b64encode(url)
+        to_fetch = base64.standard_b64encode(url.encode('utf8'))
         process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch],
                                    stdout=subprocess.PIPE)
         while process.poll() is None:
diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py
index 3e0dda76..d93f880a 100755
--- a/bin/SQLInjectionDetection.py
+++ b/bin/SQLInjectionDetection.py
@@ -66,10 +66,10 @@ def analyse(url, path):
     result_query = 0
 
     if resource_path is not None:
-        result_path = is_sql_injection(resource_path)
+        result_path = is_sql_injection(resource_path.decode('utf8'))
 
     if query_string is not None:
-        result_query = is_sql_injection(query_string)
+        result_query = is_sql_injection(query_string.decode('utf8'))
 
     if (result_path > 0) or (result_query > 0):
         paste = Paste.Paste(path)
@@ -93,7 +93,7 @@ def analyse(url, path):
 # defined above on it.
 def is_sql_injection(url_parsed):
     line = urllib.request.unquote(url_parsed)
-    line = string.upper(line)
+    line = str.upper(line)
     result = []
     result_suspect = []
 
@@ -104,12 +104,12 @@ def is_sql_injection(url_parsed):
 
     for word_list in word_injection:
         for word in word_list:
-            temp_res = string.find(line, string.upper(word))
+            temp_res = str.find(line, str.upper(word))
             if temp_res!=-1:
                 result.append(line[temp_res:temp_res+len(word)])
 
     for word in word_injection_suspect:
-        temp_res = string.find(line, string.upper(word))
+        temp_res = str.find(line, str.upper(word))
         if temp_res!=-1:
             result_suspect.append(line[temp_res:temp_res+len(word)])
 
diff --git a/bin/Web.py b/bin/Web.py
index d4593469..8369304a 100755
--- a/bin/Web.py
+++ b/bin/Web.py
@@ -95,17 +95,23 @@ if __name__ == "__main__":
                     subdomain = faup.get_subdomain()
                     f1 = None
 
-                    domains_list.append(domain)
-
                     publisher.debug('{} Published'.format(url))
 
                     if f1 == "onion":
                         print(domain)
 
-                    hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
+                    if subdomain is not None:
+                        subdomain = subdomain.decode('utf8')
+
+                    if domain is not None:
+                        domain = domain.decode('utf8')
+                    domains_list.append(domain)
+
+                    hostl = avoidNone(subdomain) + avoidNone(domain)
+
                     try:
                         socket.setdefaulttimeout(1)
-                        ip = socket.gethostbyname(unicode(hostl))
+                        ip = socket.gethostbyname(hostl)
                     except:
                         # If the resolver is not giving any IPv4 address,
                         # ASN/CC lookup is skip.
@@ -113,10 +119,12 @@ if __name__ == "__main__":
 
                     try:
                         l = client.lookup(ip, qType='IP')
+                    except ipaddress.AddressValueError:
                         continue
                     cc = getattr(l, 'cc')
 
-                    asn = getattr(l, 'asn')
+                    if getattr(l, 'asn') is not None:
+                        asn = getattr(l, 'asn')[2:] #remove b'
 
                     # EU is not an official ISO 3166 code (but used by RIPE
                     # IP allocation)
@@ -134,11 +142,13 @@
 
             A_values = lib_refine.checking_A_record(r_serv2, domains_list)
+
             if A_values[0] >= 1:
                 PST.__setattr__(channel, A_values)
                 PST.save_attribute_redis(channel, (A_values[0],
                                                    list(A_values[1])))
+
                 pprint.pprint(A_values)
                 publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
                     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path))
diff --git a/bin/WebStats.py b/bin/WebStats.py
index 5edadc9f..90a8f96b 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -29,6 +29,7 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
 def analyse(server, field_name, date, url_parsed):
     field = url_parsed[field_name]
     if field is not None:
+        field = field.decode('utf8')
         server.hincrby(field, date, 1)
         if field_name == "domain": #save domain in a set for the monthly plot
             domain_set_name = "domain_set_" + date[0:6]
@@ -179,7 +180,7 @@ if __name__ == '__main__':
                     # Tld analysis
                     analyse(r_serv_trend, 'tld', date, url_parsed)
                     # Domain analysis
-                    analyse(r_serv_trend, 'domain', date, url_parsed)
+                    analyse(r_serv_trend, 'domain', date, url_parsed)
 
                     compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
                     compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
diff --git a/bin/tor_fetcher.py b/bin/tor_fetcher.py
index f1e72e0c..f313e580 100644
--- a/bin/tor_fetcher.py
+++ b/bin/tor_fetcher.py
@@ -3,7 +3,7 @@
 
 import socks
 import socket
-import urllib2
+import urllib.request
 import StringIO
 import gzip
 import base64
@@ -21,10 +21,10 @@ def create_connection(address, timeout=None, source_address=None):
 
 
 def get_page(url, torclient_host='127.0.0.1', torclient_port=9050):
-    request = urllib2.Request(url)
+    request = urllib.request.Request(url)
     # UA of the Tor browser bundle
     request.add_header('User-Agent',
                        'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0')
-    return urllib2.urlopen(request, timeout=5).read(max_size * 100000)
+    return urllib.request.urlopen(request, timeout=5).read(max_size * 100000)
 
 
 def makegzip64(s):
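
The recurring change above is a Python 3 bytes/str boundary fix: under Python 3, pyfaup getters such as faup.get_domain() and faup.get_subdomain() return bytes (or None), while Redis field names, socket.gethostbyname() and str methods expect str. A minimal sketch of the pattern the patch repeats, where decode_or_none is a hypothetical helper and the byte literals stand in for real pyfaup output:

    def decode_or_none(value):
        # pyfaup getters return bytes or None under Python 3;
        # decode once at the boundary and let None pass through.
        return value.decode('utf8') if value is not None else None

    subdomain = decode_or_none(b'www')         # stand-in for faup.get_subdomain()
    domain = decode_or_none(b'example.com')    # stand-in for faup.get_domain()

    # Same shape as Web.py's hostl = avoidNone(subdomain) + avoidNone(domain)
    hostl = (subdomain or '') + (domain or '')
    assert hostl == 'wwwexample.com'

Decoding once, right after the faup lookup, lets the rest of each module keep working on str, which is why Web.py rebinds subdomain and domain before appending to domains_list.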
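
The Onion.py and tor_fetcher.py hunks are two halves of one call path: base64.standard_b64encode() accepts only bytes in Python 3, so the URL is encoded before being handed to the subprocess, and urllib2 maps onto urllib.request. A short round-trip sketch under those assumptions (http://example.onion/ is a placeholder; the actual fetch is commented out because it needs a running Tor SOCKS proxy):

    import base64
    import urllib.request

    url = 'http://example.onion/'

    # Onion.py side: str -> bytes -> base64 bytes, safe to pass to Popen.
    to_fetch = base64.standard_b64encode(url.encode('utf8'))

    # tor_fetcher.py side: recover the original URL from its argument.
    assert base64.standard_b64decode(to_fetch).decode('utf8') == url

    # urllib2.Request/urlopen map one-to-one onto urllib.request.
    request = urllib.request.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0')
    # urllib.request.urlopen(request, timeout=5).read()

Note that tor_fetcher.py still imports StringIO, which does not exist under Python 3; io.BytesIO is the usual replacement when gzip needs a file-like object.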