diff --git a/bin/Categ.py b/bin/Categ.py index 6bced9ce..a5cb6096 100755 --- a/bin/Categ.py +++ b/bin/Categ.py @@ -67,7 +67,7 @@ if __name__ == "__main__": # FUNCTIONS # publisher.info("Script Categ started") - categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve'] + categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'Dox'] tmp_dict = {} for filename in categories: bname = os.path.basename(filename) @@ -77,6 +77,7 @@ if __name__ == "__main__": tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) prec_filename = None + print(tmp_dict) while True: filename = p.get_from_set() @@ -89,10 +90,10 @@ if __name__ == "__main__": paste = Paste.Paste(filename) content = paste.get_p_content() - print('-----------------------------------------------------') - print(filename) - print(content) - print('-----------------------------------------------------') + #print('-----------------------------------------------------') + #print(filename) + #print(content) + #print('-----------------------------------------------------') for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py index f1362365..45fbd486 100755 --- a/bin/DomClassifier.py +++ b/bin/DomClassifier.py @@ -44,7 +44,7 @@ def main(): continue paste = PST.get_p_content() mimetype = PST._get_p_encoding() - + if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() diff --git a/bin/Duplicates.py b/bin/Duplicates.py index b25b57fe..10f9f4fc 100755 --- a/bin/Duplicates.py +++ b/bin/Duplicates.py @@ -144,8 +144,8 @@ if __name__ == "__main__": print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)) except Exception: - print(str(e)) - #print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash + #print(str(e)) + print('hash not comparable, bad hash: '+dico_hash+' , current_hash: 
'+paste_hash) # Add paste in DB after checking to prevent its analysis twice # hash_type_i -> index_i AND index_i -> PST.PATH diff --git a/bin/Global.py b/bin/Global.py index ee783f0a..e952713d 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -27,6 +27,18 @@ from pubsublogger import publisher from Helper import Process +import magic +import io +import gzip + +def gunzip_bytes_obj(bytes_obj): + in_ = io.BytesIO() + in_.write(bytes_obj) + in_.seek(0) + with gzip.GzipFile(fileobj=in_, mode='rb') as fo: + gunzipped_bytes_obj = fo.read() + + return gunzipped_bytes_obj.decode() if __name__ == '__main__': publisher.port = 6380 @@ -68,14 +80,29 @@ if __name__ == '__main__': # Creating the full filepath filename = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"), paste) - #print(filename) + dirname = os.path.dirname(filename) if not os.path.exists(dirname): os.makedirs(dirname) - with open(filename, 'wb') as f: - f.write(base64.standard_b64decode(gzip64encoded)) + decoded = base64.standard_b64decode(gzip64encoded) + + with open(filename, 'wb') as f: + f.write(decoded) + try: + decoded2 = gunzip_bytes_obj(decoded) + except: + decoded2 ='' + + type = magic.from_buffer(decoded2, mime=True) + + if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby': + + print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------') + print(filename) + print(type) + print(decoded2) + print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------') - print(filename) p.populate_set_out(filename) processed_paste+=1 diff --git a/bin/Onion.py 
b/bin/Onion.py index e26bec7b..b871e26e 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -42,6 +42,7 @@ def fetch(p, r_cache, urls, domains, path): if r_cache.exists(url) or url in failed: continue to_fetch = base64.standard_b64encode(url.encode('utf8')) + print(to_fetch) process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch], stdout=subprocess.PIPE) while process.poll() is None: @@ -52,6 +53,7 @@ def fetch(p, r_cache, urls, domains, path): r_cache.expire(url, 360000) downloaded.append(url) tempfile = process.stdout.read().strip() + tempfile = tempfile.decode('utf8') with open(tempfile, 'r') as f: filename = path + domain + '.gz' fetched = f.read() @@ -152,6 +154,6 @@ if __name__ == "__main__": prec_filename = filename else: publisher.debug("Script url is Idling 10s") - print('Sleeping') + #print('Sleeping') time.sleep(10) message = p.get_from_set() diff --git a/bin/Update-conf.py b/bin/Update-conf.py index 6cea72c2..187509fe 100755 --- a/bin/Update-conf.py +++ b/bin/Update-conf.py @@ -63,12 +63,12 @@ def main(): print(" - "+item[0]) print("+--------------------------------------------------------------------+") - resp = raw_input("Do you want to auto fix it? [y/n] ") + resp = input("Do you want to auto fix it? [y/n] ") if resp != 'y': return False else: - resp2 = raw_input("Do you want to keep a backup of the old configuration file? [y/n] ") + resp2 = input("Do you want to keep a backup of the old configuration file? 
[y/n] ") if resp2 == 'y': shutil.move(configfile, configfileBackup) diff --git a/bin/tor_fetcher.py b/bin/tor_fetcher.py index f313e580..4f0056ad 100644 --- a/bin/tor_fetcher.py +++ b/bin/tor_fetcher.py @@ -4,7 +4,7 @@ import socks import socket import urllib.request -import StringIO +import io import gzip import base64 import sys @@ -26,12 +26,15 @@ def get_page(url, torclient_host='127.0.0.1', torclient_port=9050): request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0') return urllib.request.urlopen(request, timeout=5).read(max_size * 100000) - +#FIXME don't work at all def makegzip64(s): - out = StringIO.StringIO() - with gzip.GzipFile(fileobj=out, mode="w") as f: - f.write(s) - return base64.standard_b64encode(out.getvalue()) + + out = io.BytesIO() + + with gzip.GzipFile(fileobj=out, mode='ab') as fo: + fo.write(base64.standard_b64encode(s)) + + return out.getvalue() if __name__ == "__main__": @@ -41,7 +44,8 @@ if __name__ == "__main__": exit(1) try: - url = base64.standard_b64decode(sys.argv[1]) + url = base64.standard_b64decode(sys.argv[1]).decode('utf8') + print(url) except: print('unable to decode') exit(1) @@ -61,7 +65,7 @@ if __name__ == "__main__": to_write = makegzip64(page) t, path = tempfile.mkstemp() - with open(path, 'w') as f: - f.write(to_write) + #with open(path, 'w') as f: + #f.write(to_write) print(path) exit(0) diff --git a/var/www/modules/terms/Flask_terms.py b/var/www/modules/terms/Flask_terms.py index 47aed32f..d354c33d 100644 --- a/var/www/modules/terms/Flask_terms.py +++ b/var/www/modules/terms/Flask_terms.py @@ -157,8 +157,9 @@ def terms_management(): trackReg_list_values = [] trackReg_list_num_of_paste = [] for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name): + tracked_regex = tracked_regex.decode('utf8') - notificationEMailTermMapping[tracked_regex] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)) + 
notificationEMailTermMapping[tracked_regex] = "\n".join( member.decode('utf8') for member in r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex) ) if tracked_regex not in notificationEnabledDict: notificationEnabledDict[tracked_regex] = False @@ -174,7 +175,7 @@ def terms_management(): value_range.append(term_date) trackReg_list_values.append(value_range) - if tracked_regex in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): + if tracked_regex.encode('utf8') in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): notificationEnabledDict[tracked_regex] = True #Set @@ -182,8 +183,9 @@ def terms_management(): trackSet_list_values = [] trackSet_list_num_of_paste = [] for tracked_set in r_serv_term.smembers(TrackedSetSet_Name): + tracked_set = tracked_set.decode('utf8') - notificationEMailTermMapping[tracked_set] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set)) + notificationEMailTermMapping[tracked_set] = "\n".join( member.decode('utf8') for member in r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set) ) if tracked_set not in notificationEnabledDict: @@ -200,7 +202,7 @@ def terms_management(): value_range.append(term_date) trackSet_list_values.append(value_range) - if tracked_set in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): + if tracked_set.encode('utf8') in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): notificationEnabledDict[tracked_set] = True #Tracked terms @@ -208,8 +210,12 @@ def terms_management(): track_list_values = [] track_list_num_of_paste = [] for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): + tracked_term = tracked_term.decode('utf8') - notificationEMailTermMapping[tracked_term] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term)) + #print(TrackedTermsNotificationEmailsPrefix_Name) + print(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term)) + #print(tracked_term) + 
notificationEMailTermMapping[tracked_term] = "\n".join( member.decode('utf8') for member in r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term) ) if tracked_term not in notificationEnabledDict: notificationEnabledDict[tracked_term] = False @@ -220,12 +226,12 @@ def terms_management(): term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) set_paste_name = "tracked_" + tracked_term - track_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) + track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) ) term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" value_range.append(term_date) track_list_values.append(value_range) - if tracked_term in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): + if tracked_term.encode('utf8') in r_serv_term.smembers(TrackedTermsNotificationEnabled_Name): notificationEnabledDict[tracked_term] = True #blacklist terms @@ -233,7 +239,7 @@ def terms_management(): for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name): term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term) term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - black_list.append([blacked_term, term_date]) + black_list.append([blacked_term.decode('utf8'), term_date]) return render_template("terms_management.html", black_list=black_list, track_list=track_list, trackReg_list=trackReg_list, trackSet_list=trackSet_list, @@ -251,13 +257,14 @@ def terms_management_query_paste(): # check if regex or not if term.startswith('/') and term.endswith('/'): set_paste_name = "regex_" + term - track_list_path = (r_serv_term.smembers(set_paste_name)).decode('utf8') + track_list_path = r_serv_term.smembers(set_paste_name) elif term.startswith('\\') and term.endswith('\\'): set_paste_name = "set_" + term - track_list_path = (r_serv_term.smembers(set_paste_name)).decode('utf8') + track_list_path = 
r_serv_term.smembers(set_paste_name) else: set_paste_name = "tracked_" + term - track_list_path = (r_serv_term.smembers(set_paste_name)).decode('utf8') + print(r_serv_term.smembers(set_paste_name)) + track_list_path = r_serv_term.smembers(set_paste_name) for path in track_list_path: paste = Paste.Paste(path)