From 1da8675750cb3adeae28967f951c820cc7a7f5ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 10 Feb 2016 16:39:06 +0100 Subject: [PATCH] Refactoring on Credential, Phone and Release --- bin/Credential.py | 75 ++++++++++++++++++++++++----------------------- bin/Phone.py | 7 ++--- bin/Release.py | 70 ++++++++++++++++++------------------------- 3 files changed, 70 insertions(+), 82 deletions(-) mode change 100644 => 100755 bin/Phone.py diff --git a/bin/Credential.py b/bin/Credential.py index 52360ce6..96f48501 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -2,46 +2,49 @@ # -*-coding:UTF-8 -* import time from packages import Paste -from pubsublogger import publisher +from pubsublogger import publisher from Helper import Process import re -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - config_section = "Credential" - p = Process(config_section) - publisher.info("Find credentials") - - critical = 10 +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + config_section = "Credential" + p = Process(config_section) + publisher.info("Find credentials") - regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/" - regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" - while True: - message = p.get_from_set() - if message is not None: - paste = Paste.Paste(message) - content = paste.get_p_content() - all_cred = re.findall(regex_cred, content) - if len(all_cred) > 0: - cred_set = set([]) - for cred in all_cred: - cred_set.add(cred) + critical = 10 - to_print = 'Cred;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) - if len(cred_set) > 0: - publisher.info(to_print) - for cred in set(cred_set): - print(cred) + regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/" + regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" + while True: + filepath = p.get_from_set() + if filepath is None: + publisher.debug("Script Credential is Idling 10s") + print('Sleeping') + time.sleep(10) + continue - if len(cred_set) > critical: - print("========> Found more than 10 credentials on this file : {}".format(message)) - site = re.findall(regex_web, content) - publisher.warning(to_print) - if len(site) > 0: - print("=======> Probably on : {}".format(iter(site).next())) + paste = Paste.Paste(filepath) + content = paste.get_p_content() + creds = set(re.findall(regex_cred, content)) + if len(creds) == 0: + continue - else: - publisher.debug("Script Credential is Idling 10s") - print 'Sleeping' - time.sleep(10) + sites = set(re.findall(regex_web, content)) + + message = '{} credentials found.'.format(len(creds)) + if sites: + message += ' Related websites: {}'.format(', '.join(sites)) + + to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message) + + print('\n '.join(creds)) + + if len(creds) > critical: + print("========> Found more than 10 credentials on this file : {}".format(filepath)) + publisher.warning(to_print) + if sites: + print("=======> Probably on : {}".format(', '.join(sites))) + else: + publisher.info(to_print) diff --git a/bin/Phone.py b/bin/Phone.py old mode 100644 new mode 100755 index 384040cf..628f77c2 --- a/bin/Phone.py +++ b/bin/Phone.py @@ -5,10 +5,8 @@ """ import time -import pprint import re from packages import Paste -from packages import lib_refine from pubsublogger import publisher from Helper import Process @@ -22,11 +20,11 @@ def search_phone(message): results = reg_phone.findall(content) # if the list is greater than 4, we consider the Paste may contain a list of phone numbers - if len(results) > 4 : + if len(results) > 4: print results publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) - if __name__ == '__main__': +if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # Port of the redis instance used by pubsublogger publisher.port = 6380 @@ -53,4 +51,3 @@ def search_phone(message): # Do something with the message from the queue search_phone(message) - diff --git a/bin/Release.py b/bin/Release.py index 670a9af4..309efe67 100755 --- a/bin/Release.py +++ b/bin/Release.py @@ -2,52 +2,40 @@ # -*-coding:UTF-8 -* import time from packages import Paste -from pubsublogger import publisher +from pubsublogger import publisher from Helper import Process import re -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - config_section = "Release" - p = Process(config_section) - publisher.info("Release scripts to find release names") +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + config_section = "Release" + p = Process(config_section) + publisher.info("Release scripts to find release names") - #REGEX : - - movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+" - tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+" - xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+" - - regexs = [movie,tv,xxx] + movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+" + tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+" + xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+" - regex = re.compile('|'.join(regexs)) - while True: - message = p.get_from_set() - if message is not None: - paste = Paste.Paste(message) - content = paste.get_p_content() - all_release = re.findall(regex, content) - if len(all_release) > 0: - release_set = set([]) - for rlz in all_release: - release_set.add(rlz) + regexs = [movie, tv, xxx] - to_print = 'Release;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) - if (len(release_set) > 0): - publisher.warning('{}Checked {} valids'.format(to_print, len(release_set))) - for rl in set(release_set): - #publisher.warning('{}'.format(rl)) - print(rl) - if (len(release_set) > 10): - print("----------------------------------- Found more than 10 releases on this file : {}".format(message)) + regex = '|'.join(regexs) + while True: + filepath = p.get_from_set() + if filepath is None: + publisher.debug("Script Release is Idling 10s") + print 'Sleeping' + time.sleep(10) + continue - else: - publisher.info('{}Release related'.format(to_print)) + paste = Paste.Paste(filepath) + content = paste.get_p_content() + releases = set(re.findall(regex, content)) + if len(releases) == 0: + continue - - - else: - publisher.debug("Script Release is Idling 10s") - print 'Sleeping' - time.sleep(10) + to_print = 'Release;{};{};{};{} releases'.format(paste.p_source, paste.p_date, paste.p_name, len(releases)) + if len(releases) > 30: + publisher.warning(to_print) + else: + publisher.info(to_print)