Refactoring on Credential, Phone and Release

This commit is contained in:
Raphaël Vinot 2016-02-10 16:39:06 +01:00
parent 837efb4592
commit 1da8675750
3 changed files with 70 additions and 82 deletions

View file

@ -7,41 +7,44 @@ from Helper import Process
import re import re
if __name__ == "__main__": if __name__ == "__main__":
publisher.port = 6380 publisher.port = 6380
publisher.channel = "Script" publisher.channel = "Script"
config_section = "Credential" config_section = "Credential"
p = Process(config_section) p = Process(config_section)
publisher.info("Find credentials") publisher.info("Find credentials")
critical = 10 critical = 10
regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/" regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/"
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
while True: while True:
message = p.get_from_set() filepath = p.get_from_set()
if message is not None: if filepath is None:
paste = Paste.Paste(message) publisher.debug("Script Credential is Idling 10s")
content = paste.get_p_content() print('Sleeping')
all_cred = re.findall(regex_cred, content) time.sleep(10)
if len(all_cred) > 0: continue
cred_set = set([])
for cred in all_cred:
cred_set.add(cred)
to_print = 'Cred;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) paste = Paste.Paste(filepath)
if len(cred_set) > 0: content = paste.get_p_content()
publisher.info(to_print) creds = set(re.findall(regex_cred, content))
for cred in set(cred_set): if len(creds) == 0:
print(cred) continue
if len(cred_set) > critical: sites = set(re.findall(regex_web, content))
print("========> Found more than 10 credentials on this file : {}".format(message))
site = re.findall(regex_web, content)
publisher.warning(to_print)
if len(site) > 0:
print("=======> Probably on : {}".format(iter(site).next()))
else: message = '{} credentials found.'.format(len(creds))
publisher.debug("Script Credential is Idling 10s") if sites:
print 'Sleeping' message += ' Related websites: {}'.format(', '.join(sites))
time.sleep(10)
to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
print('\n '.join(creds))
if len(creds) > critical:
print("========> Found more than 10 credentials on this file : {}".format(filepath))
publisher.warning(to_print)
if sites:
print("=======> Probably on : {}".format(', '.join(sites)))
else:
publisher.info(to_print)

7
bin/Phone.py Normal file → Executable file
View file

@ -5,10 +5,8 @@
""" """
import time import time
import pprint
import re import re
from packages import Paste from packages import Paste
from packages import lib_refine
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
@ -22,11 +20,11 @@ def search_phone(message):
results = reg_phone.findall(content) results = reg_phone.findall(content)
# if the list is greater than 4, we consider the Paste may contain a list of phone numbers # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
if len(results) > 4 : if len(results) > 4:
print results print results
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger # Port of the redis instance used by pubsublogger
publisher.port = 6380 publisher.port = 6380
@ -53,4 +51,3 @@ def search_phone(message):
# Do something with the message from the queue # Do something with the message from the queue
search_phone(message) search_phone(message)

View file

@ -7,47 +7,35 @@ from Helper import Process
import re import re
if __name__ == "__main__": if __name__ == "__main__":
publisher.port = 6380 publisher.port = 6380
publisher.channel = "Script" publisher.channel = "Script"
config_section = "Release" config_section = "Release"
p = Process(config_section) p = Process(config_section)
publisher.info("Release scripts to find release names") publisher.info("Release scripts to find release names")
#REGEX : movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+" regexs = [movie, tv, xxx]
tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
regexs = [movie,tv,xxx] regex = '|'.join(regexs)
while True:
filepath = p.get_from_set()
if filepath is None:
publisher.debug("Script Release is Idling 10s")
print 'Sleeping'
time.sleep(10)
continue
regex = re.compile('|'.join(regexs)) paste = Paste.Paste(filepath)
while True: content = paste.get_p_content()
message = p.get_from_set() releases = set(re.findall(regex, content))
if message is not None: if len(releases) == 0:
paste = Paste.Paste(message) continue
content = paste.get_p_content()
all_release = re.findall(regex, content)
if len(all_release) > 0:
release_set = set([])
for rlz in all_release:
release_set.add(rlz)
to_print = 'Release;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) to_print = 'Release;{};{};{};{} releases'.format(paste.p_source, paste.p_date, paste.p_name, len(releases))
if (len(release_set) > 0): if len(releases) > 30:
publisher.warning('{}Checked {} valids'.format(to_print, len(release_set))) publisher.warning(to_print)
for rl in set(release_set): else:
#publisher.warning('{}'.format(rl)) publisher.info(to_print)
print(rl)
if (len(release_set) > 10):
print("----------------------------------- Found more than 10 releases on this file : {}".format(message))
else:
publisher.info('{}Release related'.format(to_print))
else:
publisher.debug("Script Release is Idling 10s")
print 'Sleeping'
time.sleep(10)