Refactoring on Credential, Phone and Release

This commit is contained in:
Raphaël Vinot 2016-02-10 16:39:06 +01:00
parent 837efb4592
commit 1da8675750
3 changed files with 70 additions and 82 deletions

View file

@ -7,41 +7,44 @@ from Helper import Process
import re
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = "Credential"
p = Process(config_section)
publisher.info("Find credentials")
publisher.port = 6380
publisher.channel = "Script"
config_section = "Credential"
p = Process(config_section)
publisher.info("Find credentials")
critical = 10
critical = 10
regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/"
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
while True:
message = p.get_from_set()
if message is not None:
paste = Paste.Paste(message)
content = paste.get_p_content()
all_cred = re.findall(regex_cred, content)
if len(all_cred) > 0:
cred_set = set([])
for cred in all_cred:
cred_set.add(cred)
regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/"
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
while True:
filepath = p.get_from_set()
if filepath is None:
publisher.debug("Script Credential is Idling 10s")
print('Sleeping')
time.sleep(10)
continue
to_print = 'Cred;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
if len(cred_set) > 0:
publisher.info(to_print)
for cred in set(cred_set):
print(cred)
paste = Paste.Paste(filepath)
content = paste.get_p_content()
creds = set(re.findall(regex_cred, content))
if len(creds) == 0:
continue
if len(cred_set) > critical:
print("========> Found more than 10 credentials on this file : {}".format(message))
site = re.findall(regex_web, content)
publisher.warning(to_print)
if len(site) > 0:
print("=======> Probably on : {}".format(iter(site).next()))
sites = set(re.findall(regex_web, content))
else:
publisher.debug("Script Credential is Idling 10s")
print 'Sleeping'
time.sleep(10)
message = '{} credentials found.'.format(len(creds))
if sites:
message += ' Related websites: {}'.format(', '.join(sites))
to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
print('\n '.join(creds))
if len(creds) > critical:
print("========> Found more than 10 credentials on this file : {}".format(filepath))
publisher.warning(to_print)
if sites:
print("=======> Probably on : {}".format(', '.join(sites)))
else:
publisher.info(to_print)

7
bin/Phone.py Normal file → Executable file
View file

@ -5,10 +5,8 @@
"""
import time
import pprint
import re
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
from Helper import Process
@ -22,11 +20,11 @@ def search_phone(message):
results = reg_phone.findall(content)
# If more than 4 matches are found, we consider that the Paste may contain a list of phone numbers
if len(results) > 4 :
if len(results) > 4:
print results
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
if __name__ == '__main__':
if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger
publisher.port = 6380
@ -53,4 +51,3 @@ def search_phone(message):
# Do something with the message from the queue
search_phone(message)

View file

@ -7,47 +7,35 @@ from Helper import Process
import re
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = "Release"
p = Process(config_section)
publisher.info("Release scripts to find release names")
publisher.port = 6380
publisher.channel = "Script"
config_section = "Release"
p = Process(config_section)
publisher.info("Release scripts to find release names")
#REGEX :
movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
regexs = [movie, tv, xxx]
regexs = [movie,tv,xxx]
regex = '|'.join(regexs)
while True:
filepath = p.get_from_set()
if filepath is None:
publisher.debug("Script Release is Idling 10s")
print 'Sleeping'
time.sleep(10)
continue
regex = re.compile('|'.join(regexs))
while True:
message = p.get_from_set()
if message is not None:
paste = Paste.Paste(message)
content = paste.get_p_content()
all_release = re.findall(regex, content)
if len(all_release) > 0:
release_set = set([])
for rlz in all_release:
release_set.add(rlz)
paste = Paste.Paste(filepath)
content = paste.get_p_content()
releases = set(re.findall(regex, content))
if len(releases) == 0:
continue
to_print = 'Release;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
if (len(release_set) > 0):
publisher.warning('{}Checked {} valids'.format(to_print, len(release_set)))
for rl in set(release_set):
#publisher.warning('{}'.format(rl))
print(rl)
if (len(release_set) > 10):
print("----------------------------------- Found more than 10 releases on this file : {}".format(message))
else:
publisher.info('{}Release related'.format(to_print))
else:
publisher.debug("Script Release is Idling 10s")
print 'Sleeping'
time.sleep(10)
to_print = 'Release;{};{};{};{} releases'.format(paste.p_source, paste.p_date, paste.p_name, len(releases))
if len(releases) > 30:
publisher.warning(to_print)
else:
publisher.info(to_print)