From e983c839adf9729473dcf2aaf98f4dd7f1eacce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Fri, 5 Sep 2014 17:05:45 +0200 Subject: [PATCH] Categ now listen to the Global queue --- bin/Categ.py | 29 +++++++++++---------- bin/CreditCard.py | 56 +++++++++++++++++++++------------------- bin/Mail.py | 2 +- bin/Onion.py | 2 +- bin/Url.py | 2 +- bin/packages/modules.cfg | 2 +- 6 files changed, 49 insertions(+), 44 deletions(-) diff --git a/bin/Categ.py b/bin/Categ.py index d9b052bd..8bf834a1 100755 --- a/bin/Categ.py +++ b/bin/Categ.py @@ -39,6 +39,7 @@ Requirements import os import argparse import time +import re from pubsublogger import publisher from packages import Paste @@ -73,29 +74,29 @@ if __name__ == "__main__": bname = os.path.basename(filename) tmp_dict[bname] = [] with open(os.path.join(args.d, filename), 'r') as f: - for l in f: - tmp_dict[bname].append(l.strip()) + patterns = [r'%s' % re.escape(s.strip()) for s in f] + tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) prec_filename = None while True: - message = p.get_from_set() - if message is not None: - filename, word, score = message.split() + filename = p.get_from_set() + if filename is not None: - if prec_filename is None or filename != prec_filename: - PST = Paste.Paste(filename) - prec_filename = filename + paste = Paste.Paste(filename) + content = paste.get_p_content() - for categ, words_list in tmp_dict.items(): - - if word.lower() in words_list: - msg = '{} {} {}'.format(PST.p_path, word, score) + for categ, pattern in tmp_dict.items(): + found = re.findall(pattern, content) + if len(found) > 0: + msg = '{} {}'.format(paste.p_path, len(found)) + print msg, categ p.populate_set_out(msg, categ) publisher.info( - 'Categ;{};{};{};Detected {} "{}"'.format( - PST.p_source, PST.p_date, PST.p_name, score, word)) + 'Categ;{};{};{};Detected {} as {}'.format( + paste.p_source, paste.p_date, paste.p_name, + len(found), categ)) else: publisher.debug("Script Categ is Idling 10s") diff --git a/bin/CreditCard.py b/bin/CreditCard.py index 37e8bf67..ba264c9d 100755 --- a/bin/CreditCard.py +++ b/bin/CreditCard.py @@ -5,6 +5,8 @@ import time from packages import Paste from packages import lib_refine from pubsublogger import publisher +import re + from Helper import Process @@ -19,52 +21,54 @@ if __name__ == "__main__": # FUNCTIONS # publisher.info("Creditcard script subscribed to channel creditcard_categ") - message = p.get_from_set() - prec_filename = None creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?" # FIXME For retro compatibility channel = 'creditcard_categ' - # mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}" - # visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}" - # discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]| - # [3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}" - # jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}" - # amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}" - # chinaUP_regex = "62[0-5]\d{13,16}" - # maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}" + # Source: http://www.richardsramblings.com/regex/credit-card-numbers/ + cards = [ + r'4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # 16-digit VISA, with separators + r'5[1-5]\d{2}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # 16 digits MasterCard + r'6(?:011|22(?:(?=[\ \-]?(?:2[6-9]|[3-9]))|[2-8]|9(?=[\ \-]?(?:[01]|2[0-5])))|4[4-9]\d|5\d\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # Discover Card + r'35(?:2[89]|[3-8]\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # Japan Credit Bureau (JCB) + r'3[47]\d\d(?:[\ \-]?)\d{6}(?:[\ \-]?)\d{5}', # American Express + r'(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}', # Maestro + ] + + regex = re.compile('|'.join(cards)) while True: + message = p.get_from_set() if message is not None: - filename, word, score = message.split() - - if prec_filename is None or filename != prec_filename: + filename, score = message.split() + paste = Paste.Paste(filename) + content = paste.get_p_content() + all_cards = re.findall(regex, content) + if len(all_cards) > 0: + print 'All matching', all_cards creditcard_set = set([]) - PST = Paste.Paste(filename) - for x in PST.get_regex(creditcard_regex): - if lib_refine.is_luhn_valid(x): - creditcard_set.add(x) + for card in all_cards: + clean_card = re.sub('[^0-9]', '', card) + if lib_refine.is_luhn_valid(clean_card): + print clean_card, 'is valid' + creditcard_set.add(clean_card) - PST.__setattr__(channel, creditcard_set) - PST.save_attribute_redis(channel, creditcard_set) + paste.__setattr__(channel, creditcard_set) + paste.save_attribute_redis(channel, creditcard_set) pprint.pprint(creditcard_set) to_print = 'CreditCard;{};{};{};'.format( - PST.p_source, PST.p_date, PST.p_name) + paste.p_source, paste.p_date, paste.p_name) if (len(creditcard_set) > 0): - publisher.critical('{}Checked {} valid number(s)'.format( + publisher.warning('{}Checked {} valid number(s)'.format( to_print, len(creditcard_set))) else: publisher.info('{}CreditCard related'.format(to_print)) - - prec_filename = filename - else: publisher.debug("Script creditcard is idling 1m") print 'Sleeping' - time.sleep(60) + time.sleep(10) - message = p.get_from_set() diff --git a/bin/Mail.py b/bin/Mail.py index d83c80e7..07d742b2 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -41,7 +41,7 @@ if __name__ == "__main__": MX_values = None while True: if message is not None: - filename, word, score = message.split() + filename, score = message.split() if prec_filename is None or filename != prec_filename: PST = Paste.Paste(filename) diff --git a/bin/Onion.py b/bin/Onion.py index b9166abb..7d04d028 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -102,7 +102,7 @@ if __name__ == "__main__": while True: if message is not None: print message - filename, word, score = message.split() + filename, score = message.split() # "For each new paste" if prec_filename is None or filename != prec_filename: diff --git a/bin/Url.py b/bin/Url.py index 19a0a0d4..e8061895 100755 --- a/bin/Url.py +++ b/bin/Url.py @@ -46,7 +46,7 @@ if __name__ == "__main__": while True: if message is not None: - filename, word, score = message.split() + filename, score = message.split() if prec_filename is None or filename != prec_filename: domains_list = [] diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 677a77d6..c0731fc1 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -26,7 +26,7 @@ publish = Redis_Words subscribe = Redis_Words [Categ] -subscribe = Redis_Words +subscribe = Redis_Global publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web [CreditCards]