Categ now listens to the Global queue

This commit is contained in:
Raphaël Vinot 2014-09-05 17:05:45 +02:00
parent 46f27ada4e
commit e983c839ad
6 changed files with 49 additions and 44 deletions

View file

@ -39,6 +39,7 @@ Requirements
import os import os
import argparse import argparse
import time import time
import re
from pubsublogger import publisher from pubsublogger import publisher
from packages import Paste from packages import Paste
@ -73,29 +74,29 @@ if __name__ == "__main__":
bname = os.path.basename(filename) bname = os.path.basename(filename)
tmp_dict[bname] = [] tmp_dict[bname] = []
with open(os.path.join(args.d, filename), 'r') as f: with open(os.path.join(args.d, filename), 'r') as f:
for l in f: patterns = [r'%s' % re.escape(s.strip()) for s in f]
tmp_dict[bname].append(l.strip()) tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
prec_filename = None prec_filename = None
while True: while True:
message = p.get_from_set() filename = p.get_from_set()
if message is not None: if filename is not None:
filename, word, score = message.split()
if prec_filename is None or filename != prec_filename: paste = Paste.Paste(filename)
PST = Paste.Paste(filename) content = paste.get_p_content()
prec_filename = filename
for categ, words_list in tmp_dict.items(): for categ, pattern in tmp_dict.items():
found = re.findall(pattern, content)
if word.lower() in words_list: if len(found) > 0:
msg = '{} {} {}'.format(PST.p_path, word, score) msg = '{} {}'.format(paste.p_path, len(found))
print msg, categ
p.populate_set_out(msg, categ) p.populate_set_out(msg, categ)
publisher.info( publisher.info(
'Categ;{};{};{};Detected {} "{}"'.format( 'Categ;{};{};{};Detected {} as {}'.format(
PST.p_source, PST.p_date, PST.p_name, score, word)) paste.p_source, paste.p_date, paste.p_name,
len(found), categ))
else: else:
publisher.debug("Script Categ is Idling 10s") publisher.debug("Script Categ is Idling 10s")

View file

@ -5,6 +5,8 @@ import time
from packages import Paste from packages import Paste
from packages import lib_refine from packages import lib_refine
from pubsublogger import publisher from pubsublogger import publisher
import re
from Helper import Process from Helper import Process
@ -19,52 +21,54 @@ if __name__ == "__main__":
# FUNCTIONS # # FUNCTIONS #
publisher.info("Creditcard script subscribed to channel creditcard_categ") publisher.info("Creditcard script subscribed to channel creditcard_categ")
message = p.get_from_set()
prec_filename = None
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?" creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
# FIXME For retro compatibility # FIXME For retro compatibility
channel = 'creditcard_categ' channel = 'creditcard_categ'
# mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}" # Source: http://www.richardsramblings.com/regex/credit-card-numbers/
# visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}" cards = [
# discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]| r'4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # 16-digit VISA, with separators
# [3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}" r'5[1-5]\d{2}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # 16 digits MasterCard
# jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}" r'6(?:011|22(?:(?=[\ \-]?(?:2[6-9]|[3-9]))|[2-8]|9(?=[\ \-]?(?:[01]|2[0-5])))|4[4-9]\d|5\d\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # Discover Card
# amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}" r'35(?:2[89]|[3-8]\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # Japan Credit Bureau (JCB)
# chinaUP_regex = "62[0-5]\d{13,16}" r'3[47]\d\d(?:[\ \-]?)\d{6}(?:[\ \-]?)\d{5}', # American Express
# maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}" r'(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}', # Maestro
]
regex = re.compile('|'.join(cards))
while True: while True:
message = p.get_from_set()
if message is not None: if message is not None:
filename, word, score = message.split() filename, score = message.split()
paste = Paste.Paste(filename)
if prec_filename is None or filename != prec_filename: content = paste.get_p_content()
all_cards = re.findall(regex, content)
if len(all_cards) > 0:
print 'All matching', all_cards
creditcard_set = set([]) creditcard_set = set([])
PST = Paste.Paste(filename)
for x in PST.get_regex(creditcard_regex): for card in all_cards:
if lib_refine.is_luhn_valid(x): clean_card = re.sub('[^0-9]', '', card)
creditcard_set.add(x) if lib_refine.is_luhn_valid(clean_card):
print clean_card, 'is valid'
creditcard_set.add(clean_card)
PST.__setattr__(channel, creditcard_set) paste.__setattr__(channel, creditcard_set)
PST.save_attribute_redis(channel, creditcard_set) paste.save_attribute_redis(channel, creditcard_set)
pprint.pprint(creditcard_set) pprint.pprint(creditcard_set)
to_print = 'CreditCard;{};{};{};'.format( to_print = 'CreditCard;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name) paste.p_source, paste.p_date, paste.p_name)
if (len(creditcard_set) > 0): if (len(creditcard_set) > 0):
publisher.critical('{}Checked {} valid number(s)'.format( publisher.warning('{}Checked {} valid number(s)'.format(
to_print, len(creditcard_set))) to_print, len(creditcard_set)))
else: else:
publisher.info('{}CreditCard related'.format(to_print)) publisher.info('{}CreditCard related'.format(to_print))
prec_filename = filename
else: else:
publisher.debug("Script creditcard is idling 1m") publisher.debug("Script creditcard is idling 1m")
print 'Sleeping' print 'Sleeping'
time.sleep(60) time.sleep(10)
message = p.get_from_set()

View file

@ -41,7 +41,7 @@ if __name__ == "__main__":
MX_values = None MX_values = None
while True: while True:
if message is not None: if message is not None:
filename, word, score = message.split() filename, score = message.split()
if prec_filename is None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
PST = Paste.Paste(filename) PST = Paste.Paste(filename)

View file

@ -102,7 +102,7 @@ if __name__ == "__main__":
while True: while True:
if message is not None: if message is not None:
print message print message
filename, word, score = message.split() filename, score = message.split()
# "For each new paste" # "For each new paste"
if prec_filename is None or filename != prec_filename: if prec_filename is None or filename != prec_filename:

View file

@ -46,7 +46,7 @@ if __name__ == "__main__":
while True: while True:
if message is not None: if message is not None:
filename, word, score = message.split() filename, score = message.split()
if prec_filename is None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
domains_list = [] domains_list = []

View file

@ -26,7 +26,7 @@ publish = Redis_Words
subscribe = Redis_Words subscribe = Redis_Words
[Categ] [Categ]
subscribe = Redis_Words subscribe = Redis_Global
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web
[CreditCards] [CreditCards]