From 9bfb7a5fb632b6a0c1a2940e89cbe6edc0a52da7 Mon Sep 17 00:00:00 2001 From: Xavier Mertens Date: Fri, 26 Oct 2018 17:13:26 +0200 Subject: [PATCH 1/7] Added module Regex.py --- bin/LAUNCH.sh | 2 + bin/Regex.py | 138 +++++++++++++++++++++++++++++++++++++++++ bin/packages/regex.cfg | 17 +++++ 3 files changed, 157 insertions(+) create mode 100644 bin/Regex.py create mode 100644 bin/packages/regex.cfg diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 684af83b..c751a439 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -170,6 +170,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Keys.py; read x' sleep 0.1 + screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Regex.py; read x' + sleep 0.1 screen -S "Script_AIL" -X screen -t "Decoder" bash -c 'cd '${AIL_BIN}'; ./Decoder.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c 'cd '${AIL_BIN}'; ./Bitcoin.py; read x' diff --git a/bin/Regex.py b/bin/Regex.py new file mode 100644 index 00000000..960c33be --- /dev/null +++ b/bin/Regex.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +""" +The Regex Module +================ + +Search for regular expressions stored in a flat file + tag. +The flat file is automatically reloaded when the MTIME changed. + +It uses the file 'packagess/regex.cfg'. Format: +Tag||Regex + +Xavier Mertens + +""" + +import time +import os +import re +from pubsublogger import publisher + +#from bin.packages import Paste +#from bin.Helper import Process + +from packages import Paste +from Helper import Process + +# Change the path to your preferred one +regexConfig = 'packages/regex.cfg' + +regexes = [] + +def load_regex(force = False): + ''' + Load regexes from the config file and validate them + If 'True' passed as argument, force to reload + ''' + + lregexes = regexes + validate_regex = False + + try: + stats = os.stat(regexConfig) + mtime = int(stats.st_mtime) + if mtime > time.time()-60 or force == True: + # Regex config changed, reload the file + print('Loading regular expressions') + with open(regexConfig) as f: + lines = f.readlines() + lines = [x.strip() for x in lines] + validate_regex = True + except: + print('Cannot read {}'.format(regexConfig)) + return [] + + if validate_regex: + # Validate regexes read from the file + line=1 + lregexes = [] + for l in lines: + # Skip comments and empty lines + if len(l) > 0: + if l[0] == '#': + continue + try: + re.compile(l.split('||')[1]) + except: + print('Ignored line {}: Syntax error in "{}"'.format(line, regexConfig)) + continue + line += 1 + lregexes.append(l) + print('DEBUG: regexes:') + print(lregexes) + return lregexes + +def search_regex(paste): + content = paste.get_p_content() + find = False + global regexes + + regexes = load_regex(False) + + for r in regexes: + (tag,pattern) = r.split('||') + + if re.findall(pattern, content, re.MULTILINE|re.IGNORECASE): + publisher.warning('Regex match: {} ({})'.format(pattern, tag)) + # Sanitize tag to make it easy to read + tag = tag.strip().lower().replace(' ','-') + print('regex {} found'.format(tag)) + msg = 'infoleak:automatic-detection="regex-{}";{}'.format(tag, message) + p.populate_set_out(msg, 'Tags') + find = True + + if find: + #Send to duplicate + p.populate_set_out(message, 'Duplicate') + #send to Browse_warning_paste + msg = ('regex;{}'.format(message)) + print(message) + p.populate_set_out( msg, 'alertHandler') + + +if __name__ == '__main__': + global regexes + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. + publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + config_section = 'Regex' + + # Setup the I/O queues + p = Process(config_section) + + # Sent to the logging a description of the module + publisher.info("Run Regex module ") + + # Load regular expressions from config file + regexes = load_regex(True) + + # Endless loop getting messages from the input queue + while True: + # Get one message from the input queue + message = p.get_from_set() + if message is None: + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + + # Do something with the message from the queue + paste = Paste.Paste(message) + search_regex(paste) + + # (Optional) Send that thing to the next queue diff --git a/bin/packages/regex.cfg b/bin/packages/regex.cfg new file mode 100644 index 00000000..dcc939c6 --- /dev/null +++ b/bin/packages/regex.cfg @@ -0,0 +1,17 @@ +# +# Regular expressions to be search in AIL +# +# Format: +# tag||regex +# +# tag: appened to the tag (ex: regex-tag) +# regex: the regular expression +# (Comments & empty lines are ignores) +# + +# Example1 +# Search for my name +personal-mention||xavier\s+mertens + +# Search for cicl.lu hostnames +circl||\w+\.circl\.lu From 9e7ca845818f4355898245bd9d2a2f46fb7b0e97 Mon Sep 17 00:00:00 2001 From: Xavier Mertens Date: Fri, 2 Nov 2018 15:49:06 +0100 Subject: [PATCH 2/7] Added timeout to avoid blocking regexes --- bin/Regex.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/bin/Regex.py b/bin/Regex.py index 960c33be..6b01ffc5 100644 --- a/bin/Regex.py +++ b/bin/Regex.py @@ -18,6 +18,7 @@ Xavier Mertens import time import os import re +import signal from pubsublogger import publisher #from bin.packages import Paste @@ -26,6 +27,14 @@ from pubsublogger import publisher from packages import Paste from Helper import Process +class TimeoutException(Exception): + pass + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) + # Change the path to your preferred one regexConfig = 'packages/regex.cfg' @@ -48,7 +57,7 @@ def load_regex(force = False): print('Loading regular expressions') with open(regexConfig) as f: lines = f.readlines() - lines = [x.strip() for x in lines] + lines = [x.strip() for x in lines] validate_regex = True except: print('Cannot read {}'.format(regexConfig)) @@ -65,7 +74,7 @@ def load_regex(force = False): continue try: re.compile(l.split('||')[1]) - except: + except: print('Ignored line {}: Syntax error in "{}"'.format(line, regexConfig)) continue line += 1 @@ -84,14 +93,21 @@ def search_regex(paste): for r in regexes: (tag,pattern) = r.split('||') - if re.findall(pattern, content, re.MULTILINE|re.IGNORECASE): - publisher.warning('Regex match: {} ({})'.format(pattern, tag)) - # Sanitize tag to make it easy to read - tag = tag.strip().lower().replace(' ','-') - print('regex {} found'.format(tag)) - msg = 'infoleak:automatic-detection="regex-{}";{}'.format(tag, message) - p.populate_set_out(msg, 'Tags') - find = True + signal.alarm(max_execution_time) + try: + if re.findall(pattern, content, re.MULTILINE|re.IGNORECASE): + publisher.warning('Regex match: {} ({})'.format(pattern, tag)) + # Sanitize tag to make it easy to read + tag = tag.strip().lower().replace(' ','-') + print('regex {} found'.format(tag)) + msg = 'infoleak:automatic-detection="regex-{}";{}'.format(tag, message) + p.populate_set_out(msg, 'Tags') + find = True + except TimeoutException: + print ("{0} processing timeout".format(paste.p_path)) + continue + else: + signal.alarm(0) if find: #Send to duplicate @@ -115,6 +131,7 @@ if __name__ == '__main__': # Setup the I/O queues p = Process(config_section) + max_execution_time = p.config.getint(config_section, "max_execution_time") # Sent to the logging a description of the module publisher.info("Run Regex module ") From ac95dd4ee8fcb12edcbb562bc60758ce836250eb Mon Sep 17 00:00:00 2001 From: Xavier Mertens Date: Fri, 2 Nov 2018 15:50:38 +0100 Subject: [PATCH 3/7] Added timeout for Regex module --- bin/packages/config.cfg.sample | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index fbe4f6f3..088e778b 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -102,6 +102,9 @@ ttl_duplicate = 86400 [RegexForTermsFrequency] max_execution_time = 60 +[Regex] +max_execution_time = 60 + ##### Redis ##### [Redis_Cache] host = localhost From 05de39e2711eb50e2ff7936c25664a1dce7a44c9 Mon Sep 17 00:00:00 2001 From: Xavier Mertens Date: Fri, 2 Nov 2018 15:55:55 +0100 Subject: [PATCH 4/7] Fixed line counter (count all lines) --- bin/Regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/Regex.py b/bin/Regex.py index 6b01ffc5..1d3216f1 100644 --- a/bin/Regex.py +++ b/bin/Regex.py @@ -77,8 +77,8 @@ def load_regex(force = False): except: print('Ignored line {}: Syntax error in "{}"'.format(line, regexConfig)) continue - line += 1 lregexes.append(l) + line += 1 print('DEBUG: regexes:') print(lregexes) return lregexes From b099e2ae4a7ed0ec0e5d3ca488fc9b62300849bc Mon Sep 17 00:00:00 2001 From: Xavier Mertens Date: Wed, 30 Jan 2019 22:27:33 +0100 Subject: [PATCH 5/7] Update LAUNCH.sh --- bin/LAUNCH.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index c751a439..684af83b 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -170,8 +170,6 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Keys.py; read x' sleep 0.1 - screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Regex.py; read x' - sleep 0.1 screen -S "Script_AIL" -X screen -t "Decoder" bash -c 'cd '${AIL_BIN}'; ./Decoder.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c 'cd '${AIL_BIN}'; ./Bitcoin.py; read x' From 1be91b88bf3aa1be7e0eb967f0cc285d25f7b97c Mon Sep 17 00:00:00 2001 From: Xavier Mertens Date: Wed, 30 Jan 2019 22:30:07 +0100 Subject: [PATCH 6/7] Fix: reset alarm timeout --- bin/Onion.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/Onion.py b/bin/Onion.py index 026617e9..801118d5 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -170,6 +170,8 @@ if __name__ == "__main__": print ("{0} processing timeout".format(PST.p_path)) continue + signal.alarm(0) + ''' for x in PST.get_regex(i2p_regex): # Extracting url with regex From 43d08d0d3ebfa5c9d724c6bad3a7690c354b5720 Mon Sep 17 00:00:00 2001 From: Xavier Mertens Date: Wed, 30 Jan 2019 22:36:10 +0100 Subject: [PATCH 7/7] Cleanup --- bin/Regex.py | 155 --------------------------------- bin/packages/config.cfg.sample | 3 - bin/packages/regex.cfg | 17 ---- 3 files changed, 175 deletions(-) delete mode 100644 bin/Regex.py delete mode 100644 bin/packages/regex.cfg diff --git a/bin/Regex.py b/bin/Regex.py deleted file mode 100644 index 1d3216f1..00000000 --- a/bin/Regex.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -""" -The Regex Module -================ - -Search for regular expressions stored in a flat file + tag. -The flat file is automatically reloaded when the MTIME changed. - -It uses the file 'packagess/regex.cfg'. Format: -Tag||Regex - -Xavier Mertens - -""" - -import time -import os -import re -import signal -from pubsublogger import publisher - -#from bin.packages import Paste -#from bin.Helper import Process - -from packages import Paste -from Helper import Process - -class TimeoutException(Exception): - pass - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - -# Change the path to your preferred one -regexConfig = 'packages/regex.cfg' - -regexes = [] - -def load_regex(force = False): - ''' - Load regexes from the config file and validate them - If 'True' passed as argument, force to reload - ''' - - lregexes = regexes - validate_regex = False - - try: - stats = os.stat(regexConfig) - mtime = int(stats.st_mtime) - if mtime > time.time()-60 or force == True: - # Regex config changed, reload the file - print('Loading regular expressions') - with open(regexConfig) as f: - lines = f.readlines() - lines = [x.strip() for x in lines] - validate_regex = True - except: - print('Cannot read {}'.format(regexConfig)) - return [] - - if validate_regex: - # Validate regexes read from the file - line=1 - lregexes = [] - for l in lines: - # Skip comments and empty lines - if len(l) > 0: - if l[0] == '#': - continue - try: - re.compile(l.split('||')[1]) - except: - print('Ignored line {}: Syntax error in "{}"'.format(line, regexConfig)) - continue - lregexes.append(l) - line += 1 - print('DEBUG: regexes:') - print(lregexes) - return lregexes - -def search_regex(paste): - content = paste.get_p_content() - find = False - global regexes - - regexes = load_regex(False) - - for r in regexes: - (tag,pattern) = r.split('||') - - signal.alarm(max_execution_time) - try: - if re.findall(pattern, content, re.MULTILINE|re.IGNORECASE): - publisher.warning('Regex match: {} ({})'.format(pattern, tag)) - # Sanitize tag to make it easy to read - tag = tag.strip().lower().replace(' ','-') - print('regex {} found'.format(tag)) - msg = 'infoleak:automatic-detection="regex-{}";{}'.format(tag, message) - p.populate_set_out(msg, 'Tags') - find = True - except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) - continue - else: - signal.alarm(0) - - if find: - #Send to duplicate - p.populate_set_out(message, 'Duplicate') - #send to Browse_warning_paste - msg = ('regex;{}'.format(message)) - print(message) - p.populate_set_out( msg, 'alertHandler') - - -if __name__ == '__main__': - global regexes - # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) - # Port of the redis instance used by pubsublogger - publisher.port = 6380 - # Script is the default channel used for the modules. - publisher.channel = 'Script' - - # Section name in bin/packages/modules.cfg - config_section = 'Regex' - - # Setup the I/O queues - p = Process(config_section) - max_execution_time = p.config.getint(config_section, "max_execution_time") - - # Sent to the logging a description of the module - publisher.info("Run Regex module ") - - # Load regular expressions from config file - regexes = load_regex(True) - - # Endless loop getting messages from the input queue - while True: - # Get one message from the input queue - message = p.get_from_set() - if message is None: - publisher.debug("{} queue is empty, waiting".format(config_section)) - time.sleep(1) - continue - - # Do something with the message from the queue - paste = Paste.Paste(message) - search_regex(paste) - - # (Optional) Send that thing to the next queue diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index c9a800b9..ace656cc 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -107,9 +107,6 @@ default_unnamed_feed_name = unnamed_feeder [RegexForTermsFrequency] max_execution_time = 60 -[Regex] -max_execution_time = 60 - ##### Redis ##### [Redis_Cache] host = localhost diff --git a/bin/packages/regex.cfg b/bin/packages/regex.cfg deleted file mode 100644 index dcc939c6..00000000 --- a/bin/packages/regex.cfg +++ /dev/null @@ -1,17 +0,0 @@ -# -# Regular expressions to be search in AIL -# -# Format: -# tag||regex -# -# tag: appened to the tag (ex: regex-tag) -# regex: the regular expression -# (Comments & empty lines are ignores) -# - -# Example1 -# Search for my name -personal-mention||xavier\s+mertens - -# Search for cicl.lu hostnames -circl||\w+\.circl\.lu