From 9d0d0b4303747ddac540b1d4a0bec5fa580288cf Mon Sep 17 00:00:00 2001 From: Sami Mokaddem Date: Mon, 11 Dec 2017 17:28:34 +0100 Subject: [PATCH 1/3] update: Moved filtering operation (thresholds, number of matching in the categ file) in the configuration file. It permits to better control the flow of pastes. Also set default mixer duplicate filtering to 3 (Do not filter) --- bin/Categ.py | 3 ++- bin/Credential.py | 12 +++++++----- bin/Mixer.py | 2 +- bin/packages/config.cfg.sample | 16 ++++++++++++++-- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/bin/Categ.py b/bin/Categ.py index 986080d4..3bf68664 100755 --- a/bin/Categ.py +++ b/bin/Categ.py @@ -52,6 +52,7 @@ if __name__ == "__main__": config_section = 'Categ' p = Process(config_section) + matchingThreshold = p.config.getint("Categ", "matchingThreshold") # SCRIPT PARSER # parser = argparse.ArgumentParser(description='Start Categ module on files.') @@ -90,7 +91,7 @@ if __name__ == "__main__": for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) - if len(found) > 0: + if len(found) >= matchingThreshold: msg = '{} {}'.format(paste.p_path, len(found)) print msg, categ p.populate_set_out(msg, categ) diff --git a/bin/Credential.py b/bin/Credential.py index bb52f311..29f80f88 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -41,7 +41,6 @@ REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev' REDIS_KEY_ALL_PATH_SET = 'AllPath' REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev' REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping' -MINIMUMSIZETHRESHOLD = 3 if __name__ == "__main__": publisher.port = 6380 @@ -49,6 +48,8 @@ if __name__ == "__main__": config_section = "Credential" p = Process(config_section) publisher.info("Find credentials") + + minimumLengthThreshold = p.config.getint("Credential", "minimumLengthThreshold") faup = Faup() server_cred = redis.StrictRedis( @@ -56,7 +57,8 @@ if __name__ == "__main__": port=p.config.get("Redis_Level_DB_TermCred", "port"), 
db=p.config.get("Redis_Level_DB_TermCred", "db")) - critical = 8 + criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert") + minTopPassList = p.config.getint("Credential", "minTopPassList") regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" @@ -71,7 +73,7 @@ if __name__ == "__main__": filepath, count = message.split() - if count < 5: + if count < minTopPassList: # Less than 5 matches from the top password list, false positive. print("false positive:", count) continue @@ -94,7 +96,7 @@ if __name__ == "__main__": print('\n '.join(creds)) #num of creds above tresh, publish an alert - if len(creds) > critical: + if len(creds) > criticalNumberToAlert: print("========> Found more than 10 credentials in this file : {}".format(filepath)) publisher.warning(to_print) #Send to duplicate @@ -154,6 +156,6 @@ if __name__ == "__main__": #Add the split to redis, each split point towards its initial credential unique number splitedCred = re.findall(REGEX_CRED, cred) for partCred in splitedCred: - if len(partCred) > MINIMUMSIZETHRESHOLD: + if len(partCred) > minimumLengthThreshold: server_cred.sadd(partCred, uniq_num_cred) diff --git a/bin/Mixer.py b/bin/Mixer.py index f2988a01..83475ac4 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -20,7 +20,7 @@ Depending on the configuration, this module will process the feed as follow: - Else, do not process it but keep track for statistics on duplicate operation_mode 3: "Don't look if duplicate" - - SImply do not bother to check if it is a duplicate + - Simply do not bother to check if it is a duplicate Note that the hash of the content is defined as the sha1(gzip64encoded). 
diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index da50932f..158c0491 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -30,6 +30,18 @@ default_display = 10 minute_processed_paste = 10 #### Modules #### +[Categ] +#Minimum number of match between the paste and the category file +matchingThreshold=1 + +[Credentials] +#Minimum length that a credential must have to be considered as such +minimumLengthThreshold=3 +#Will be pushed as alert if the number of credentials is greater to that number +criticalNumberToAlert=8 +#Will be considered as false positive if less that X matches from the top password list +minTopPassList=5 + [Modules_Duplicates] #Number of month to look back maximum_month_range = 3 @@ -45,8 +57,8 @@ min_paste_size = 0.3 threshold_stucked_module=600 [Module_Mixer] -#Define the configuration of the mixer, possible value: 1 or 2 -operation_mode = 1 +#Define the configuration of the mixer, possible value: 1, 2 or 3 +operation_mode = 3 #Define the time that a paste will be considerate duplicate. 
in seconds (1day = 86400) ttl_duplicate = 86400 From 860acbf7cf75ac6324bd197aa181a6998e04927f Mon Sep 17 00:00:00 2001 From: Sami Mokaddem Date: Tue, 12 Dec 2017 09:29:15 +0100 Subject: [PATCH 2/3] fix: Renamed Credentials into Credential section title config change: By default, zmq listens to both local and CRF --- bin/packages/config.cfg.sample | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 158c0491..36fd7e7b 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -34,7 +34,7 @@ minute_processed_paste = 10 #Minimum number of match between the paste and the category file matchingThreshold=1 -[Credentials] +[Credential] #Minimum length that a credential must have to be considered as such minimumLengthThreshold=3 #Will be pushed as alert if the number of credentials is greater to that number @@ -151,7 +151,7 @@ maxDuplicateToPushToMISP=10 # e.g.: tcp://127.0.0.1:5556,tcp://127.0.0.1:5557 [ZMQ_Global] #address = tcp://crf.circl.lu:5556 -address = tcp://127.0.0.1:5556 +address = tcp://127.0.0.1:5556,tcp://crf.circl.lu:5556 channel = 102 bind = tcp://127.0.0.1:5556 From 05b22e896373ee155f8a93ac01a093b9cc4e4783 Mon Sep 17 00:00:00 2001 From: Sami Mokaddem Date: Tue, 12 Dec 2017 11:16:40 +0100 Subject: [PATCH 3/3] typo: added word + deleted trailing spaces --- bin/Mixer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/Mixer.py b/bin/Mixer.py index 83475ac4..4b846ebf 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -19,7 +19,7 @@ Depending on the configuration, this module will process the feed as follow: - Elseif, the saved content associated with the paste is not the same, process it - Else, do not process it but keep track for statistics on duplicate - operation_mode 3: "Don't look if duplicate" + operation_mode 3: "Don't look if duplicated content" - Simply do not bother to check if it is a duplicate Note that the hash of 
the content is defined as the sha1(gzip64encoded). @@ -126,7 +126,7 @@ if __name__ == '__main__': # Keep duplicate coming from different sources elif operation_mode == 2: - # Filter to avoid duplicate + # Filter to avoid duplicate content = server.get('HASH_'+paste_name) if content is None: # New content