update: Moved the filtering options (thresholds, number of matches against the
categ file) into the configuration file. This allows finer control over the
flow of pastes.
Also set the default Mixer duplicate filtering to operation_mode 3 (do not filter).
Sami Mokaddem 2017-12-11 17:28:34 +01:00
parent 9cab76cf88
commit 9d0d0b4303
4 changed files with 24 additions and 9 deletions

View file

@@ -52,6 +52,7 @@ if __name__ == "__main__":
     config_section = 'Categ'
     p = Process(config_section)
+    matchingThreshold = p.config.getint("Categ", "matchingThreshold")
 
     # SCRIPT PARSER #
     parser = argparse.ArgumentParser(description='Start Categ module on files.')
@@ -90,7 +91,7 @@ if __name__ == "__main__":
         for categ, pattern in tmp_dict.items():
             found = set(re.findall(pattern, content))
-            if len(found) > 0:
+            if len(found) >= matchingThreshold:
                 msg = '{} {}'.format(paste.p_path, len(found))
                 print msg, categ
                 p.populate_set_out(msg, categ)
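For reference, the effect of the Categ change in isolation, as a minimal standalone sketch; the pattern, content, and threshold below are illustrative, not taken from the repository:

    import re

    # Hypothetical category pattern and paste content, for illustration only.
    pattern = r'bitcoin|btc|wallet'
    content = 'send btc to my wallet, bitcoin accepted here'
    matchingThreshold = 2  # corresponds to matchingThreshold in the config file

    found = set(re.findall(pattern, content))
    # Old behaviour: publish on any match (len(found) > 0).
    # New behaviour: require at least matchingThreshold distinct matches,
    # which filters out pastes that hit a category file only once by accident.
    if len(found) >= matchingThreshold:
        print('{} distinct matches -> paste is forwarded'.format(len(found)))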

View file

@@ -41,7 +41,6 @@ REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
 REDIS_KEY_ALL_PATH_SET = 'AllPath'
 REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
 REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'
-MINIMUMSIZETHRESHOLD = 3
 
 if __name__ == "__main__":
     publisher.port = 6380
@@ -49,6 +48,8 @@ if __name__ == "__main__":
     config_section = "Credential"
     p = Process(config_section)
     publisher.info("Find credentials")
+    minimumLengthThreshold = p.config.getint("Credential", "minimumLengthThreshold")
+
     faup = Faup()
     server_cred = redis.StrictRedis(
@@ -56,7 +57,8 @@ if __name__ == "__main__":
         port=p.config.get("Redis_Level_DB_TermCred", "port"),
         db=p.config.get("Redis_Level_DB_TermCred", "db"))
-    critical = 8
+    criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
+    minTopPassList = p.config.getint("Credential", "minTopPassList")
 
     regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
     regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
@@ -71,7 +73,7 @@ if __name__ == "__main__":
         filepath, count = message.split()
-        if count < 5:
+        if count < minTopPassList:
             # Less than 5 matches from the top password list, false positive.
             print("false positive:", count)
             continue
@@ -94,7 +96,7 @@ if __name__ == "__main__":
         print('\n '.join(creds))
         #num of creds above tresh, publish an alert
-        if len(creds) > critical:
+        if len(creds) > criticalNumberToAlert:
             print("========> Found more than 10 credentials in this file : {}".format(filepath))
             publisher.warning(to_print)
             #Send to duplicate
@@ -154,6 +156,6 @@ if __name__ == "__main__":
             #Add the split to redis, each split point towards its initial credential unique number
             splitedCred = re.findall(REGEX_CRED, cred)
             for partCred in splitedCred:
-                if len(partCred) > MINIMUMSIZETHRESHOLD:
+                if len(partCred) > minimumLengthThreshold:
                     server_cred.sadd(partCred, uniq_num_cred)
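Taken together, the three new Credential options gate the module at three points. A minimal standalone sketch of that flow, with illustrative values and fake credentials (the real module reads the thresholds through p.config.getint as shown above):

    # Values mirror the defaults added to the sample config file below.
    minTopPassList = 5          # fewer top-password-list hits => false positive
    criticalNumberToAlert = 8   # more credentials than this => publish an alert
    minimumLengthThreshold = 3  # shorter credential fragments are not indexed

    count = 3  # hypothetical number of top-password-list matches
    if count < minTopPassList:
        print('false positive:', count)  # the paste would be dropped here

    creds = ['alice@example.com:hunter2'] * 10  # hypothetical extracted creds
    if len(creds) > criticalNumberToAlert:
        print('alert: {} credentials in one paste'.format(len(creds)))

    # Only fragments longer than minimumLengthThreshold are mapped in Redis,
    # so 'com' (length 3) is skipped while the longer parts are indexed.
    for partCred in ('alice', 'example', 'com', 'hunter2'):
        if len(partCred) > minimumLengthThreshold:
            print('index fragment:', partCred)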

View file

@@ -20,7 +20,7 @@ Depending on the configuration, this module will process the feed as follow:
         - Else, do not process it but keep track for statistics on duplicate
     operation_mode 3: "Don't look if duplicate"
-        - SImply do not bother to check if it is a duplicate
+        - Simply do not bother to check if it is a duplicate
 
 Note that the hash of the content is defined as the sha1(gzip64encoded).
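The sha1(gzip64encoded) mentioned in the docstring is the digest of the compressed, base64-encoded payload as it arrives on the feed, not of the decompressed text. A minimal sketch of computing that duplicate key (variable names are assumptions):

    import base64
    import gzip
    import hashlib
    import io

    raw = b'example paste content'

    # Build the gzip64encoded form: gzip the raw bytes, then base64-encode.
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
        f.write(raw)
    gzip64encoded = base64.b64encode(buf.getvalue())

    # The duplicate key is the sha1 of that encoded blob.
    duplicate_key = hashlib.sha1(gzip64encoded).hexdigest()
    print(duplicate_key)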

View file

@@ -30,6 +30,18 @@ default_display = 10
 minute_processed_paste = 10
 
 #### Modules ####
+[Categ]
+#Minimum number of match between the paste and the category file
+matchingThreshold=1
+
+[Credentials]
+#Minimum length that a credential must have to be considered as such
+minimumLengthThreshold=3
+#Will be pushed as alert if the number of credentials is greater to that number
+criticalNumberToAlert=8
+#Will be considered as false positive if less that X matches from the top password list
+minTopPassList=5
+
 [Modules_Duplicates]
 #Number of month to look back
 maximum_month_range = 3
@@ -45,8 +57,8 @@ min_paste_size = 0.3
 threshold_stucked_module=600
 
 [Module_Mixer]
-#Define the configuration of the mixer, possible value: 1 or 2
-operation_mode = 1
+#Define the configuration of the mixer, possible value: 1, 2 or 3
+operation_mode = 3
 #Define the time that a paste will be considerate duplicate. in seconds (1day = 86400)
 ttl_duplicate = 86400
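A quick way to sanity-check the new keys is a plain ConfigParser load; the file path below is an assumption, and the modules themselves normally reach these values through the Process wrapper (e.g. p.config.getint("Categ", "matchingThreshold")):

    try:
        import configparser                  # Python 3
    except ImportError:
        import ConfigParser as configparser  # Python 2, current at the time

    cfg = configparser.ConfigParser()
    cfg.read('config.cfg.sample')  # assumed local copy of the sample file

    # getint() parses the raw string values into integers.
    print(cfg.getint('Categ', 'matchingThreshold'))      # -> 1
    print(cfg.getint('Module_Mixer', 'operation_mode'))  # -> 3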