diff --git a/bin/Credential.py b/bin/Credential.py index ff8f8f97..233e424a 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -9,6 +9,18 @@ This module is consuming the Redis-list created by the Categ module. It apply credential regexes on paste content and warn if above a threshold. +It also splits the username and stores it into redis for searching purposes. + +Redis organization: + uniqNumForUsername: unique number attached to unique username + uniqNumForPath: unique number attached to unique path + -> uniqNum are used to avoid string duplication + AllCredentials: hash where keys are usernames and values are their unique numbers + AllCredentialsRev: the opposite of AllCredentials, uniqNum -> username + AllPath: hash where keys are paths and values are their unique numbers + AllPathRev: the opposite of AllPath, uniqNum -> path + CredToPathMapping_uniqNumForUsername -> (set) -> uniqNumForPath + """ import time @@ -17,8 +29,20 @@ from packages import Paste from pubsublogger import publisher from Helper import Process import re +import redis from pyfaup.faup import Faup +#split username with spec. 
# Split a username on special characters or on upper case letters; a part
# starting with one or two capitals followed by lower case is kept together
# (e.g. "JohnSmith42" -> "John", "Smith", "42").
REGEX_CRED = r"[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"

# Redis counters handing out the unique numbers.
REDIS_KEY_NUM_USERNAME = 'uniqNumForUsername'
# Was 'uniqNumForUsername' (copy-paste of the line above): paths and usernames
# silently shared one counter, contradicting the layout described in the
# module docstring, which documents a dedicated 'uniqNumForPath' counter.
# NOTE(review): on a database already filled with the old key, the path
# counter restarts from 1 -- flush or migrate the credential DB first.
REDIS_KEY_NUM_PATH = 'uniqNumForPath'

# Hashes: username -> unique number, and the reverse mapping.
REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
# Hashes: paste path -> unique number, and the reverse mapping.
REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
# Prefix of the per-credential sets; the full key is
# '<prefix>_<username unique number>' and holds path unique numbers.
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

# A username part is indexed only when it is strictly longer than this.
MINIMUMSIZETHRESHOLD = 3
# Index the credentials found in this paste so they can be searched later.
# Relies on `server_cred`, `creds` and `filepath` set earlier in the main loop.
if creds:
    # Unique number attached to the path: resolved ONCE per paste.
    # The previous code incremented the counter for every credential, so a
    # paste holding N credentials consumed N numbers, AllPath was overwritten
    # N times and AllPathRev kept N entries pointing at the same path.
    uniq_num_path = server_cred.hget(REDIS_KEY_ALL_PATH_SET, filepath)
    if uniq_num_path is None:  # path never indexed, create new entries
        uniq_num_path = server_cred.incr(REDIS_KEY_NUM_PATH)
        server_cred.hmset(REDIS_KEY_ALL_PATH_SET, {filepath: uniq_num_path})
        server_cred.hmset(REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: filepath})

    for cred in creds:
        cred = cred.split('@')[0]  # Split to ignore mail address
        if not cred:
            # Nothing before the '@': do not index an empty username
            continue

        # Unique number attached to unique username
        uniq_num_cred = server_cred.hget(REDIS_KEY_ALL_CRED_SET, cred)
        if uniq_num_cred is None:  # cred does not exist, create new entries
            uniq_num_cred = server_cred.incr(REDIS_KEY_NUM_USERNAME)
            server_cred.hmset(REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
            server_cred.hmset(REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})

        # Add the mapping between the credential and the path
        server_cred.sadd(REDIS_KEY_MAP_CRED_TO_PATH + '_' + str(uniq_num_cred), uniq_num_path)

        # Split the credential on capital letters, numbers, dots and so on.
        # Each part long enough becomes a Redis set pointing towards the
        # unique number of the credential it comes from.
        for part_cred in re.findall(REGEX_CRED, cred):
            if len(part_cred) > MINIMUMSIZETHRESHOLD:
                server_cred.sadd(part_cred, uniq_num_cred)
localhost +port = 6382 +db = 5 + [Redis_Level_DB] host = localhost port = 2016 diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt index 8bffb7b5..d6dd108d 100644 --- a/pip_packages_requirement.txt +++ b/pip_packages_requirement.txt @@ -21,6 +21,7 @@ nltk crcmod mmh3 ssdeep +python-Levenshtein #Others python-magic diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index c15e4dca..161e6130 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -53,6 +53,11 @@ r_serv_term = redis.StrictRedis( port=cfg.getint("Redis_Level_DB_TermFreq", "port"), db=cfg.getint("Redis_Level_DB_TermFreq", "db")) +r_serv_cred = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_TermCred", "host"), + port=cfg.getint("Redis_Level_DB_TermCred", "port"), + db=cfg.getint("Redis_Level_DB_TermCred", "db")) + r_serv_pasteName = redis.StrictRedis( host=cfg.get("Redis_Paste_Name", "host"), port=cfg.getint("Redis_Paste_Name", "port"), diff --git a/var/www/modules/terms/Flask_terms.py b/var/www/modules/terms/Flask_terms.py index 3df9bede..e0209735 100644 --- a/var/www/modules/terms/Flask_terms.py +++ b/var/www/modules/terms/Flask_terms.py @@ -3,6 +3,8 @@ ''' Flask functions and routes for the trending modules page + + note: The matching of credential against supplied credential is done using Levenshtein distance ''' import redis import datetime @@ -11,6 +13,8 @@ import flask from flask import Flask, render_template, jsonify, request, Blueprint import re import Paste +from pprint import pprint +import Levenshtein # ============ VARIABLES ============ import Flask_config @@ -18,9 +22,11 @@ import Flask_config app = Flask_config.app cfg = Flask_config.cfg r_serv_term = Flask_config.r_serv_term +r_serv_cred = Flask_config.r_serv_cred terms = Blueprint('terms', __name__, template_folder='templates') +'''TERM''' DEFAULT_MATCH_PERCENT = 50 #tracked @@ -36,6 +42,19 @@ TrackedRegexDate_Name = "TrackedRegexDate" TrackedSetSet_Name = 
def mixUserName(supplied, extensive=False):
    """Build the list of username variations derived from a supplied name.

    Only the first two whitespace-separated words of *supplied* are used for
    the case/ordering variations (e.g. "John Smith" gives "john", "Smith",
    "johnSMITH", "Jsmith", "JohnSmith", ...).

    When *extensive* is true, every proper substring of at least 3 characters
    of the supplied text (whitespace removed) is added as well, and only
    variations of 3 characters or more are returned.

    The result contains no empty entry and no duplicate; first-occurrence
    order is preserved. An empty or whitespace-only *supplied* yields an
    empty list.
    """
    words = supplied.split()[:2]
    if not words:
        # Nothing usable was supplied (previously raised IndexError)
        return []

    first = words[0]
    # A missing second word is treated as empty, so it contributes nothing
    # (a ' ' placeholder used to leak into the result as blank usernames).
    second = words[1] if len(words) > 1 else ''

    f_low, f_up = first.lower(), first.upper()
    s_low, s_up = second.lower(), second.upper()
    f_cap = first[:1].upper() + first[1:].lower()
    s_cap = second[:1].upper() + second[1:].lower()
    f_ini_up, f_ini_low = first[:1].upper(), first[:1].lower()
    s_ini_up, s_ini_low = second[:1].upper(), second[:1].lower()

    usernames = [
        #john, smith, John, Smith, JOHN, SMITH
        f_low, s_low, f_cap, s_cap, f_up, s_up,
        #johnsmith, smithjohn, JOHNsmith, johnSMITH, SMITHjohn, smithJOHN
        f_low + s_low, s_low + f_low, f_up + s_low,
        f_low + s_up, s_up + f_low, s_low + f_up,
        #Jsmith, JSmith, jsmith, jSmith, johnS, JOHNs
        f_ini_up + s_low, f_ini_up + s_cap, f_ini_low + s_low,
        f_ini_low + s_cap, f_low + s_ini_up, f_up + s_ini_low,
        #JohnSmith, Johnsmith, johnSmith
        f_cap + s_cap, f_cap + s_low, f_low + s_cap,
    ]

    min_len = 1
    if extensive:
        min_len = 3
        # Slice the supplied username(s) with every window size >= 3
        mixed = ''.join(supplied.split())
        for win_size in range(3, len(mixed)):
            # +1: the last window of each size starts at len - win_size
            # (it was skipped before, e.g. "bcd" for "abcd")
            for start in range(len(mixed) - win_size + 1):
                usernames.append(mixed[start:start + win_size])

    # Drop too-short entries and duplicates, keeping first-occurrence order,
    # so callers do not repeat the same Redis lookups.
    seen = set()
    filtered_usernames = []
    for usr in usernames:
        if len(usr) >= min_len and usr not in seen:
            seen.add(usr)
            filtered_usernames.append(usr)
    return filtered_usernames
def _collect_cred_nums(name):
    """Return the credential unique numbers reachable from *name*.

    *name* is looked up both as a full username (field of the AllCredentials
    hash) and as the name of a Redis set, which is how the Credential module
    stores username parts (set named after the part, holding the unique
    numbers of the credentials containing it).
    """
    # NOTE(review): a term equal to the name of a non-set key of this DB
    # (e.g. 'AllCredentials') presumably makes SMEMBERS fail with a
    # wrong-type error -- confirm, and consider prefixing the part keys.
    found = set()
    num = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET, name)
    if num is not None:
        found.add(num)
    found.update(r_serv_cred.smembers(name))
    return found


@terms.route("/credentials_management_action/", methods=['GET'])
def cred_management_action():
    """Search the credential index for usernames matching a supplied term.

    Query parameters:
        term: the name(s) to look for
        action: only "seek" triggers a search
        section: echoed back in the response
        extensive: "true" to additionally substring-match every variation
            of the term against every stored username (slow)

    Returns a JSON object with "section", "action", "term" and "data";
    "data" holds four parallel lists: "usr" (username), "path" (unique
    numbers of the pastes containing it), "numPaste" (how many) and
    "simil" (Levenshtein similarity, formatted as a percentage).
    """
    # Default to '' so a missing 'term' no longer fails on None.encode()
    supplied = request.args.get('term', '').encode('utf-8')
    action = request.args.get('action')
    section = request.args.get('section')
    extensive = request.args.get('extensive') == "true"

    uniq_num_set = set()
    # An empty term cannot match anything: skip the search entirely
    if action == "seek" and supplied.strip():
        possibilities = mixUserName(supplied, extensive)
        for poss in possibilities:
            uniq_num_set.update(_collect_cred_nums(poss))

        #Extensive /!\ every stored username is checked against every variation
        if extensive and possibilities:
            # Only the usernames are needed, not their numbers
            all_usernames = r_serv_cred.hkeys(REDIS_KEY_ALL_CRED_SET)
            total = len(all_usernames)
            # Report progress roughly every 5%. The step is at least 1:
            # the former int(tot_iter/20) was 0 for small indexes and the
            # modulo raised ZeroDivisionError.
            step = max(1, total // 20)
            for idx, temp_username in enumerate(all_usernames):
                if idx % step == 0:
                    print("searching: {}% done".format(float(idx)/float(total)*100))
                if any(poss in temp_username for poss in possibilities):
                    uniq_num_set.update(_collect_cred_nums(temp_username))

    data = {'usr': [], 'path': [], 'numPaste': [], 'simil': []}
    if uniq_num_set:
        # Each supplied word, plus all words glued together, is compared
        # with the stored username
        candidates = supplied.split()
        candidates.append(supplied.replace(' ', ''))

    for Unum in uniq_num_set:
        username = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET_REV, Unum)
        if username is None:
            # Reverse mapping missing: nothing to display for this number
            continue

        # Keep the best similarity among the candidates. The former loop
        # overwrote the ratio at each iteration, so only the last candidate
        # (the glued words) was ever taken into account.
        levenRatio = max(float(Levenshtein.ratio(cand, username)) for cand in candidates)
        levenRatioStr = "{:.1%}".format(levenRatio)

        allPathNum = list(r_serv_cred.smembers(REDIS_KEY_MAP_CRED_TO_PATH+'_'+Unum))
        data['usr'].append(username)
        data['path'].append(allPathNum)
        data['numPaste'].append(len(allPathNum))
        data['simil'].append(levenRatioStr)

    to_return = {}
    to_return["section"] = section
    to_return["action"] = action
    to_return["term"] = supplied
    to_return["data"] = data

    return jsonify(to_return)
+
+
+

Credential seeker

+
+ + +
+
+
+
+
+
+ Credential seeker +
+
+ + +
+ + + + +
+ + + + + + + + + + + + +
UsernameSimilarity# concerned paste(s)Action
+ +
+
+ +
+ +
+
+ +
+ +
+ + + + + + + + + diff --git a/var/www/modules/terms/templates/header_terms.html b/var/www/modules/terms/templates/header_terms.html index d834bb31..8fa38bc5 100644 --- a/var/www/modules/terms/templates/header_terms.html +++ b/var/www/modules/terms/templates/header_terms.html @@ -2,6 +2,7 @@ diff --git a/var/www/templates/header.html b/var/www/templates/header.html index 34682d07..1df983a0 100644 --- a/var/www/templates/header.html +++ b/var/www/templates/header.html @@ -20,6 +20,7 @@