diff --git a/bin/Credential.py b/bin/Credential.py
index d8bb8a84..8c62f34a 100755
--- a/bin/Credential.py
+++ b/bin/Credential.py
@@ -48,7 +48,7 @@ if __name__ == "__main__":
         if sites_set:
             message += ' Related websites: {}'.format(', '.join(sites_set))

-        to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
+        to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path)

         print('\n '.join(creds))
diff --git a/bin/CreditCards.py b/bin/CreditCards.py
index 04ce9c62..6c9bf9c1 100755
--- a/bin/CreditCards.py
+++ b/bin/CreditCards.py
@@ -63,14 +63,14 @@ if __name__ == "__main__":
                 to_print = 'CreditCard;{};{};{};'.format(
                     paste.p_source, paste.p_date, paste.p_name)
                 if (len(creditcard_set) > 0):
-                    publisher.warning('{}Checked {} valid number(s)'.format(
-                        to_print, len(creditcard_set)))
+                    publisher.warning('{}Checked {} valid number(s);{}'.format(
+                        to_print, len(creditcard_set), paste.p_path))
                     #Send to duplicate
                     p.populate_set_out(filename, 'Duplicate')
                     #send to Browse_warning_paste
                     p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste')
                 else:
-                    publisher.info('{}CreditCard related'.format(to_print))
+                    publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path))
             else:
                 publisher.debug("Script creditcard is idling 1m")
                 time.sleep(10)
diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py
index 8f316333..03ea8f96 100755
--- a/bin/CurveManageTopSets.py
+++ b/bin/CurveManageTopSets.py
@@ -17,6 +17,7 @@ Requirements

 import redis
 import time
+import datetime
 import copy
 from pubsublogger import publisher
 from packages import lib_words
@@ -44,13 +45,14 @@ def manage_top_set():
     startDate = datetime.datetime.now()
     startDate = startDate.replace(hour=0, minute=0, second=0, microsecond=0)
     startDate = calendar.timegm(startDate.timetuple())
+    blacklist_size = int(server_term.scard(BlackListTermsSet_Name))

     dico = {}

-    # Retrieve top data (2*max_card) from days sets
+    # Retrieve top data (max_card + blacklist_size) from days sets
     for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
         curr_set = top_termFreq_setName_day[0] + str(timestamp)
-        array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2)
+        array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)

         for word, value in array_top_day:
             if word not in server_term.smembers(BlackListTermsSet_Name):
@@ -87,6 +89,11 @@ def manage_top_set():
     for elem in array_month:
         server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0])

+    timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
+    value = str(timestamp) + ", " + "-"
+    r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
+    print "refreshed module"
+

 if __name__ == '__main__':
@@ -105,6 +112,18 @@ if __name__ == '__main__':
     cfg = ConfigParser.ConfigParser()
     cfg.read(configfile)

+
+    # For Module Manager
+    r_temp = redis.StrictRedis(
+        host=cfg.get('RedisPubSub', 'host'),
+        port=cfg.getint('RedisPubSub', 'port'),
+        db=cfg.getint('RedisPubSub', 'db'))
+
+    timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
+    value = str(timestamp) + ", " + "-"
+    r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
+    r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid()))
+
     server_term = redis.StrictRedis(
         host=cfg.get("Redis_Level_DB_TermFreq", "host"),
         port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
         db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
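For context on the MODULE_* keys this patch writes: each module heartbeats by storing a "<timestamp>, <path>" string under MODULE_<name>_<pid> and registering its PID in the MODULE_TYPE_<name> set, which ModuleInformation.py and the dashboard later read. A minimal sketch of both sides of that convention, in Python 2 to match the codebase (the localhost connection settings are assumptions, not taken from the patch):

    import os
    import time
    import redis

    r = redis.StrictRedis(host='localhost', port=6381, db=1)  # assumed settings

    def heartbeat(module_name, current_paste='-'):
        # Value format used by the patch: "<unix timestamp>, <paste path or '-'>"
        value = "{}, {}".format(int(time.time()), current_paste)
        r.set("MODULE_" + module_name + "_" + str(os.getpid()), value)
        r.sadd("MODULE_TYPE_" + module_name, str(os.getpid()))

    def read_heartbeat(module_name, pid):
        raw = r.get("MODULE_" + module_name + "_" + str(pid))
        if raw is None:
            return None
        timestamp, path = raw.split(", ")
        return int(timestamp), path  # age in seconds: time.time() - timestamp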
diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py
index 14a417f2..74522917 100755
--- a/bin/DomClassifier.py
+++ b/bin/DomClassifier.py
@@ -51,13 +51,13 @@ def main():
             localizeddomains = c.include(expression=cc_tld)
             if localizeddomains:
                 print(localizeddomains)
-                publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
-                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld))
+                publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
+                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
             localizeddomains = c.localizedomain(cc=cc)
             if localizeddomains:
                 print(localizeddomains)
-                publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
-                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc))
+                publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
+                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
         except IOError:
             print "CRC Checksum Failed on :", PST.p_path
             publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
diff --git a/bin/Duplicates.py b/bin/Duplicates.py
index d2efcab3..50def29f 100755
--- a/bin/Duplicates.py
+++ b/bin/Duplicates.py
@@ -131,8 +131,10 @@ if __name__ == "__main__":
                         # index of paste
                         index_current = r_serv_dico.get(dico_hash)
                         paste_path = r_serv_dico.get(index_current)
+                        paste_date = r_serv_dico.get(index_current+'_date')
+                        paste_date = paste_date if paste_date != None else "No date available"
                         if paste_path != None:
-                            hash_dico[dico_hash] = (hash_type, paste_path, percent)
+                            hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)

                         print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)
                     except Exception,e:
@@ -142,6 +144,7 @@ if __name__ == "__main__":
             # Add paste in DB after checking to prevent its analysis twice
             # hash_type_i -> index_i AND index_i -> PST.PATH
             r_serv1.set(index, PST.p_path)
+            r_serv1.set(index+'_date', PST._get_p_date())
             r_serv1.sadd("INDEX", index)
             # Adding hashes in Redis
             for hash_type, paste_hash in paste_hashes.iteritems():
@@ -152,7 +155,7 @@ if __name__ == "__main__":

             # if there is data in this dictionnary
             if len(hash_dico) != 0:
-                # paste_tuple = (paste_path, percent)
+                # paste_tuple = (hash_type, paste_path, percent, paste_date)
                 for dico_hash, paste_tuple in hash_dico.items():
                     dupl.append(paste_tuple)

@@ -162,7 +165,7 @@ if __name__ == "__main__":
             if dupl != []:
                 PST.__setattr__("p_duplicate", dupl)
                 PST.save_attribute_redis("p_duplicate", dupl)
-                publisher.info('{}Detected {}'.format(to_print, len(dupl)))
+                publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
                 print '{}Detected {}'.format(to_print, len(dupl))

             y = time.time()
diff --git a/bin/Mail.py b/bin/Mail.py
index 2b3ed5fc..161082b0 100755
--- a/bin/Mail.py
+++ b/bin/Mail.py
@@ -55,9 +55,9 @@ if __name__ == "__main__":
                                    list(MX_values[1])))

                 pprint.pprint(MX_values)
-                to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\
+                to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\
                     format(PST.p_source, PST.p_date, PST.p_name,
-                           MX_values[0])
+                           MX_values[0], PST.p_path)
                 if MX_values[0] > is_critical:
                     publisher.warning(to_print)
                     #Send to duplicate
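Across these modules the patch appends paste.p_path as one more semicolon-separated field at the end of every publisher message. Anything consuming those logs has to account for the extra trailing field; a minimal parsing sketch (the sample line is invented for illustration):

    def parse_module_log(line):
        # Format after this patch: Module;source;date;name;message;paste_path
        fields = line.split(';')
        return {
            'module': fields[0],
            'source': fields[1],
            'date': fields[2],
            'name': fields[3],
            'message': ';'.join(fields[4:-1]),  # the message may itself contain ';'
            'path': fields[-1],
        }

    example = 'Mails;pastebin.com_pro;20160622;abc.gz;Checked 12 e-mail(s);/path/to/abc.gz'
    print(parse_module_log(example)['path'])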
diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py
index df07bf14..5eb81e84 100755
--- a/bin/ModuleInformation.py
+++ b/bin/ModuleInformation.py
@@ -24,27 +24,36 @@ import ConfigParser
 import json
 from terminaltables import AsciiTable
 import textwrap
+from colorama import Fore, Back, Style, init

 # CONFIG VARIABLES
-threshold_stucked_module = 60*60*1 #1 hour
+threshold_stucked_module = 60*10*1 #10 minutes (overridden from config.cfg at startup)
+kill_retry_threshold = 60 #1m
 log_filename = "../logs/moduleInfo.log"
 command_search_pid = "ps a -o pid,cmd | grep {}"
 command_search_name = "ps a -o pid,cmd | grep {}"
 command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""

+init() #Necessary for colorama
+printarrayGlob = [None]*14
+printarrayGlob.insert(0, ["Time", "Module", "PID", "Action"])
+lastTimeKillCommand = {}

 def getPid(module):
     p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
     for line in p.stdout:
+        print line
         splittedLine = line.split()
         if 'python2' in splittedLine:
             return int(splittedLine[0])
-        else:
-            return None
+    return None

 def clearRedisModuleInfo():
     for k in server.keys("MODULE_*"):
         server.delete(k)
+    inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+    printarrayGlob.insert(1, [inst_time, "*", "-", "Cleared redis module info"])
+    printarrayGlob.pop()

 def cleanRedis():
     for k in server.keys("MODULE_TYPE_*"):
@@ -60,34 +69,93 @@ def cleanRedis():
             if not flag_pid_valid:
                 print flag_pid_valid, 'cleaning', pid, 'in', k
                 server.srem(k, pid)
-                time.sleep(5)
+                inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+                printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
+                printarrayGlob.pop()
+    #time.sleep(5)
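The printarrayGlob structure above is a fixed-size rolling action log: the header sits at index 0, each new row is inserted just under it, and the oldest row is popped so the table never grows. The same behavior in isolation:

    # Fixed-capacity action log: index 0 is the header, newest row at index 1.
    action_log = [None] * 14
    action_log.insert(0, ["Time", "Module", "PID", "Action"])

    def log_action(row):
        action_log.insert(1, row)  # newest row right under the header
        action_log.pop()           # drop the oldest to keep the size constant

    log_action(["12:00:01", "Mail", 4242, "Killed"])
    log_action(["12:00:02", "Mail", "?", "Restarted"])
    print([r for r in action_log if r is not None])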
-def kill_module(module):
+def kill_module(module, pid):
     print ''
     print '-> trying to kill module:', module

-    pid = getPid(module)
+    if pid is None:
+        print 'pid was None'
+        printarrayGlob.insert(1, [0, module, pid, "PID was None"])
+        printarrayGlob.pop()
+        pid = getPid(module)
+    else: #Verify that the pid is at least in redis
+        if server.exists("MODULE_"+module+"_"+str(pid)) == 0:
+            return
+
+    lastTimeKillCommand[pid] = int(time.time())
     if pid is not None:
-        os.kill(pid, signal.SIGUSR1)
+        try:
+            os.kill(pid, signal.SIGUSR1)
+        except OSError:
+            print pid, 'already killed'
+            inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+            printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"])
+            printarrayGlob.pop()
+            return
         time.sleep(1)
         if getPid(module) is None:
             print module, 'has been killed'
             print 'restarting', module, '...'
             p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
+            inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+            printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
+            printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"])
+            printarrayGlob.pop()
+            printarrayGlob.pop()
         else:
             print 'killing failed, retrying...'
-            time.sleep(3)
+            inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+            printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."])
+            printarrayGlob.pop()
+
+            time.sleep(1)
             os.kill(pid, signal.SIGUSR1)
             time.sleep(1)
             if getPid(module) is None:
                 print module, 'has been killed'
                 print 'restarting', module, '...'
                 p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
+                inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+                printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
+                printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"])
+                printarrayGlob.pop()
+                printarrayGlob.pop()
             else:
                 print 'killing failed!'
-                time.sleep(7)
+                inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+                printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"])
+                printarrayGlob.pop()
+    else:
+        print 'Module does not exist'
+        inst_time = datetime.datetime.fromtimestamp(int(time.time()))
+        printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"])
+        printarrayGlob.pop()
+    #time.sleep(5)
+    cleanRedis()
+
+def get_color(time, idle):
+    if time is not None:
+        temp = time.split(':')
+        time = int(temp[0])*3600 + int(temp[1])*60 + int(temp[2])
+
+        if time >= threshold_stucked_module:
+            if not idle:
+                return Back.RED + Style.BRIGHT
+            else:
+                return Back.MAGENTA + Style.BRIGHT
+        elif time > threshold_stucked_module/2:
+            return Back.YELLOW + Style.BRIGHT
+        else:
+            return Back.GREEN + Style.BRIGHT
+    else:
+        return Style.RESET_ALL


 if __name__ == "__main__":
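kill_module records every attempt in lastTimeKillCommand, and the main loop below only re-issues a kill for a PID once kill_retry_threshold seconds have elapsed since the last attempt. That throttle guard in isolation (a sketch of the same logic, not the patch's exact code):

    import time

    kill_retry_threshold = 60  # seconds, as in the patch
    last_kill_attempt = {}     # pid -> unix time of the last kill command

    def should_retry_kill(pid, now=None):
        now = now if now is not None else time.time()
        try:
            elapsed = now - last_kill_attempt[pid]
        except KeyError:
            elapsed = kill_retry_threshold + 1  # never tried: allow immediately
        return elapsed > kill_retry_threshold

    print(should_retry_kill(4242))        # True, no attempt recorded yet
    last_kill_attempt[4242] = time.time()
    print(should_retry_kill(4242))        # False until 60 s have elapsed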
@@ -108,6 +176,8 @@ if __name__ == "__main__":
     cfg = ConfigParser.ConfigParser()
     cfg.read(configfile)

+    threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module")
+
     # REDIS #
     server = redis.StrictRedis(
         host=cfg.get("Redis_Queues", "host"),
@@ -120,11 +190,14 @@ if __name__ == "__main__":
     lastTime = datetime.datetime.now()

     module_file_array = set()
+    no_info_modules = {}
     path_allmod = os.path.join(os.environ['AIL_HOME'], 'doc/all_modules.txt')
     with open(path_allmod, 'r') as module_file:
         for line in module_file:
             module_file_array.add(line[:-1])

+    cleanRedis()
+
     while True:

         all_queue = set()
@@ -135,6 +208,7 @@ if __name__ == "__main__":
             all_queue.add(queue)
             key = "MODULE_" + queue + "_"
             keySet = "MODULE_TYPE_" + queue
+            array_module_type = []

             for moduleNum in server.smembers(keySet):
                 value = server.get(key + str(moduleNum))
@@ -148,20 +222,41 @@ if __name__ == "__main__":
                         if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module:
                             log = open(log_filename, 'a')
                             log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
-                            if args.autokill == 1:
-                                kill_module(queue)
+                            try:
+                                last_kill_try = time.time() - lastTimeKillCommand[moduleNum]
+                            except KeyError:
+                                last_kill_try = kill_retry_threshold+1
+                            if args.autokill == 1 and last_kill_try > kill_retry_threshold :
+                                kill_module(queue, int(moduleNum))

-                        printarray1.append([str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
+                        array_module_type.append([get_color(processed_time_readable, False) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, False)])

                     else:
-                        printarray2.append([str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
+                        printarray2.append([get_color(processed_time_readable, True) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, True)])
+            array_module_type.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
+            for e in array_module_type:
+                printarray1.append(e)

         for curr_queue in module_file_array:
             if curr_queue not in all_queue:
-                printarray3.append([curr_queue, "Not running"])
+                printarray3.append([curr_queue, "Not running"])
+            else:
+                if len(list(server.smembers('MODULE_TYPE_'+curr_queue))) == 0:
+                    if curr_queue not in no_info_modules:
+                        no_info_modules[curr_queue] = int(time.time())
+                        printarray3.append([curr_queue, "No data"])
+                    else:
+                        #If no info since long time, try to kill
+                        if args.autokill == 1 and int(time.time()) - no_info_modules[curr_queue] > threshold_stucked_module:
+                            kill_module(curr_queue, None)
+                            no_info_modules[curr_queue] = int(time.time())
+                        printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(threshold_stucked_module - (int(time.time()) - no_info_modules[curr_queue])) + "s"])

-        printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
-        printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
+
+        #printarray1.sort(lambda x,y: cmp(x[0], y[0]), reverse=False)
+        printarray1.sort(key=lambda x: x[0][9:], reverse=False)
+        #printarray2.sort(lambda x,y: cmp(x[0], y[0]), reverse=False)
+        printarray2.sort(key=lambda x: x[0][9:], reverse=False)
         printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
         printarray2.insert(0,["Queue", "PID","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
         printarray3.insert(0,["Queue", "State"])
@@ -204,11 +299,21 @@ if __name__ == "__main__":
         t3 = AsciiTable(printarray3, title="Not running queues")
         t3.column_max_width(1)

+        printarray4 = []
+        for elem in printarrayGlob:
+            if elem is not None:
+                printarray4.append(elem)
+
+        t4 = AsciiTable(printarray4, title="Last actions")
+        t4.column_max_width(1)
+
         print t1.table
         print '\n'
         print t2.table
         print '\n'
         print t3.table
+        print '\n'
+        print t4.table

         if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
             lastTime = datetime.datetime.now()
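A note on the new sort key: x[0][9:] skips the ANSI prefix that get_color prepended, so rows sort by queue name rather than by color code. That works because each prefix built from one colorama background code plus Style.BRIGHT is exactly 9 characters ('\x1b[41m' + '\x1b[1m'); a quick check of that assumption:

    from colorama import Back, Style

    # Each Back.* code is 5 chars ('\x1b[41m'); Style.BRIGHT is 4 ('\x1b[1m').
    for prefix in (Back.RED + Style.BRIGHT, Back.GREEN + Style.BRIGHT,
                   Back.YELLOW + Style.BRIGHT, Back.MAGENTA + Style.BRIGHT):
        assert len(prefix) == 9

    row = [Back.RED + Style.BRIGHT + "Mail", "4242"]
    print(row[0][9:])  # -> "Mail", the text without the color prefix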
diff --git a/bin/Onion.py b/bin/Onion.py
index cdfb44bc..1680a244 100755
--- a/bin/Onion.py
+++ b/bin/Onion.py
@@ -133,8 +133,8 @@ if __name__ == "__main__":
                     PST.p_name)

                 if len(domains_list) > 0:
-                    publisher.warning('{}Detected {} .onion(s)'.format(
-                        to_print, len(domains_list)))
+                    publisher.warning('{}Detected {} .onion(s);{}'.format(
+                        to_print, len(domains_list), PST.p_path))
                     now = datetime.datetime.now()
                     path = os.path.join('onions', str(now.year).zfill(4),
                                         str(now.month).zfill(2),
@@ -144,9 +144,9 @@ if __name__ == "__main__":
                                                  PST.p_date,
                                                  PST.p_name)
                     for url in fetch(p, r_cache, urls, domains_list, path):
-                        publisher.warning('{}Checked {}'.format(to_print, url))
+                        publisher.warning('{}Checked {};{}'.format(to_print, url, PST.p_path))
                 else:
-                    publisher.info('{}Onion related'.format(to_print))
+                    publisher.info('{}Onion related;{}'.format(to_print, PST.p_path))

                 prec_filename = filename
             else:
diff --git a/bin/Release.py b/bin/Release.py
index 309efe67..ce30ea3f 100755
--- a/bin/Release.py
+++ b/bin/Release.py
@@ -34,7 +34,7 @@ if __name__ == "__main__":
         if len(releases) == 0:
             continue

-        to_print = 'Release;{};{};{};{} releases'.format(paste.p_source, paste.p_date, paste.p_name, len(releases))
+        to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
         if len(releases) > 30:
             publisher.warning(to_print)
         else:
diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py
index b2d002fe..5e1f1dce 100755
--- a/bin/SQLInjectionDetection.py
+++ b/bin/SQLInjectionDetection.py
@@ -69,7 +69,7 @@ def analyse(url, path):
     if (result_path > 1) or (result_query > 1):
         print "Detected SQL in URL: "
         print urllib2.unquote(url)
-        to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL")
+        to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
         publisher.warning(to_print)
         #Send to duplicate
         p.populate_set_out(path, 'Duplicate')
diff --git a/bin/Web.py b/bin/Web.py
index 9892697c..49790185 100755
--- a/bin/Web.py
+++ b/bin/Web.py
@@ -131,8 +131,8 @@ if __name__ == "__main__":
                                       list(A_values[1])))

                     pprint.pprint(A_values)
-                    publisher.info('Url;{};{};{};Checked {} URL'.format(
-                        PST.p_source, PST.p_date, PST.p_name, A_values[0]))
+                    publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
+                        PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path))

                 prec_filename = filename
             else:
diff --git a/bin/WebStats.py b/bin/WebStats.py
index d8ff0876..1c41b64d 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -38,35 +38,55 @@ def get_date_range(num_day):
         date_list.append(date.substract_day(i))
     return date_list

+# Compute the progression for one keyword
+def compute_progression_word(keyword):
+    date_range = get_date_range(num_day)
+    # check if this keyword is eligible for progression
+    keyword_total_sum = 0
+    value_list = []
+    for date in date_range: # get value up to date_range
+        curr_value = server.hget(keyword, date)
+        value_list.append(int(curr_value if curr_value is not None else 0))
+        keyword_total_sum += int(curr_value) if curr_value is not None else 0
+    oldest_value = value_list[-1] if value_list[-1] != 0 else 1 #Avoid zero division
+
+    # The progression is based on the ratio: value[i] / value[i-1]
+    keyword_increase = 0
+    value_list_reversed = value_list[:]
+    value_list_reversed.reverse()
+    for i in range(1, len(value_list_reversed)):
+        divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1
+        keyword_increase += value_list_reversed[i] / divisor
+
+    return (keyword_increase, keyword_total_sum)
+
+
+'''
+    recompute the top_progression zset
+        - Compute the current field progression
+        - re-compute the progression of each of the first 2*max_set_cardinality fields in the top_progression zset
+'''
 def compute_progression(server, field_name, num_day, url_parsed):
-    redis_progression_name = 'top_progression_'+field_name
-    redis_progression_name_set = 'top_progression_'+field_name+'_set'
+    redis_progression_name_set = "z_top_progression_"+field_name

     keyword = url_parsed[field_name]
     if keyword is not None:
-        date_range = get_date_range(num_day)
-        # check if this keyword is eligible for progression
-        keyword_total_sum = 0
-        value_list = []
-        for date in date_range: # get value up to date_range
-            curr_value = server.hget(keyword, date)
-            value_list.append(int(curr_value if curr_value is not None else 0))
-            keyword_total_sum += int(curr_value) if curr_value is not None else 0
-        oldest_value = value_list[-1] if value_list[-1] != 0 else 1 #Avoid zero division
+        #compute the progression of the current word
+        keyword_increase, keyword_total_sum = compute_progression_word(keyword)

-        # The progression is based on the ratio: value[i] / value[i-1]
-        keyword_increase = 0
-        value_list_reversed = value_list[:]
-        value_list_reversed.reverse()
-        for i in range(1, len(value_list_reversed)):
-            divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1
-            keyword_increase += value_list_reversed[i] / divisor
+        #re-compute the progression of the first 2*max_set_cardinality words
+        current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
+        for word, value in current_top:
+            word_inc, word_tot_sum = compute_progression_word(word)
+            server.zrem(redis_progression_name_set, word)
+            if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
+                server.zadd(redis_progression_name_set, float(word_inc), word)

-        # filter
+        # filter before adding
         if (keyword_total_sum > threshold_total_sum) and (keyword_increase > threshold_increase):
-
-            server.zadd("z_top_progression_"+field_name, float(keyword_increase), keyword)
+            server.zadd(redis_progression_name_set, float(keyword_increase), keyword)
+

 if __name__ == '__main__':
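The progression score sums day-over-day ratios, oldest to newest, substituting 1 for a zero divisor. A toy run of the same formula outside Redis (values invented for illustration; float division used here for clarity, where the Python 2 original uses integer division):

    def progression(daily_counts_newest_first):
        # Sum of value[i] / value[i-1] over the list in chronological order,
        # with 1 substituted for a 0 divisor, as in compute_progression_word.
        chronological = list(reversed(daily_counts_newest_first))
        increase = 0
        for i in range(1, len(chronological)):
            divisor = chronological[i-1] if chronological[i-1] != 0 else 1
            increase += chronological[i] / float(divisor)
        return increase

    # Counts for the last 4 days, newest first: 40, 20, 10, 0.
    print(progression([40, 20, 10, 0]))  # 0->10 counts as 10/1, then 2.0 + 2.0 -> 14.0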
diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample
index e74b5da2..566cf22c 100644
--- a/bin/packages/config.cfg.sample
+++ b/bin/packages/config.cfg.sample
@@ -36,6 +36,9 @@ threshold_duplicate_tlsh = 100
 #Minimum size of the paste considered
 min_paste_size = 0.3

+[Module_ModuleInformation]
+#Threshold to deduce if a module is stuck or not, in seconds.
+threshold_stucked_module=600

 ##### Redis #####
 [Redis_Cache]
diff --git a/installing_deps.sh b/installing_deps.sh
index 9ef7480a..ec11f9f2 100755
--- a/installing_deps.sh
+++ b/installing_deps.sh
@@ -104,6 +104,7 @@ python setup.py install
 # Download the necessary NLTK corpora and sentiment vader
 HOME=$(pwd) python -m textblob.download_corpora
 python -m nltk.downloader vader_lexicon
+python -m nltk.downloader punkt

 #Create the file all_module and update the graph in doc
 $AIL_HOME/doc/generate_modules_data_flow_graph.sh
diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt
index f6602653..e95df123 100644
--- a/pip_packages_requirement.txt
+++ b/pip_packages_requirement.txt
@@ -11,6 +11,7 @@ numpy
 matplotlib
 networkx
 terminaltables
+colorama

 #Tokeniser
 nltk
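Both ModuleInformation.py and the dashboard read the new option with cfg.getint("Module_ModuleInformation", "threshold_stucked_module"), which raises if a deployed config.cfg has not been migrated to include the new section. A guarded read is one way to stay compatible (a sketch; the 600 s fallback mirrors config.cfg.sample):

    import ConfigParser  # Python 2, as used by the codebase

    cfg = ConfigParser.ConfigParser()
    cfg.read('bin/packages/config.cfg')

    try:
        threshold_stucked_module = cfg.getint("Module_ModuleInformation",
                                              "threshold_stucked_module")
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
        threshold_stucked_module = 600  # default from config.cfg.sample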
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index f3f9d71a..2ee00295 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -12,203 +12,34 @@ import flask
 import os
 import sys
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
+sys.path.append('./Flasks/')
 import Paste
 from Date import Date

+# Import config
+import Flask_config
+
 # CONFIG #
-tlsh_to_percent = 1000.0 #Use to display the estimated percentage instead of a raw value
+cfg = Flask_config.cfg

-configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
-if not os.path.exists(configfile):
-    raise Exception('Unable to find the configuration file. \
-                    Did you set environment variables? \
-                    Or activate the virtualenv.')
-
-cfg = ConfigParser.ConfigParser()
-cfg.read(configfile)
-
-max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip
-max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal
-
-# REDIS #
-r_serv = redis.StrictRedis(
-    host=cfg.get("Redis_Queues", "host"),
-    port=cfg.getint("Redis_Queues", "port"),
-    db=cfg.getint("Redis_Queues", "db"))
-
-r_serv_log = redis.StrictRedis(
-    host=cfg.get("Redis_Log", "host"),
-    port=cfg.getint("Redis_Log", "port"),
-    db=cfg.getint("Redis_Log", "db"))
-
-r_serv_charts = redis.StrictRedis(
-    host=cfg.get("Redis_Level_DB_Trending", "host"),
-    port=cfg.getint("Redis_Level_DB_Trending", "port"),
-    db=cfg.getint("Redis_Level_DB_Trending", "db"))
-
-r_serv_db = redis.StrictRedis(
-    host=cfg.get("Redis_Level_DB", "host"),
-    port=cfg.getint("Redis_Level_DB", "port"),
-    db=cfg.getint("Redis_Level_DB", "db"))
-
-r_serv_sentiment = redis.StrictRedis(
-    host=cfg.get("Redis_Level_DB_Sentiment", "host"),
-    port=cfg.getint("Redis_Level_DB_Sentiment", "port"),
-    db=cfg.getint("Redis_Level_DB_Sentiment", "db"))
-
-r_serv_term = redis.StrictRedis(
-    host=cfg.get("Redis_Level_DB_TermFreq", "host"),
-    port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
-    db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
-
-r_serv_pasteName = redis.StrictRedis(
-    host=cfg.get("Redis_Paste_Name", "host"),
-    port=cfg.getint("Redis_Paste_Name", "port"),
-    db=cfg.getint("Redis_Paste_Name", "db"))
-
-
-app = Flask(__name__, static_url_path='/static/')
-
-
-def event_stream():
-    pubsub = r_serv_log.pubsub()
-    pubsub.psubscribe("Script" + '.*')
-    for msg in pubsub.listen():
-        level = msg['channel'].split('.')[1]
-        if msg['type'] == 'pmessage' and level != "DEBUG":
-            yield 'data: %s\n\n' % json.dumps(msg)
-
-
-def get_queues(r):
-    # We may want to put the llen in a pipeline to do only one query.
-    newData = []
-    for queue, card in r.hgetall("queues").iteritems():
-        key = "MODULE_" + queue + "_"
-        keySet = "MODULE_TYPE_" + queue
-
-        for moduleNum in r.smembers(keySet):
-
-            value = r.get(key + str(moduleNum))
-            if value is not None:
-                timestamp, path = value.split(", ")
-                if timestamp is not None:
-                    startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
-                    processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
-                    seconds = int((datetime.datetime.now() - startTime_readable).total_seconds())
-                    newData.append( (queue, card, seconds, moduleNum) )
-                else:
-                    newData.append( (queue, cards, 0, moduleNum) )
-
-    return newData
+Flask_config.app = Flask(__name__, static_url_path='/static/')
+app = Flask_config.app

+# import routes and functions from modules
+import Flask_dashboard
+import Flask_trendingcharts
+import Flask_trendingmodules
+import Flask_browsepastes
+import Flask_sentiment
+import Flask_terms
+import Flask_search
+import Flask_showpaste

 def list_len(s):
     return len(s)
 app.jinja_env.filters['list_len'] = list_len

-def showpaste(content_range):
-    requested_path = request.args.get('paste', '')
-    paste = Paste.Paste(requested_path)
-    p_date = str(paste._get_p_date())
-    p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
-    p_source = paste.p_source
-    p_encoding = paste._get_p_encoding()
-    p_language = paste._get_p_language()
-    p_size = paste.p_size
-    p_mime = paste.p_mime
-    p_lineinfo = paste.get_lines_info()
-    p_content = paste.get_p_content().decode('utf-8', 'ignore')
-    p_duplicate_full_list = json.loads(paste._get_p_duplicate())
-    p_duplicate_list = []
-    p_simil_list = []
-    p_hashtype_list = []
-
-
-    for dup_list in p_duplicate_full_list:
-        if dup_list[0] == "tlsh":
-            dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100)
-        else:
-            dup_list[2] = int(dup_list[2])
-
-    p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
-
-    # Combine multiple duplicate paste name and format for display
-    new_dup_list = []
-    dup_list_removed = []
-    for dup_list_index in range(0, len(p_duplicate_full_list)):
-        if dup_list_index in dup_list_removed:
-            continue
-        indices = [i for i, x in enumerate(p_duplicate_full_list) if x[1] == p_duplicate_full_list[dup_list_index][1]]
-        hash_types = []
-        comp_vals = []
-        for i in indices:
-            hash_types.append(p_duplicate_full_list[i][0].encode('utf8'))
-            comp_vals.append(p_duplicate_full_list[i][2])
-            dup_list_removed.append(i)
-
-        hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
-        comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
-        new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals])
-
-    # Create the list to pass to the webpage
-    for dup_list in new_dup_list:
-        hash_type, path, simil_percent = dup_list
-        p_duplicate_list.append(path)
-        p_simil_list.append(simil_percent)
-        p_hashtype_list.append(hash_type)
-
-    if content_range != 0:
-        p_content = p_content[0:content_range]
-
-
-    return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list)
-
-def getPastebyType(server, module_name):
-    all_path = []
-    for path in server.smembers('WARNING_'+module_name):
-        all_path.append(path)
-    return all_path
-
-
-def get_date_range(num_day):
-    curr_date = datetime.date.today()
-    date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
-    date_list = []
-
-    for i in range(0, num_day+1):
-        date_list.append(date.substract_day(i))
-    return date_list
-
-# Iterate over elements in the module provided and return the today data or the last data
-# return format: [('passed_days', num_of_passed_days), ('elem_name1', elem_value1), ('elem_name2', elem_value2)]]
-def get_top_relevant_data(server, module_name):
-    days = 0
-    for date in get_date_range(15):
-        redis_progression_name_set = 'top_'+ module_name +'_set_' + date
-        member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True)
-        if len(member_set) == 0: #No data for this date
-            days += 1
-        else:
-            member_set.insert(0, ("passed_days", days))
-            return member_set
-
-
-def Term_getValueOverRange(word, startDate, num_day):
-    passed_days = 0
-    oneDay = 60*60*24
-    to_return = []
-    curr_to_return = 0
-    for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay):
-        value = r_serv_term.hget(timestamp, word)
-        curr_to_return += int(value) if value is not None else 0
-        for i in num_day:
-            if passed_days == i-1:
-                to_return.append(curr_to_return)
-        passed_days += 1
-    return to_return
-
-
 # ========= CACHE CONTROL ========
 @app.after_request
 def add_header(response):
@@ -220,532 +51,7 @@ def add_header(response):
     response.headers['Cache-Control'] = 'public, max-age=0'
     return response
-@app.route("/_providersChart", methods=['GET']) -def providersChart(): - keyword_name = request.args.get('keywordName') - module_name = request.args.get('moduleName') - bar_requested = True if request.args.get('bar') == "true" else False - - if (bar_requested): - num_day = int(request.args.get('days')) - bar_values = [] - - date_range = get_date_range(num_day) - # Retreive all data from the last num_day - for date in date_range: - curr_value_size = r_serv_charts.hget(keyword_name+'_'+'size', date) - curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date) - curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date) - if module_name == "size": - curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0) - else: - curr_value = float(curr_value_num if curr_value_num is not None else 0.0) - - bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], curr_value]) - bar_values.insert(0, keyword_name) - return jsonify(bar_values) - - else: - #redis_provider_name_set = 'top_size_set' if module_name == "size" else 'providers_set' - redis_provider_name_set = 'top_avg_size_set_' if module_name == "size" else 'providers_set_' - redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0] - - member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8) - # Member set is a list of (value, score) pairs - if len(member_set) == 0: - member_set.append(("No relevant data", float(100))) - return jsonify(member_set) - - - -@app.route("/search", methods=['POST']) -def search(): - query = request.form['query'] - q = [] - q.append(query) - r = [] #complete path - c = [] #preview of the paste content - paste_date = [] - paste_size = [] - - # Search filename - print r_serv_pasteName.smembers(q[0]) - for path in r_serv_pasteName.smembers(q[0]): - print path - r.append(path) - paste = Paste.Paste(path) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - c.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_size.append(paste._get_p_size()) - - # Search full line - from whoosh import index - from whoosh.fields import Schema, TEXT, ID - schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) - - indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) - ix = index.open_dir(indexpath) - from whoosh.qparser import QueryParser - with ix.searcher() as searcher: - query = QueryParser("content", ix.schema).parse(" ".join(q)) - results = searcher.search(query, limit=None) - for x in results: - r.append(x.items()[0][1]) - paste = Paste.Paste(x.items()[0][1]) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - c.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_size.append(paste._get_p_size()) - return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal) - - -@app.route("/") -def index(): - default_minute = cfg.get("Flask", "minute_processed_paste") - return render_template("index.html", default_minute = default_minute) - - 
-@app.route("/monitoring/") -def monitoring(): - for queue in r_serv.smembers("queues"): - return render_template("Queue_live_Monitoring.html", last_value=queue) - - -@app.route("/wordstrending/") -def wordstrending(): - default_display = cfg.get("Flask", "default_display") - return render_template("Wordstrending.html", default_display = default_display) - - -@app.route("/protocolstrending/") -def protocolstrending(): - default_display = cfg.get("Flask", "default_display") - return render_template("Protocolstrending.html", default_display = default_display) - - -@app.route("/trending/") -def trending(): - default_display = cfg.get("Flask", "default_display") - return render_template("Trending.html", default_display = default_display) - -@app.route("/browseImportantPaste/", methods=['GET']) -def browseImportantPaste(): - module_name = request.args.get('moduleName') - return render_template("browse_important_paste.html") - - -@app.route("/importantPasteByModule/", methods=['GET']) -def importantPasteByModule(): - module_name = request.args.get('moduleName') - - all_content = [] - paste_date = [] - paste_linenum = [] - all_path = [] - - for path in getPastebyType(r_serv_db, module_name): - all_path.append(path) - paste = Paste.Paste(path) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - all_content.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_linenum.append(paste.get_lines_info()[0]) - - return render_template("important_paste_by_module.html", all_path=all_path, content=all_content, paste_date=paste_date, paste_linenum=paste_linenum, char_to_display=max_preview_modal) - -@app.route("/moduletrending/") -def moduletrending(): - return render_template("Moduletrending.html") - -@app.route("/sentiment_analysis_trending/") -def sentiment_analysis_trending(): - return render_template("sentiment_analysis_trending.html") - - -@app.route("/sentiment_analysis_getplotdata/", methods=['GET']) -def sentiment_analysis_getplotdata(): - # Get the top providers based on number of pastes - oneHour = 60*60 - sevenDays = oneHour*24*7 - dateStart = datetime.datetime.now() - dateStart = dateStart.replace(minute=0, second=0, microsecond=0) - dateStart_timestamp = calendar.timegm(dateStart.timetuple()) - - getAllProviders = request.args.get('getProviders') - provider = request.args.get('provider') - allProvider = request.args.get('all') - if getAllProviders == 'True': - if allProvider == "True": - range_providers = r_serv_charts.smembers('all_provider_set') - return jsonify(list(range_providers)) - else: - range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8) - # if empty, get yesterday top providers - range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers - # if still empty, takes from all providers - if range_providers == []: - print 'today provider empty' - range_providers = r_serv_charts.smembers('all_provider_set') - return jsonify(range_providers) - - elif provider is not None: - to_return = {} - - cur_provider_name = provider + '_' - list_date = {} - for cur_timestamp in range(int(dateStart_timestamp), int(dateStart_timestamp)-sevenDays-oneHour, -oneHour): - cur_set_name = cur_provider_name + 
-
-@app.route("/moduletrending/")
-def moduletrending():
-    return render_template("Moduletrending.html")
-
-@app.route("/sentiment_analysis_trending/")
-def sentiment_analysis_trending():
-    return render_template("sentiment_analysis_trending.html")
-
-
-@app.route("/sentiment_analysis_getplotdata/", methods=['GET'])
-def sentiment_analysis_getplotdata():
-    # Get the top providers based on number of pastes
-    oneHour = 60*60
-    sevenDays = oneHour*24*7
-    dateStart = datetime.datetime.now()
-    dateStart = dateStart.replace(minute=0, second=0, microsecond=0)
-    dateStart_timestamp = calendar.timegm(dateStart.timetuple())
-
-    getAllProviders = request.args.get('getProviders')
-    provider = request.args.get('provider')
-    allProvider = request.args.get('all')
-    if getAllProviders == 'True':
-        if allProvider == "True":
-            range_providers = r_serv_charts.smembers('all_provider_set')
-            return jsonify(list(range_providers))
-        else:
-            range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8)
-            # if empty, get yesterday top providers
-            range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers
-            # if still empty, takes from all providers
-            if range_providers == []:
-                print 'today provider empty'
-                range_providers = r_serv_charts.smembers('all_provider_set')
-            return jsonify(range_providers)
-
-    elif provider is not None:
-        to_return = {}
-
-        cur_provider_name = provider + '_'
-        list_date = {}
-        for cur_timestamp in range(int(dateStart_timestamp), int(dateStart_timestamp)-sevenDays-oneHour, -oneHour):
-            cur_set_name = cur_provider_name + str(cur_timestamp)
-
-            list_value = []
-            for cur_id in r_serv_sentiment.smembers(cur_set_name):
-                cur_value = r_serv_sentiment.get(cur_id)
-                list_value.append(cur_value)
-            list_date[cur_timestamp] = list_value
-        to_return[provider] = list_date
-
-        return jsonify(to_return)
-    return "Bad request"
-
-
-@app.route("/sentiment_analysis_plot_tool/")
-def sentiment_analysis_plot_tool():
-    return render_template("sentiment_analysis_plot_tool.html")
-
-
-@app.route("/sentiment_analysis_plot_tool_getdata/", methods=['GET'])
-def sentiment_analysis_plot_tool_getdata():
-    getProviders = request.args.get('getProviders')
-
-    if getProviders == 'True':
-        providers = []
-        for cur_provider in r_serv_charts.smembers('all_provider_set'):
-            providers.append(cur_provider)
-        return jsonify(providers)
-
-    else:
-        query = request.args.get('query')
-        query = query.split(',')
-        Qdate = request.args.get('Qdate')
-
-        date1 = (Qdate.split('-')[0]).split('.')
-        date1 = datetime.date(int(date1[2]), int(date1[1]), int(date1[0]))
-
-        date2 = (Qdate.split('-')[1]).split('.')
-        date2 = datetime.date(int(date2[2]), int(date2[1]), int(date2[0]))
-
-        timestamp1 = calendar.timegm(date1.timetuple())
-        timestamp2 = calendar.timegm(date2.timetuple())
-
-        oneHour = 60*60
-        oneDay = oneHour*24
-
-        to_return = {}
-        for cur_provider in query:
-            list_date = {}
-            cur_provider_name = cur_provider + '_'
-            for cur_timestamp in range(int(timestamp1), int(timestamp2)+oneDay, oneHour):
-                cur_set_name = cur_provider_name + str(cur_timestamp)
-
-                list_value = []
-                for cur_id in r_serv_sentiment.smembers(cur_set_name):
-                    cur_value = r_serv_sentiment.get(cur_id)
-                    list_value.append(cur_value)
-                list_date[cur_timestamp] = list_value
-            to_return[cur_provider] = list_date
-
-        return jsonify(to_return)
-
-
-@app.route("/terms_management/")
-def terms_management():
-    TrackedTermsSet_Name = "TrackedSetTermSet"
-    BlackListTermsSet_Name = "BlackListSetTermSet"
-    TrackedTermsDate_Name = "TrackedTermDate"
-    BlackListTermsDate_Name = "BlackListTermDate"
-
-    today = datetime.datetime.now()
-    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
-    today_timestamp = calendar.timegm(today.timetuple())
-
-    track_list = []
-    track_list_values = []
-    track_list_num_of_paste = []
-    for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name):
-        track_list.append(tracked_term)
-        value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31])
-
-        term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term)
-
-        set_paste_name = "tracked_" + tracked_term
-        track_list_num_of_paste.append(r_serv_term.scard(set_paste_name))
-        term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
-        value_range.append(term_date)
-        track_list_values.append(value_range)
-
-
-    black_list = []
-    for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name):
-        term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term)
-        term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
-        black_list.append([blacked_term, term_date])
-
-    return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste)
-
-
-@app.route("/terms_management_query_paste/")
-def terms_management_query_paste():
-    term = request.args.get('term')
-    TrackedTermsSet_Name = "TrackedSetTermSet"
-    paste_info = []
-
-    set_paste_name = "tracked_" + term
-    track_list_path = r_serv_term.smembers(set_paste_name)
-
-    for path in track_list_path:
-        paste = Paste.Paste(path)
-        p_date = str(paste._get_p_date())
-        p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
-        p_source = paste.p_source
-        p_encoding = paste._get_p_encoding()
-        p_size = paste.p_size
-        p_mime = paste.p_mime
-        p_lineinfo = paste.get_lines_info()
-        p_content = paste.get_p_content().decode('utf-8', 'ignore')
-        if p_content != 0:
-            p_content = p_content[0:400]
-        paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})
-
-    return jsonify(paste_info)
-
-
-@app.route("/terms_management_query/")
-def terms_management_query():
-    TrackedTermsDate_Name = "TrackedTermDate"
-    BlackListTermsDate_Name = "BlackListTermDate"
-    term = request.args.get('term')
-    section = request.args.get('section')
-
-    today = datetime.datetime.now()
-    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
-    today_timestamp = calendar.timegm(today.timetuple())
-    value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31])
-
-    if section == "followTerm":
-        term_date = r_serv_term.hget(TrackedTermsDate_Name, term)
-    elif section == "blacklistTerm":
-        term_date = r_serv_term.hget(BlackListTermsDate_Name, term)
-
-    term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
-    value_range.append(str(term_date))
-    return jsonify(value_range)
-
-
-@app.route("/terms_management_action/", methods=['GET'])
-def terms_management_action():
-    TrackedTermsSet_Name = "TrackedSetTermSet"
-    TrackedTermsDate_Name = "TrackedTermDate"
-    BlackListTermsDate_Name = "BlackListTermDate"
-    BlackListTermsSet_Name = "BlackListSetTermSet"
-
-    today = datetime.datetime.now()
-    today = today.replace(microsecond=0)
-    today_timestamp = calendar.timegm(today.timetuple())
-
-
-    section = request.args.get('section')
-    action = request.args.get('action')
-    term = request.args.get('term')
-    if action is None or term is None:
-        return "None"
-    else:
-        if section == "followTerm":
-            if action == "add":
-                r_serv_term.sadd(TrackedTermsSet_Name, term.lower())
-                r_serv_term.hset(TrackedTermsDate_Name, term, today_timestamp)
-            else:
-                r_serv_term.srem(TrackedTermsSet_Name, term.lower())
-        elif section == "blacklistTerm":
-            if action == "add":
-                r_serv_term.sadd(BlackListTermsSet_Name, term.lower())
-                r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp)
-            else:
-                r_serv_term.srem(BlackListTermsSet_Name, term.lower())
-        else:
-            return "None"
-
-        to_return = {}
-        to_return["section"] = section
-        to_return["action"] = action
-        to_return["term"] = term
-        return jsonify(to_return)
-
-
-@app.route("/terms_plot_tool/")
-def terms_plot_tool():
-    term = request.args.get('term')
-    if term is not None:
-        return render_template("terms_plot_tool.html", term=term)
-    else:
-        return render_template("terms_plot_tool.html", term="")
-
-
-@app.route("/terms_plot_tool_data/")
-def terms_plot_tool_data():
-    oneDay = 60*60*24
-    range_start = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_start')))) if request.args.get('range_start') is not None else 0;
-    range_start = range_start.replace(hour=0, minute=0, second=0, microsecond=0)
-    range_start = calendar.timegm(range_start.timetuple())
-    range_end = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0;
-    range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0)
-    range_end = calendar.timegm(range_end.timetuple())
-    term = request.args.get('term')
-
-    if term is None:
-        return "None"
-    else:
-        value_range = []
-        for timestamp in range(range_start, range_end+oneDay, oneDay):
-            value = r_serv_term.hget(timestamp, term)
-            curr_value_range = int(value) if value is not None else 0
-            value_range.append([timestamp, curr_value_range])
-        return jsonify(value_range)
-
-
-@app.route("/terms_plot_top/")
-def terms_plot_top():
-    return render_template("terms_plot_top.html")
-
-
-@app.route("/terms_plot_top_data/")
-def terms_plot_top_data():
-    oneDay = 60*60*24
-    today = datetime.datetime.now()
-    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
-    today_timestamp = calendar.timegm(today.timetuple())
-
-    set_day = "TopTermFreq_set_day_" + str(today_timestamp)
-    set_week = "TopTermFreq_set_week";
-    set_month = "TopTermFreq_set_month";
-
-    the_set = request.args.get('set')
-    num_day = int(request.args.get('num_day'))
-    if the_set is None:
-        return "None"
-    else:
-        to_return = []
-        if the_set == "TopTermFreq_set_day":
-            the_set += "_" + str(today_timestamp)
-
-        for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20):
-            position = {}
-            position['day'] = r_serv_term.zrevrank(set_day, term)
-            position['day'] = position['day']+1 if position['day'] is not None else "<20"
-            position['week'] = r_serv_term.zrevrank(set_week, term)
-            position['week'] = position['week']+1 if position['week'] is not None else "<20"
-            position['month'] = r_serv_term.zrevrank(set_month, term)
-            position['month'] = position['month']+1 if position['month'] is not None else "<20"
-            value_range = []
-            for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
-                value = r_serv_term.hget(timestamp, term)
-                curr_value_range = int(value) if value is not None else 0
-                value_range.append([timestamp, curr_value_range])
-
-            to_return.append([term, value_range, tot_value, position])
-
-        return jsonify(to_return)
-
-
-@app.route("/showsavedpaste/") #completely shows the paste in a new tab
-def showsavedpaste():
-    return showpaste(0)
-
-
-@app.route("/showpreviewpaste/")
-def showpreviewpaste():
-    return showpaste(max_preview_modal)
-
-
-@app.route("/getmoredata/")
-def getmoredata():
-    requested_path = request.args.get('paste', '')
-    paste = Paste.Paste(requested_path)
-    p_content = paste.get_p_content().decode('utf-8', 'ignore')
-    to_return = p_content[max_preview_modal-1:]
-    return to_return
-
+# ============ MAIN ============

 if __name__ == "__main__":
     app.run(host='0.0.0.0', port=7000, threaded=True)
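The refactor above splits the monolithic Flask_server.py into per-page modules under var/www/Flasks/ that share a single app and one set of Redis handles through Flask_config. The pattern relies on import order: Flask_config.app must be assigned before the route modules are imported, because each of them runs "app = Flask_config.app" at import time and registers its routes on that object. A stripped-down sketch of the same pattern (file names hypothetical, not from the patch):

    # shared_config.py -- stands in for Flask_config.py
    app = None  # filled in by the entry point before route modules are imported

    # routes_hello.py -- stands in for one Flask_* route module
    import shared_config
    app = shared_config.app

    @app.route("/hello")
    def hello():
        return "hello"

    # server.py -- stands in for Flask_server.py
    from flask import Flask
    import shared_config

    shared_config.app = Flask(__name__)
    app = shared_config.app
    import routes_hello  # must come after shared_config.app is set

    if __name__ == "__main__":
        app.run()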
diff --git a/var/www/Flasks/Flask_browsepastes.py b/var/www/Flasks/Flask_browsepastes.py
new file mode 100644
index 00000000..b393ab9e
--- /dev/null
+++ b/var/www/Flasks/Flask_browsepastes.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+
+'''
+    Flask functions and routes for the browse important pastes page
+'''
+import redis
+import json
+import flask
+from flask import Flask, render_template, jsonify, request
+
+import Paste
+
+# ============ VARIABLES ============
+import Flask_config
+
+app = Flask_config.app
+cfg = Flask_config.cfg
+max_preview_char = Flask_config.max_preview_char
+max_preview_modal = Flask_config.max_preview_modal
+r_serv_db = Flask_config.r_serv_db
+# ============ FUNCTIONS ============
+
+def getPastebyType(server, module_name):
+    all_path = []
+    for path in server.smembers('WARNING_'+module_name):
+        all_path.append(path)
+    return all_path
+
+
+def event_stream_getImportantPasteByModule(module_name):
+    index = 0
+    all_pastes_list = getPastebyType(r_serv_db, module_name)
+    for path in all_pastes_list:
+        index += 1
+        paste = Paste.Paste(path)
+        content = paste.get_p_content().decode('utf8', 'ignore')
+        content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
+        curr_date = str(paste._get_p_date())
+        curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
+        data = {}
+        data["module"] = module_name
+        data["index"] = index
+        data["path"] = path
+        data["content"] = content[0:content_range]
+        data["linenum"] = paste.get_lines_info()[0]
+        data["date"] = curr_date
+        data["char_to_display"] = max_preview_modal
+        data["finished"] = True if index == len(all_pastes_list) else False
+        yield 'retry: 100000\ndata: %s\n\n' % json.dumps(data) #retry to avoid reconnection of the browser
+
+# ============ ROUTES ============
+
+@app.route("/browseImportantPaste/", methods=['GET'])
+def browseImportantPaste():
+    module_name = request.args.get('moduleName')
+    return render_template("browse_important_paste.html")
+
+
+@app.route("/importantPasteByModule/", methods=['GET'])
+def importantPasteByModule():
+    module_name = request.args.get('moduleName')
+
+    all_content = []
+    paste_date = []
+    paste_linenum = []
+    all_path = []
+    allPastes = getPastebyType(r_serv_db, module_name)
+
+    for path in allPastes[0:10]:
+        all_path.append(path)
+        paste = Paste.Paste(path)
+        content = paste.get_p_content().decode('utf8', 'ignore')
+        content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
+        all_content.append(content[0:content_range].replace("\"", "\'").replace("\r", " ").replace("\n", " "))
+        curr_date = str(paste._get_p_date())
+        curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
+        paste_date.append(curr_date)
+        paste_linenum.append(paste.get_lines_info()[0])
+
+    if len(allPastes) > 10:
+        finished = False
+    else:
+        finished = True
+
+    return render_template("important_paste_by_module.html",
+                           moduleName=module_name,
+                           all_path=all_path,
+                           content=all_content,
+                           paste_date=paste_date,
+                           paste_linenum=paste_linenum,
+                           char_to_display=max_preview_modal,
+                           finished=finished)
+
+@app.route("/_getImportantPasteByModule")
+def getImportantPasteByModule():
+    module_name = request.args.get('moduleName')
+    return flask.Response(event_stream_getImportantPasteByModule(module_name), mimetype="text/event-stream")
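Flask_browsepastes streams results as server-sent events: each yield is one "data: <json>\n\n" frame, the "retry: 100000" field tells the browser to wait before reconnecting, and the "finished" flag lets the client close the stream after the last paste. A minimal Python-side consumer, assuming the requests library, a local instance on port 7000, and an illustrative module name:

    import json
    import requests  # assumed available; any streaming HTTP client works

    resp = requests.get('http://localhost:7000/_getImportantPasteByModule',
                        params={'moduleName': 'credential'}, stream=True)
    for line in resp.iter_lines():
        if not line.startswith(b'data: '):
            continue  # skip 'retry:' fields and blank frame separators
        event = json.loads(line[len(b'data: '):])
        print(event['path'])
        if event['finished']:
            break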
diff --git a/var/www/Flasks/Flask_config.py b/var/www/Flasks/Flask_config.py
new file mode 100644
index 00000000..c15e4dca
--- /dev/null
+++ b/var/www/Flasks/Flask_config.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+
+'''
+    Flask global variables shared across modules
+'''
+import ConfigParser
+import redis
+import os
+
+# FLASK #
+app = None
+
+# CONFIG #
+configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+if not os.path.exists(configfile):
+    raise Exception('Unable to find the configuration file. \
+                    Did you set environment variables? \
+                    Or activate the virtualenv.')
+
+cfg = ConfigParser.ConfigParser()
+cfg.read(configfile)
+
+
+# REDIS #
+r_serv = redis.StrictRedis(
+    host=cfg.get("Redis_Queues", "host"),
+    port=cfg.getint("Redis_Queues", "port"),
+    db=cfg.getint("Redis_Queues", "db"))
+
+r_serv_log = redis.StrictRedis(
+    host=cfg.get("Redis_Log", "host"),
+    port=cfg.getint("Redis_Log", "port"),
+    db=cfg.getint("Redis_Log", "db"))
+
+r_serv_charts = redis.StrictRedis(
+    host=cfg.get("Redis_Level_DB_Trending", "host"),
+    port=cfg.getint("Redis_Level_DB_Trending", "port"),
+    db=cfg.getint("Redis_Level_DB_Trending", "db"))
+
+r_serv_db = redis.StrictRedis(
+    host=cfg.get("Redis_Level_DB", "host"),
+    port=cfg.getint("Redis_Level_DB", "port"),
+    db=cfg.getint("Redis_Level_DB", "db"))
+
+r_serv_sentiment = redis.StrictRedis(
+    host=cfg.get("Redis_Level_DB_Sentiment", "host"),
+    port=cfg.getint("Redis_Level_DB_Sentiment", "port"),
+    db=cfg.getint("Redis_Level_DB_Sentiment", "db"))
+
+r_serv_term = redis.StrictRedis(
+    host=cfg.get("Redis_Level_DB_TermFreq", "host"),
+    port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
+    db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
+
+r_serv_pasteName = redis.StrictRedis(
+    host=cfg.get("Redis_Paste_Name", "host"),
+    port=cfg.getint("Redis_Paste_Name", "port"),
+    db=cfg.getint("Redis_Paste_Name", "db"))
+
+# VARIABLES #
+max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip
+max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal
+
+tlsh_to_percent = 1000.0 #Use to display the estimated percentage instead of a raw value
diff --git a/var/www/Flasks/Flask_corpus.py b/var/www/Flasks/Flask_corpus.py
new file mode 100644
index 00000000..7805e66e
--- /dev/null
+++ b/var/www/Flasks/Flask_corpus.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+
+'''
+    Flask functions and routes for the corpus tracking page
+'''
+import redis
+import datetime
+import calendar
+import flask
+from flask import Flask, render_template, jsonify, request
+
+import Paste
+
+# ============ VARIABLES ============
+import Flask_config
+
+app = Flask_config.app
+cfg = Flask_config.cfg
+r_serv_corpus = Flask_config.r_serv_corpus
+# ============ FUNCTIONS ============
+
+def Corpus_getValueOverRange(word, startDate, num_day):
+    passed_days = 0
+    oneDay = 60*60*24
+    to_return = []
+    curr_to_return = 0
+    for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay):
+        value = r_serv_corpus.hget(timestamp, word)
+        curr_to_return += int(value) if value is not None else 0
+        for i in num_day:
+            if passed_days == i-1:
+                to_return.append(curr_to_return)
+        passed_days += 1
+    return to_return
+
+
+# ============ ROUTES ============
+
+@app.route("/corpus_management/")
+def corpus_management():
+    TrackedCorpusSet_Name = "TrackedSetCorpusSet"
+    TrackedCorpusDate_Name = "TrackedCorpusDate"
+
+    today = datetime.datetime.now()
+    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
+    today_timestamp = calendar.timegm(today.timetuple())
+
+    track_list = []
+    track_list_values = []
+    track_list_num_of_paste = []
+    for tracked_corpus in r_serv_corpus.smembers(TrackedCorpusSet_Name):
+        track_list.append(tracked_corpus)
+        value_range = Corpus_getValueOverRange(tracked_corpus, today_timestamp, [1, 7, 31])
+
+        corpus_date = r_serv_corpus.hget(TrackedCorpusDate_Name, tracked_corpus)
+
+        set_paste_name = "tracked_" + tracked_corpus
+        track_list_num_of_paste.append(r_serv_corpus.scard(set_paste_name))
+        corpus_date = datetime.datetime.utcfromtimestamp(int(corpus_date)) if corpus_date is not None else "No date recorded"
+        value_range.append(corpus_date)
+        track_list_values.append(value_range)
+
+    black_list = [] #the corpus page has no blacklist; pass an empty one to the shared template
+
+    return render_template("corpus_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste)
+
+
+@app.route("/corpus_management_query_paste/")
+def corpus_management_query_paste():
+    corpus = request.args.get('corpus')
+    TrackedCorpusSet_Name = "TrackedSetCorpusSet"
+    paste_info = []
+
+    set_paste_name = "tracked_" + corpus
+    track_list_path = r_serv_corpus.smembers(set_paste_name)
+
+    for path in track_list_path:
+        paste = Paste.Paste(path)
+        p_date = str(paste._get_p_date())
+        p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
+        p_source = paste.p_source
+        p_encoding = paste._get_p_encoding()
+        p_size = paste.p_size
+        p_mime = paste.p_mime
+        p_lineinfo = paste.get_lines_info()
+        p_content = paste.get_p_content().decode('utf-8', 'ignore')
+        if p_content != 0:
+            p_content = p_content[0:400]
+        paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})
+
+    return jsonify(paste_info)
+
+
+@app.route("/corpus_management_query/")
+def corpus_management_query():
+    TrackedCorpusDate_Name = "TrackedCorpusDate"
+    corpus = request.args.get('corpus')
+
+    today = datetime.datetime.now()
+    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
+    today_timestamp = calendar.timegm(today.timetuple())
+    value_range = Corpus_getValueOverRange(corpus, today_timestamp, [1, 7, 31])
+
+    corpus_date = r_serv_corpus.hget(TrackedCorpusDate_Name, corpus)
+
+    corpus_date = datetime.datetime.utcfromtimestamp(int(corpus_date)) if corpus_date is not None else "No date recorded"
+    value_range.append(str(corpus_date))
+    return jsonify(value_range)
+
+
+@app.route("/corpus_management_action/", methods=['GET'])
+def corpus_management_action():
+    TrackedCorpusSet_Name = "TrackedSetCorpusSet"
+    TrackedCorpusDate_Name = "TrackedCorpusDate"
+
+    today = datetime.datetime.now()
+    today = today.replace(microsecond=0)
+    today_timestamp = calendar.timegm(today.timetuple())
+
+
+    section = request.args.get('section')
+    action = request.args.get('action')
+    corpus = request.args.get('corpus')
+    if action is None or corpus is None:
+        return "None"
+    else:
+        if section == "followCorpus":
+            if action == "add":
+                r_serv_corpus.sadd(TrackedCorpusSet_Name, corpus.lower())
+                r_serv_corpus.hset(TrackedCorpusDate_Name, corpus, today_timestamp)
+            else:
+                r_serv_corpus.srem(TrackedCorpusSet_Name, corpus.lower())
+        else:
+            return "None"
+
+        to_return = {}
+        to_return["section"] = section
+        to_return["action"] = action
+        to_return["corpus"] = corpus
+        return jsonify(to_return)
+
+
+
+@app.route("/corpus_plot_tool/")
+def corpus_plot_tool():
+    corpus = request.args.get('corpus')
+    if corpus is not None:
+        return render_template("corpus_plot_tool.html", corpus=corpus)
+    else:
+        return render_template("corpus_plot_tool.html", corpus="")
+
+
+@app.route("/corpus_plot_tool_data/")
+def corpus_plot_tool_data():
+    oneDay = 60*60*24
+    range_start = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_start')))) if request.args.get('range_start') is not None else 0;
+    range_start = range_start.replace(hour=0, minute=0, second=0, microsecond=0)
+    range_start = calendar.timegm(range_start.timetuple())
+    range_end = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0;
+    range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0)
+    range_end = calendar.timegm(range_end.timetuple())
+    corpus = request.args.get('corpus')
+
+    if corpus is None:
+        return "None"
+    else:
+        value_range = []
+        for timestamp in range(range_start, range_end+oneDay, oneDay):
+            value = r_serv_corpus.hget(timestamp, corpus)
+            curr_value_range = int(value) if value is not None else 0
+            value_range.append([timestamp, curr_value_range])
+        value_range.insert(0,corpus)
+        return jsonify(value_range)
+
+
+@app.route("/corpus_plot_top/")
+def corpus_plot_top():
+    return render_template("corpus_plot_top.html")
+
+
+@app.route("/corpus_plot_top_data/")
+def corpus_plot_top_data():
+    oneDay = 60*60*24
+    today = datetime.datetime.now()
+    today = today.replace(hour=0, minute=0, second=0, microsecond=0)
+    today_timestamp = calendar.timegm(today.timetuple())
+
+    set_day = "TopCorpusFreq_set_day_" + str(today_timestamp)
+    set_week = "TopCorpusFreq_set_week";
+    set_month = "TopCorpusFreq_set_month";
+
+    the_set = request.args.get('set')
+    num_day = int(request.args.get('num_day'))
+    if the_set is None:
+        return "None"
+    else:
+        to_return = []
+        if the_set == "TopCorpusFreq_set_day":
+            the_set += "_" + str(today_timestamp)
+
+        for corpus, tot_value in r_serv_corpus.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20):
+            position = {}
+            position['day'] = r_serv_corpus.zrevrank(set_day, corpus)
+            position['day'] = position['day']+1 if position['day'] is not None else "<20"
+            position['week'] = r_serv_corpus.zrevrank(set_week, corpus)
+            position['week'] = position['week']+1 if position['week'] is not None else "<20"
+            position['month'] = r_serv_corpus.zrevrank(set_month, corpus)
+            position['month'] = position['month']+1 if position['month'] is not None else "<20"
+            value_range = []
+            for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
+                value = r_serv_corpus.hget(timestamp, corpus)
+                curr_value_range = int(value) if value is not None else 0
+                value_range.append([timestamp, curr_value_range])
+
+            to_return.append([corpus, value_range, tot_value, position])
+
+        return jsonify(to_return)
calendar.timegm(range_start.timetuple()) + range_end = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0; + range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0) + range_end = calendar.timegm(range_end.timetuple()) + corpus = request.args.get('corpus') + + if corpus is None: + return "None" + else: + value_range = [] + for timestamp in range(range_start, range_end+oneDay, oneDay): + value = r_serv_corpus.hget(timestamp, corpus) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + value_range.insert(0,corpus) + return jsonify(value_range) + + +@app.route("/corpus_plot_top/") +def corpus_plot_top(): + return render_template("corpus_plot_top.html") + + +@app.route("/corpus_plot_top_data/") +def corpus_plot_top_data(): + oneDay = 60*60*24 + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + set_day = "TopCorpusFreq_set_day_" + str(today_timestamp) + set_week = "TopCorpusFreq_set_week"; + set_month = "TopCorpusFreq_set_month"; + + the_set = request.args.get('set') + num_day = int(request.args.get('num_day')) + if the_set is None: + return "None" + else: + to_return = [] + if the_set == "TopCorpusFreq_set_day": + the_set += "_" + str(today_timestamp) + + for corpus, tot_value in r_serv_corpus.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): + position = {} + position['day'] = r_serv_corpus.zrevrank(set_day, corpus) + position['day'] = position['day']+1 if position['day'] is not None else "<20" + position['week'] = r_serv_corpus.zrevrank(set_week, corpus) + position['week'] = position['week']+1 if position['week'] is not None else "<20" + position['month'] = r_serv_corpus.zrevrank(set_month, corpus) + position['month'] = position['month']+1 if position['month'] is not None else "<20" + value_range = [] + for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): + value = r_serv_corpus.hget(timestamp, corpus) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + + to_return.append([corpus, value_range, tot_value, position]) + + return jsonify(to_return) + + diff --git a/var/www/Flasks/Flask_dashboard.py b/var/www/Flasks/Flask_dashboard.py new file mode 100644 index 00000000..79307f9c --- /dev/null +++ b/var/www/Flasks/Flask_dashboard.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the dashboard page +''' +import json + +import datetime +import flask +from flask import Flask, render_template, jsonify, request + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv = Flask_config.r_serv +r_serv_log = Flask_config.r_serv_log +# ============ FUNCTIONS ============ + +def event_stream(): + pubsub = r_serv_log.pubsub() + pubsub.psubscribe("Script" + '.*') + for msg in pubsub.listen(): + level = msg['channel'].split('.')[1] + if msg['type'] == 'pmessage' and level != "DEBUG": + yield 'data: %s\n\n' % json.dumps(msg) + +def get_queues(r): + # We may want to put the llen in a pipeline to do only one query. 
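# [Editor's sketch, not part of the patch] The comment above suggests batching
# these per-module reads; with redis-py the whole scan can be queued and sent
# as a single pipeline round trip. The helper below is hypothetical, but it
# only uses the key layout already present in this patch (the "queues" hash,
# "MODULE_TYPE_<queue>" sets and "MODULE_<queue>_<pid>" string keys):
def get_module_values_batched(r):
    keys = []
    pipe = r.pipeline(transaction=False)
    for queue in r.hgetall("queues"):
        for module_num in r.smembers("MODULE_TYPE_" + queue):
            keys.append((queue, module_num))
            pipe.get("MODULE_" + queue + "_" + str(module_num))
    # execute() sends every queued GET at once and returns the values in order
    return zip(keys, pipe.execute())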
+ newData = [] + for queue, card in r.hgetall("queues").iteritems(): + key = "MODULE_" + queue + "_" + keySet = "MODULE_TYPE_" + queue + + for moduleNum in r.smembers(keySet): + + value = r.get(key + str(moduleNum)) + if value is not None: + timestamp, path = value.split(", ") + if timestamp is not None: + startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) + processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] + seconds = int((datetime.datetime.now() - startTime_readable).total_seconds()) + newData.append( (queue, card, seconds, moduleNum) ) + else: + newData.append( (queue, card, 0, moduleNum) ) + + return newData + +# ============ ROUTES ============ + +@app.route("/_logs") +def logs(): + return flask.Response(event_stream(), mimetype="text/event-stream") + + +@app.route("/_stuff", methods=['GET']) +def stuff(): + return jsonify(row1=get_queues(r_serv)) + + +@app.route("/") +def index(): + default_minute = cfg.get("Flask", "minute_processed_paste") + threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module") + return render_template("index.html", default_minute = default_minute, threshold_stucked_module=threshold_stucked_module) diff --git a/var/www/Flasks/Flask_search.py b/var/www/Flasks/Flask_search.py new file mode 100644 index 00000000..b5c60898 --- /dev/null +++ b/var/www/Flasks/Flask_search.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the search page +''' +import redis +import json +import os +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_pasteName = Flask_config.r_serv_pasteName +max_preview_char = Flask_config.max_preview_char +max_preview_modal = Flask_config.max_preview_modal +# ============ FUNCTIONS ============ + + +# ============ ROUTES ============ + +@app.route("/search", methods=['POST']) +def search(): + query = request.form['query'] + q = [] + q.append(query) + r = [] #complete path + c = [] #preview of the paste content + paste_date = [] + paste_size = [] + num_elem_to_get = 50 + + # Search filename + for path in r_serv_pasteName.smembers(q[0]): + r.append(path) + paste = Paste.Paste(path) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + c.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + paste_date.append(curr_date) + paste_size.append(paste._get_p_size()) + + # Search full line + from whoosh import index + from whoosh.fields import Schema, TEXT, ID + schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) + + indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) + ix = index.open_dir(indexpath) + from whoosh.qparser import QueryParser + with ix.searcher() as searcher: + query = QueryParser("content", ix.schema).parse(" ".join(q)) + results = searcher.search_page(query, 1, pagelen=num_elem_to_get) + for x in results: + r.append(x.items()[0][1]) + paste = Paste.Paste(x.items()[0][1]) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + c.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = 
curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + paste_date.append(curr_date) + paste_size.append(paste._get_p_size()) + results = searcher.search(query) + num_res = len(results) + + return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal, num_res=num_res) + + +@app.route("/get_more_search_result", methods=['POST']) +def get_more_search_result(): + query = request.form['query'] + q = [] + q.append(query) + page_offset = int(request.form['page_offset']) + num_elem_to_get = 50 + + path_array = [] + preview_array = [] + date_array = [] + size_array = [] + + from whoosh import index + from whoosh.fields import Schema, TEXT, ID + schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) + + indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) + ix = index.open_dir(indexpath) + from whoosh.qparser import QueryParser + with ix.searcher() as searcher: + query = QueryParser("content", ix.schema).parse(" ".join(q)) + results = searcher.search_page(query, page_offset, num_elem_to_get) + for x in results: + path_array.append(x.items()[0][1]) + paste = Paste.Paste(x.items()[0][1]) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + preview_array.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + date_array.append(curr_date) + size_array.append(paste._get_p_size()) + to_return = {} + to_return["path_array"] = path_array + to_return["preview_array"] = preview_array + to_return["date_array"] = date_array + to_return["size_array"] = size_array + print "len(path_array)="+str(len(path_array)) + if len(path_array) < num_elem_to_get: #pagelength + to_return["moreData"] = False + else: + to_return["moreData"] = True + + return jsonify(to_return) + + diff --git a/var/www/Flasks/Flask_sentiment.py b/var/www/Flasks/Flask_sentiment.py new file mode 100644 index 00000000..275cce39 --- /dev/null +++ b/var/www/Flasks/Flask_sentiment.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import datetime +import calendar +from Date import Date +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_charts = Flask_config.r_serv_charts +r_serv_sentiment = Flask_config.r_serv_sentiment +# ============ FUNCTIONS ============ + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + + +# ============ ROUTES ============ + +@app.route("/sentiment_analysis_trending/") +def sentiment_analysis_trending(): + return render_template("sentiment_analysis_trending.html") + + +@app.route("/sentiment_analysis_getplotdata/", methods=['GET']) +def sentiment_analysis_getplotdata(): + # Get the top providers based on number of pastes + oneHour = 60*60 + sevenDays = oneHour*24*7 + dateStart = datetime.datetime.now() + dateStart = dateStart.replace(minute=0, second=0, microsecond=0) + dateStart_timestamp = calendar.timegm(dateStart.timetuple()) + + 
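# [Editor's sketch, not part of the patch] The loops below assume one Redis set
# per provider and per hour, named "<provider>_<epoch>", whose members are the
# IDs of individual sentiment values stored under plain string keys. Under that
# assumption, and assuming each stored value parses as a number, averaging one
# hourly bucket would look like this hypothetical helper:
def average_hourly_sentiment(r_serv_sentiment, provider, hour_timestamp):
    bucket = provider + '_' + str(hour_timestamp)
    values = []
    for value_id in r_serv_sentiment.smembers(bucket):
        raw = r_serv_sentiment.get(value_id)
        if raw is not None:
            values.append(float(raw))
    # None rather than 0.0 for an empty hour, to keep "no data" distinct
    return sum(values) / len(values) if values else None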
getAllProviders = request.args.get('getProviders') + provider = request.args.get('provider') + allProvider = request.args.get('all') + if getAllProviders == 'True': + if allProvider == "True": + range_providers = r_serv_charts.smembers('all_provider_set') + return jsonify(list(range_providers)) + else: + range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8) + # if empty, get yesterday top providers + range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers + # if still empty, takes from all providers + if range_providers == []: + print 'today provider empty' + range_providers = r_serv_charts.smembers('all_provider_set') + return jsonify(list(range_providers)) + + elif provider is not None: + to_return = {} + + cur_provider_name = provider + '_' + list_date = {} + for cur_timestamp in range(int(dateStart_timestamp), int(dateStart_timestamp)-sevenDays-oneHour, -oneHour): + cur_set_name = cur_provider_name + str(cur_timestamp) + + list_value = [] + for cur_id in r_serv_sentiment.smembers(cur_set_name): + cur_value = r_serv_sentiment.get(cur_id) + list_value.append(cur_value) + list_date[cur_timestamp] = list_value + to_return[provider] = list_date + + return jsonify(to_return) + return "Bad request" + + + +@app.route("/sentiment_analysis_plot_tool/") +def sentiment_analysis_plot_tool(): + return render_template("sentiment_analysis_plot_tool.html") + + + +@app.route("/sentiment_analysis_plot_tool_getdata/", methods=['GET']) +def sentiment_analysis_plot_tool_getdata(): + getProviders = request.args.get('getProviders') + + if getProviders == 'True': + providers = [] + for cur_provider in r_serv_charts.smembers('all_provider_set'): + providers.append(cur_provider) + return jsonify(providers) + + else: + query = request.args.get('query') + query = query.split(',') + Qdate = request.args.get('Qdate') + + date1 = (Qdate.split('-')[0]).split('.') + date1 = datetime.date(int(date1[2]), int(date1[1]), int(date1[0])) + + date2 = (Qdate.split('-')[1]).split('.') + date2 = datetime.date(int(date2[2]), int(date2[1]), int(date2[0])) + + timestamp1 = calendar.timegm(date1.timetuple()) + timestamp2 = calendar.timegm(date2.timetuple()) + + oneHour = 60*60 + oneDay = oneHour*24 + + to_return = {} + for cur_provider in query: + list_date = {} + cur_provider_name = cur_provider + '_' + for cur_timestamp in range(int(timestamp1), int(timestamp2)+oneDay, oneHour): + cur_set_name = cur_provider_name + str(cur_timestamp) + + list_value = [] + for cur_id in r_serv_sentiment.smembers(cur_set_name): + cur_value = r_serv_sentiment.get(cur_id) + list_value.append(cur_value) + list_date[cur_timestamp] = list_value + to_return[cur_provider] = list_date + + return jsonify(to_return) + + diff --git a/var/www/Flasks/Flask_showpaste.py b/var/www/Flasks/Flask_showpaste.py new file mode 100644 index 00000000..71e2a4e2 --- /dev/null +++ b/var/www/Flasks/Flask_showpaste.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import json +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_pasteName = Flask_config.r_serv_pasteName +max_preview_char = Flask_config.max_preview_char +max_preview_modal = 
Flask_config.max_preview_modal +tlsh_to_percent = Flask_config.tlsh_to_percent +# ============ FUNCTIONS ============ + +def showpaste(content_range): + requested_path = request.args.get('paste', '') + paste = Paste.Paste(requested_path) + p_date = str(paste._get_p_date()) + p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] + p_source = paste.p_source + p_encoding = paste._get_p_encoding() + p_language = paste._get_p_language() + p_size = paste.p_size + p_mime = paste.p_mime + p_lineinfo = paste.get_lines_info() + p_content = paste.get_p_content().decode('utf-8', 'ignore') + p_duplicate_full_list = json.loads(paste._get_p_duplicate()) + p_duplicate_list = [] + p_simil_list = [] + p_date_list = [] + p_hashtype_list = [] + + + for dup_list in p_duplicate_full_list: + if dup_list[0] == "tlsh": + dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100) + else: + dup_list[2] = int(dup_list[2]) + + p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True) + + # Combine multiple duplicate paste name and format for display + new_dup_list = [] + dup_list_removed = [] + for dup_list_index in range(0, len(p_duplicate_full_list)): + if dup_list_index in dup_list_removed: + continue + indices = [i for i, x in enumerate(p_duplicate_full_list) if x[1] == p_duplicate_full_list[dup_list_index][1]] + hash_types = [] + comp_vals = [] + for i in indices: + hash_types.append(p_duplicate_full_list[i][0].encode('utf8')) + comp_vals.append(p_duplicate_full_list[i][2]) + dup_list_removed.append(i) + + hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types) + comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals) + if len(p_duplicate_full_list[dup_list_index]) > 3: + try: + date_paste = str(int(p_duplicate_full_list[dup_list_index][3])) + date_paste = date_paste[0:4]+"-"+date_paste[4:6]+"-"+date_paste[6:8] + except ValueError: + date_paste = str(p_duplicate_full_list[dup_list_index][3]) + else: + date_paste = "No date available" + new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste]) + + # Create the list to pass to the webpage + for dup_list in new_dup_list: + hash_type, path, simil_percent, date_paste = dup_list + p_duplicate_list.append(path) + p_simil_list.append(simil_percent) + p_hashtype_list.append(hash_type) + p_date_list.append(date_paste) + + if content_range != 0: + p_content = p_content[0:content_range] + + + return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list) + + + +# ============ ROUTES ============ + +@app.route("/showsavedpaste/") #completely shows the paste in a new tab +def showsavedpaste(): + return showpaste(0) + + +@app.route("/showpreviewpaste/") +def showpreviewpaste(): + num = request.args.get('num', '') + return "|num|"+num+"|num|"+showpaste(max_preview_modal) + + +@app.route("/getmoredata/") +def getmoredata(): + requested_path = request.args.get('paste', '') + paste = Paste.Paste(requested_path) + p_content = paste.get_p_content().decode('utf-8', 'ignore') + to_return = p_content[max_preview_modal-1:] + return to_return + diff --git a/var/www/Flasks/Flask_terms.py b/var/www/Flasks/Flask_terms.py new file mode 100644 index 
00000000..f5416ddc --- /dev/null +++ b/var/www/Flasks/Flask_terms.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import datetime +import calendar +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_term = Flask_config.r_serv_term +# ============ FUNCTIONS ============ + +def Term_getValueOverRange(word, startDate, num_day): + passed_days = 0 + oneDay = 60*60*24 + to_return = [] + curr_to_return = 0 + for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay): + value = r_serv_term.hget(timestamp, word) + curr_to_return += int(value) if value is not None else 0 + for i in num_day: + if passed_days == i-1: + to_return.append(curr_to_return) + passed_days += 1 + return to_return + + +# ============ ROUTES ============ + +@app.route("/terms_management/") +def terms_management(): + TrackedTermsSet_Name = "TrackedSetTermSet" + BlackListTermsSet_Name = "BlackListSetTermSet" + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + track_list = [] + track_list_values = [] + track_list_num_of_paste = [] + for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): + track_list.append(tracked_term) + value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31]) + + term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) + + set_paste_name = "tracked_" + tracked_term + track_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + value_range.append(term_date) + track_list_values.append(value_range) + + + black_list = [] + for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name): + term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term) + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + black_list.append([blacked_term, term_date]) + + return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste) + + +@app.route("/terms_management_query_paste/") +def terms_management_query_paste(): + term = request.args.get('term') + TrackedTermsSet_Name = "TrackedSetTermSet" + paste_info = [] + + set_paste_name = "tracked_" + term + track_list_path = r_serv_term.smembers(set_paste_name) + + for path in track_list_path: + paste = Paste.Paste(path) + p_date = str(paste._get_p_date()) + p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] + p_source = paste.p_source + p_encoding = paste._get_p_encoding() + p_size = paste.p_size + p_mime = paste.p_mime + p_lineinfo = paste.get_lines_info() + p_content = paste.get_p_content().decode('utf-8', 'ignore') + if p_content != 0: + p_content = p_content[0:400] + paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) + + return jsonify(paste_info) + + +@app.route("/terms_management_query/") +def terms_management_query(): + TrackedTermsDate_Name = 
"TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + term = request.args.get('term') + section = request.args.get('section') + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31]) + + if section == "followTerm": + term_date = r_serv_term.hget(TrackedTermsDate_Name, term) + elif section == "blacklistTerm": + term_date = r_serv_term.hget(BlackListTermsDate_Name, term) + + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + value_range.append(str(term_date)) + return jsonify(value_range) + + +@app.route("/terms_management_action/", methods=['GET']) +def terms_management_action(): + TrackedTermsSet_Name = "TrackedSetTermSet" + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + BlackListTermsSet_Name = "BlackListSetTermSet" + + today = datetime.datetime.now() + today = today.replace(microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + + section = request.args.get('section') + action = request.args.get('action') + term = request.args.get('term') + if action is None or term is None: + return "None" + else: + if section == "followTerm": + if action == "add": + r_serv_term.sadd(TrackedTermsSet_Name, term.lower()) + r_serv_term.hset(TrackedTermsDate_Name, term, today_timestamp) + else: + r_serv_term.srem(TrackedTermsSet_Name, term.lower()) + elif section == "blacklistTerm": + if action == "add": + r_serv_term.sadd(BlackListTermsSet_Name, term.lower()) + r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp) + else: + r_serv_term.srem(BlackListTermsSet_Name, term.lower()) + else: + return "None" + + to_return = {} + to_return["section"] = section + to_return["action"] = action + to_return["term"] = term + return jsonify(to_return) + + + +@app.route("/terms_plot_tool/") +def terms_plot_tool(): + term = request.args.get('term') + if term is not None: + return render_template("terms_plot_tool.html", term=term) + else: + return render_template("terms_plot_tool.html", term="") + + +@app.route("/terms_plot_tool_data/") +def terms_plot_tool_data(): + oneDay = 60*60*24 + range_start = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_start')))) if request.args.get('range_start') is not None else 0; + range_start = range_start.replace(hour=0, minute=0, second=0, microsecond=0) + range_start = calendar.timegm(range_start.timetuple()) + range_end = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0; + range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0) + range_end = calendar.timegm(range_end.timetuple()) + term = request.args.get('term') + + if term is None: + return "None" + else: + value_range = [] + for timestamp in range(range_start, range_end+oneDay, oneDay): + value = r_serv_term.hget(timestamp, term) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + value_range.insert(0,term) + return jsonify(value_range) + + +@app.route("/terms_plot_top/") +def terms_plot_top(): + return render_template("terms_plot_top.html") + + +@app.route("/terms_plot_top_data/") +def terms_plot_top_data(): + oneDay = 60*60*24 + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + 
today_timestamp = calendar.timegm(today.timetuple()) + + set_day = "TopTermFreq_set_day_" + str(today_timestamp) + set_week = "TopTermFreq_set_week"; + set_month = "TopTermFreq_set_month"; + + the_set = request.args.get('set') + num_day = int(request.args.get('num_day')) + if the_set is None: + return "None" + else: + to_return = [] + if the_set == "TopTermFreq_set_day": + the_set += "_" + str(today_timestamp) + + for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): + position = {} + position['day'] = r_serv_term.zrevrank(set_day, term) + position['day'] = position['day']+1 if position['day'] is not None else "<20" + position['week'] = r_serv_term.zrevrank(set_week, term) + position['week'] = position['week']+1 if position['week'] is not None else "<20" + position['month'] = r_serv_term.zrevrank(set_month, term) + position['month'] = position['month']+1 if position['month'] is not None else "<20" + value_range = [] + for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): + value = r_serv_term.hget(timestamp, term) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + + to_return.append([term, value_range, tot_value, position]) + + return jsonify(to_return) + + diff --git a/var/www/Flasks/Flask_trendingcharts.py b/var/www/Flasks/Flask_trendingcharts.py new file mode 100644 index 00000000..fdb1a3d4 --- /dev/null +++ b/var/www/Flasks/Flask_trendingcharts.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending charts page +''' +import redis +import datetime +from Date import Date +import flask +from flask import Flask, render_template, jsonify, request + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_charts = Flask_config.r_serv_charts +# ============ FUNCTIONS ============ + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + + +# ============ ROUTES ============ + +@app.route("/_progressionCharts", methods=['GET']) +def progressionCharts(): + attribute_name = request.args.get('attributeName') + trending_name = request.args.get('trendingName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(attribute_name, date) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + bar_values.insert(0, attribute_name) + return jsonify(bar_values) + + else: + redis_progression_name = "z_top_progression_" + trending_name + keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf', withscores=True, start=0, num=10) + return jsonify(keyw_value) + +@app.route("/wordstrending/") +def wordstrending(): + default_display = cfg.get("Flask", "default_display") + return render_template("Wordstrending.html", default_display = default_display) + + +@app.route("/protocolstrending/") +def protocolstrending(): + default_display = cfg.get("Flask", "default_display") + return 
render_template("Protocolstrending.html", default_display = default_display) + + +@app.route("/trending/") +def trending(): + default_display = cfg.get("Flask", "default_display") + return render_template("Trending.html", default_display = default_display) + + diff --git a/var/www/Flasks/Flask_trendingmodules.py b/var/www/Flasks/Flask_trendingmodules.py new file mode 100644 index 00000000..73cef7f5 --- /dev/null +++ b/var/www/Flasks/Flask_trendingmodules.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import datetime +import flask +from flask import Flask, render_template, jsonify, request + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_charts = Flask_config.r_serv_charts +# ============ FUNCTIONS ============ + +# Iterate over elements in the module provided and return the today data or the last data +# return format: [('passed_days', num_of_passed_days), ('elem_name1', elem_value1), ('elem_name2', elem_value2)]] +def get_top_relevant_data(server, module_name): + days = 0 + for date in get_date_range(15): + redis_progression_name_set = 'top_'+ module_name +'_set_' + date + member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True) + if len(member_set) == 0: #No data for this date + days += 1 + else: + member_set.insert(0, ("passed_days", days)) + return member_set + + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + +# ============ ROUTES ============ + +@app.route("/_moduleCharts", methods=['GET']) +def modulesCharts(): + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(date, module_name+'-'+keyword_name) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + bar_values.insert(0, keyword_name) + return jsonify(bar_values) + + else: + member_set = get_top_relevant_data(r_serv_charts, module_name) + if len(member_set) == 0: + member_set.append(("No relevant data", int(100))) + return jsonify(member_set) + + +@app.route("/_providersChart", methods=['GET']) +def providersChart(): + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value_size = r_serv_charts.hget(keyword_name+'_'+'size', date) + curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date) + curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date) + if module_name == "size": + curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0) + else: + curr_value = float(curr_value_num if curr_value_num is not None else 0.0) + + 
bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], curr_value]) + bar_values.insert(0, keyword_name) + return jsonify(bar_values) + + else: + #redis_provider_name_set = 'top_size_set' if module_name == "size" else 'providers_set' + redis_provider_name_set = 'top_avg_size_set_' if module_name == "size" else 'providers_set_' + redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0] + + member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8) + # Member set is a list of (value, score) pairs + if len(member_set) == 0: + member_set.append(("No relevant data", float(100))) + return jsonify(member_set) + + +@app.route("/moduletrending/") +def moduletrending(): + return render_template("Moduletrending.html") + + diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index e527aafa..8d50ea9d 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -109,11 +109,11 @@ function create_log_table(obj_json) { var pdate = document.createElement('TD') var nam = document.createElement('TD') var msage = document.createElement('TD') + var inspect = document.createElement('TD') var chansplit = obj_json.channel.split('.'); var parsedmess = obj_json.data.split(';'); - if (parsedmess[0] == "Global"){ var paste_processed = parsedmess[4].split(" ")[2]; window.paste_num_tabvar = paste_processed; @@ -139,7 +139,7 @@ function create_log_table(obj_json) { source_url = "http://"+parsedmess[1]+"/"+parsedmess[3].split(".")[0]; } source_link.setAttribute("HREF",source_url); - source_link.setAttribute("TARGET", "_blank") + source_link.setAttribute("TARGET", "_blank"); source_link.appendChild(document.createTextNode(parsedmess[1])); src.appendChild(source_link); @@ -169,6 +169,18 @@ function create_log_table(obj_json) { msage.appendChild(document.createTextNode(message.join(" "))); + var paste_path = parsedmess[5]; + var url_to_saved_paste = url_showSavedPath+"?paste="+paste_path+"&num="+parsedmess[0]; + + var action_icon_a = document.createElement("A"); + action_icon_a.setAttribute("TARGET", "_blank"); + action_icon_a.setAttribute("HREF", url_to_saved_paste); + var action_icon_span = document.createElement('SPAN'); + action_icon_span.className = "fa fa-search-plus"; + action_icon_a.appendChild(action_icon_span); + + inspect.appendChild(action_icon_a); + tr.appendChild(time) tr.appendChild(chan); tr.appendChild(level); @@ -177,6 +189,7 @@ function create_log_table(obj_json) { tr.appendChild(pdate); tr.appendChild(nam); tr.appendChild(msage); + tr.appendChild(inspect); if (tr.className == document.getElementById("checkbox_log_info").value && document.getElementById("checkbox_log_info").checked == true) { tableBody.appendChild(tr); @@ -219,28 +232,42 @@ function create_queue_table() { tr.appendChild(th); } - for(i = 0; i < (glob_tabvar.row1).length;i++){ - var tr = document.createElement('TR') - for(j = 0; j < 2; j++){ - var td = document.createElement('TD') - var moduleNum = j == 0 ? "." 
+ glob_tabvar.row1[i][3] : ""; - td.appendChild(document.createTextNode(glob_tabvar.row1[i][j] + moduleNum)); - tr.appendChild(td) - } - // Used to decide the color of the row - // We have glob_tabvar.row1[][j] with: - // - j=0: ModuleName - // - j=1: queueLength - // - j=2: LastProcessedPasteTime - // - j=3: Number of the module belonging in the same category - if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2) - tr.className += " danger"; - else if (parseInt(glob_tabvar.row1[i][2]) > 60*1) - tr.className += " warning"; - else - tr.className += " success"; + if ((glob_tabvar.row1).length == 0) { + var tr = document.createElement('TR'); + var td = document.createElement('TD'); + var td2 = document.createElement('TD'); + td.appendChild(document.createTextNode("No running queues")); + td2.appendChild(document.createTextNode("Or no feed")); + td.className += " danger"; + td2.className += " danger"; + tr.appendChild(td); + tr.appendChild(td2); tableBody.appendChild(tr); } + else { + for(i = 0; i < (glob_tabvar.row1).length;i++){ + var tr = document.createElement('TR') + for(j = 0; j < 2; j++){ + var td = document.createElement('TD') + var moduleNum = j == 0 ? "." + glob_tabvar.row1[i][3] : ""; + td.appendChild(document.createTextNode(glob_tabvar.row1[i][j] + moduleNum)); + tr.appendChild(td) + } + // Used to decide the color of the row + // We have glob_tabvar.row1[][j] with: + // - j=0: ModuleName + // - j=1: queueLength + // - j=2: LastProcessedPasteTime + // - j=3: Number of the module belonging in the same category + if (parseInt(glob_tabvar.row1[i][2]) > window.threshold_stucked_module && parseInt(glob_tabvar.row1[i][1]) > 2) + tr.className += " danger"; + else if (parseInt(glob_tabvar.row1[i][1]) == 0) + tr.className += " warning"; + else + tr.className += " success"; + tableBody.appendChild(tr); + } + } Tablediv.appendChild(table); } diff --git a/var/www/templates/browse_important_paste.html b/var/www/templates/browse_important_paste.html index 2cbda569..61015a15 100644 --- a/var/www/templates/browse_important_paste.html +++ b/var/www/templates/browse_important_paste.html @@ -30,9 +30,6 @@ white-space:pre-wrap; word-wrap:break-word; } - .modal-backdrop.fade { - opacity: 0; - } diff --git a/var/www/templates/important_paste_by_module.html b/var/www/templates/important_paste_by_module.html index 061648c4..3a1c1c64 100644 --- a/var/www/templates/important_paste_by_module.html +++ b/var/www/templates/important_paste_by_module.html @@ -1,4 +1,4 @@ - +
@@ -23,12 +23,117 @@
#

+
+ +
+
+ + +

+ + + + + @@ -40,39 +145,9 @@ $(document).ready(function(){ var char_to_display = {{ char_to_display }}; var start_index = 0; - // On click, get html content from url and update the corresponding modal - $("[data-toggle='modal']").on("click.openmodal", function (event) { - event.preventDefault(); - var modal=$(this); - var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num'); - $.get(url, function (data) { - $("#mymodalbody").html(data); - var button = $(''); - button.tooltip(); - $("#mymodalbody").children(".panel-default").append(button); - - $("#button_show_path").attr('href', $(modal).attr('data-url')); - $("#button_show_path").show('fast'); - $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF - if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed - nothing_to_display(); - } - // On click, donwload all paste's content - $("#load-more-button").on("click", function (event) { - if (complete_paste == null) { //Donwload only once - $.get("{{ url_for('getmoredata') }}"+"?paste="+$(modal).attr('data-path'), function(data, status){ - complete_paste = data; - update_preview(); - }); - } else { - update_preview(); - } - }); - }); - }); - // When the modal goes out, refresh it to normal content $("#mymodal").on('hidden.bs.modal', function () { + can_change_modal_content = true; $("#mymodalbody").html("

Loading paste information...

"); var loading_gif = ""; $("#mymodalbody").append(loading_gif); // Show the loading GIF @@ -110,38 +185,53 @@ $(document).ready(function(){ } - // Use to bind the button with the new displayed data // (The bind do not happens if the dataTable is in tabs and the clicked data is in another page) - $('#myTable').on( 'draw.dt', function () { + + search_table.on( 'draw.dt', function () { + // Bind tooltip each time we draw a new page + $('[data-toggle="tooltip"]').tooltip(); // On click, get html content from url and update the corresponding modal - $("[data-toggle='modal']").unbind('click.openmodal').on("click.openmodal", function (event) { + $("[data-toggle='modal']").off('click.openmodal').on("click.openmodal", function (event) { event.preventDefault(); var modal=$(this); var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num'); + last_clicked_paste = $(this).attr('data-num'); $.get(url, function (data) { - $("#mymodalbody").html(data); - var button = $(''); - button.tooltip(); - $("#mymodalbody").children(".panel-default").append(button); - - $("#button_show_path").attr('href', $(modal).attr('data-url')); - $("#button_show_path").show('fast'); - $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF - if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed - nothing_to_display(); - } - // On click, donwload all paste's content - $("#load-more-button").on("click", function (event) { - if (complete_paste == null) { //Donwload only once - $.get("{{ url_for('getmoredata') }}"+"?paste="+$(modal).attr('data-path'), function(data, status){ - complete_paste = data; - update_preview(); - }); - } else { - update_preview(); + + // verify that the reveived data is really the current clicked paste. Otherwise, ignore it. + var received_num = parseInt(data.split("|num|")[1]); + if (received_num == last_clicked_paste && can_change_modal_content) { + can_change_modal_content = false; + + // clear data by removing html, body, head tags. prevent dark modal background stack bug. + var cleared_data = data.split("")[1].split("")[0]; + $("#mymodalbody").html(cleared_data); + + var button = $(''); + button.tooltip(); + $("#mymodalbody").children(".panel-default").append(button); + + $("#button_show_path").attr('href', $(modal).attr('data-url')); + $("#button_show_path").show('fast'); + $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF + if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed + nothing_to_display(); } - }); + // On click, donwload all paste's content + $("#load-more-button").on("click", function (event) { + if (complete_paste == null) { //Donwload only once + $.get("{{ url_for('getmoredata') }}"+"?paste="+$(modal).attr('data-path'), function(data, status){ + complete_paste = data; + update_preview(); + }); + } else { + update_preview(); + } + }); + } else if (can_change_modal_content) { + $("#mymodalbody").html("Ignoring previous not finished query of paste #" + received_num); + } }); }); } ); diff --git a/var/www/templates/index.html b/var/www/templates/index.html index 5d8639cf..74b45c01 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -20,6 +20,7 @@ +
+ + @@ -143,39 +208,9 @@ var char_to_display = {{ char_to_display }}; var start_index = 0; - // On click, get html content from url and update the corresponding modal - $("[data-toggle='modal']").on("click", function (event) { - event.preventDefault(); - var modal=$(this); - var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num'); - $.get(url, function (data) { - $("#mymodalbody").html(data); - var button = $(''); - button.tooltip(); - $("#mymodalbody").children(".panel-default").append(button); - - $("#button_show_path").attr('href', $(modal).attr('data-url')); - $("#button_show_path").show('fast'); - $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF - if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed - nothing_to_display(); - } - // On click, donwload all paste's content - $("#load-more-button").on("click", function (event) { - if (complete_paste == null) { //Donwload only once - $.get("{{ url_for('getmoredata') }}"+"?paste="+$(modal).attr('data-path'), function(data, status){ - complete_paste = data; - update_preview(); - }); - } else { - update_preview(); - } - }); - }); - }); - // When the modal goes out, refresh it to normal content $("#mymodal").on('hidden.bs.modal', function () { + can_change_modal_content = true; $("#mymodalbody").html("

Loading paste information...

"); var loading_gif = ""; $("#mymodalbody").append(loading_gif); // Show the loading GIF @@ -211,5 +246,55 @@ new_content.show('fast'); $("#load-more-button").hide(); } + + + $('#myTable').on( 'draw.dt', function () { + // Bind tooltip each time we draw a new page + $('[data-toggle="tooltip"]').tooltip(); + // On click, get html content from url and update the corresponding modal + $("[data-toggle='modal']").off('click.openmodal').on("click.openmodal", function (event) { + var modal=$(this); + var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num'); + last_clicked_paste = $(this).attr('data-num'); + $.get(url, function (data) { + + // verify that the reveived data is really the current clicked paste. Otherwise, ignore it. + var received_num = parseInt(data.split("|num|")[1]); + if (received_num == last_clicked_paste && can_change_modal_content) { + can_change_modal_content = false; + + // clear data by removing html, body, head tags. prevent dark modal background stack bug. + var cleared_data = data.split("")[1].split("")[0]; + $("#mymodalbody").html(cleared_data); + setTimeout(function() { $('#tableDup').DataTable(); }, 150); + + var button = $(''); + button.tooltip(); + $("#mymodalbody").children(".panel-default").append(button); + + $("#button_show_path").attr('href', $(modal).attr('data-url')); + $("#button_show_path").show('fast'); + $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF + if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed + nothing_to_display(); + } + // On click, donwload all paste's content + $("#load-more-button").off('click.download').on("click.download", function (event) { + if (complete_paste == null) { //Donwload only once + $.get("{{ url_for('getmoredata') }}"+"?paste="+$(modal).attr('data-path'), function(data, status){ + complete_paste = data; + update_preview(); + }); + } else { + update_preview(); + } + }); + } else if (can_change_modal_content) { + $("#mymodalbody").html("Ignoring previous not finished query of paste #" + received_num); + } + }); + }); + } ); + diff --git a/var/www/templates/sentiment_analysis_trending.html b/var/www/templates/sentiment_analysis_trending.html index e1788f35..b20c3696 100644 --- a/var/www/templates/sentiment_analysis_trending.html +++ b/var/www/templates/sentiment_analysis_trending.html @@ -205,24 +205,24 @@ - worst1 - best1 + no data + no data - worst2 - best2 + no data + no data - worst3 - best3 + no data + no data - worst4 - best4 + no data + no data - worst5 - best5 + no data + no data diff --git a/var/www/templates/show_saved_paste.html b/var/www/templates/show_saved_paste.html index 75d8ab46..ef955bfe 100644 --- a/var/www/templates/show_saved_paste.html +++ b/var/www/templates/show_saved_paste.html @@ -4,68 +4,87 @@ Paste information + + + + + + + + + + + + - -

Paste: {{ request.args.get('num') }}

-

{{ request.args.get('paste') }}

- -

- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - -
DateSourceEncodingLanguageSize (Kb)MimeNumber of linesMax line length
{{ date }}{{ source }}{{ encoding }}{{ language }}{{ size }}{{ mime }}{{ lineinfo.0 }}{{ lineinfo.1 }}
-
-
- {% if duplicate_list|length == 0 %} -

No Duplicate

- {% else %} -

Duplicate list:

- - {% set i = 0 %} - - - - {% for dup_path in duplicate_list %} +
+
+

Paste: {{ request.args.get('paste') }}

+ + +
Hash typePaste info
+ + + + + + + + + + + + + + + + + + + + + + + + +
DateSourceEncodingLanguageSize (Kb)MimeNumber of linesMax line length
{{ date }}{{ source }}{{ encoding }}{{ language }}{{ size }}{{ mime }}{{ lineinfo.0 }}{{ lineinfo.1 }}
+
+
+ {% if duplicate_list|length == 0 %} +

No Duplicate

+ {% else %} +

Duplicate list:

+ + {% set i = 0 %} + - - - + + + + - {% set i = i + 1 %} - {% endfor %} -
{{ hashtype_list[i] }}Similarity: {{ simil_list[i] }}%{{ dup_path }}Hash typePaste infoDatePath
- {% endif %} -

Content:

-

{{ content }}

-
-
- - + + + {% for dup_path in duplicate_list %} + + {{ hashtype_list[i] }} + Similarity: {{ simil_list[i] }}% + {{ date_list[i] }} + {{ dup_path }} + + {% set i = i + 1 %} + {% endfor %} + + + {% endif %} +

Content:

+

{{ content }}

+ + + + diff --git a/var/www/templates/terms_management.html b/var/www/templates/terms_management.html index e4e6e5f1..3d4f2f77 100644 --- a/var/www/templates/terms_management.html +++ b/var/www/templates/terms_management.html @@ -111,7 +111,7 @@ Day occurence Week occurence Month occurence - # Concerned pastes + # Tracked pastes Action @@ -198,6 +198,9 @@
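[Editor's note] Several of the template changes above rely on the small framing convention introduced in Flask_showpaste.py: showpreviewpaste() returns "|num|<paste number>|num|<rendered HTML>", and the page-side JavaScript recovers the number with data.split("|num|")[1] so it can discard responses that arrive after the user has already clicked another paste. A minimal sketch of both halves of that handshake, with hypothetical helper names and the client half shown in Python for consistency:

def frame_preview(num, html):
    # server side, as in showpreviewpaste(): prefix the rendered HTML
    return "|num|" + str(num) + "|num|" + html

def parse_preview(data):
    # client-side equivalent of parseInt(data.split("|num|")[1])
    parts = data.split("|num|")
    return int(parts[1]), parts[2]  # (paste number, HTML payload)

# frame_preview(7, "<p>x</p>") == "|num|7|num|<p>x</p>"
# parse_preview("|num|7|num|<p>x</p>") == (7, "<p>x</p>")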