diff --git a/.gitignore b/.gitignore index 264aaa5f..88b623fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Temp files *.swp *.pyc +*.swo # Install Dirs AILENV diff --git a/bin/WebStats.py b/bin/WebStats.py index 5da443a8..cac352af 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -10,19 +10,72 @@ import re import redis import os from packages import lib_words +from packages.Date import Date from pubsublogger import publisher from packages import Paste from Helper import Process from pyfaup.faup import Faup -def analyse(field_name): +# Config Var +threshold_need_to_look = 50 +range_to_look = 10 +threshold_to_plot = 1 #500% +to_plot = set() +clean_frequency = 10 #minutes + +def analyse(server, field_name): field = url_parsed[field_name] if field is not None: - prev_score = r_serv1.hget(field, date) + prev_score = server.hget(field, date) if prev_score is not None: - r_serv1.hset(field, date, int(prev_score) + 1) + server.hset(field, date, int(prev_score) + 1) else: - r_serv1.hset(field, date, 1) + server.hset(field, date, 1) + +def analyse_and_progression(server, field_name): + field = url_parsed[field_name] + if field is not None: + prev_score = server.hget(field, date) + if prev_score is not None: + print field + ' prev_score:' + prev_score + server.hset(field, date, int(prev_score) + 1) + if int(prev_score) + 1 > threshold_need_to_look: #threshold for false possitive + if(check_for_progression(server, field, date)): + to_plot.add(field) + else: + server.hset(field, date, 1) + +def check_for_progression(server, field, date): + previous_data = set() + tot_sum = 0 + for i in range(0, range_to_look): + curr_value = server.hget(field, Date(date).substract_day(i)) + if curr_value is None: #no further data + break + else: + curr_value = int(curr_value) + previous_data.add(curr_value) + tot_sum += curr_value + if i == 0: + today_val = curr_value + + + print 'totsum='+str(tot_sum) + print 'div='+str(tot_sum/today_val) + if tot_sum/today_val >= threshold_to_plot: + return True + else: + return False + +def clean_to_plot(): + temp_to_plot = set() + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month)+str(curr_date.day)) + + for elem in to_plot: + if(check_for_progression(field, date)): + temp_to_plot.add(elem) + to_plot = temp_to_plot if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) @@ -45,6 +98,11 @@ if __name__ == '__main__': host=p.config.get("Redis_Level_DB", "host"), port=p.config.get("Redis_Level_DB", "port"), db=p.config.get("Redis_Level_DB", "db")) + + r_serv2 = redis.StrictRedis( + host=p.config.get("Redis_Level_DB_Domain", "host"), + port=p.config.get("Redis_Level_DB_Domain", "port"), + db=p.config.get("Redis_Level_DB_Domain", "db")) # FILE CURVE SECTION # csv_path_proto = os.path.join(os.environ['AIL_HOME'], @@ -57,6 +115,10 @@ if __name__ == '__main__': tldsfile_path = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "tldsfile")) + csv_path_domain = os.path.join(os.environ['AIL_HOME'], + p.config.get("Directories", "domainstrending_csv")) + + faup = Faup() generate_new_graph = False # Endless loop getting messages from the input queue @@ -71,17 +133,22 @@ if __name__ == '__main__': today = datetime.date.today() year = today.year month = today.month - + print 'b1' lib_words.create_curve_with_word_file(r_serv1, csv_path_proto, protocolsfile_path, year, month) - + print 'b2' lib_words.create_curve_with_word_file(r_serv1, csv_path_tld, tldsfile_path, year, month) - + print 'b3' + lib_words.create_curve_with_list(r_serv2, csv_path_domain, + to_plot, year, + month) + print 'end building' publisher.debug("{} queue is empty, waiting".format(config_section)) - time.sleep(1) + print 'sleeping' + time.sleep(5) continue else: @@ -91,5 +158,8 @@ if __name__ == '__main__': faup.decode(url) url_parsed = faup.get() - analyse('scheme') #Scheme analysis - analyse('tld') #Tld analysis + analyse(r_serv1, 'scheme') #Scheme analysis + analyse(r_serv1, 'tld') #Tld analysis + analyse_and_progression(r_serv2, 'domain') #Domain analysis + print "to_plot:" + print to_plot diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 4abb0910..ce02636a 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -30,3 +30,12 @@ class Date(object): def _set_day(self, day): self.day = day + + def substract_day(self, numDay): + import datetime + computed_date = datetime.date(int(self.year), int(self.month), int(self.day)) - datetime.timedelta(numDay) + comp_year = str(computed_date.year) + comp_month = str(computed_date.month).zfill(2) + comp_day = str(computed_date.day).zfill(2) + return comp_year + comp_month + comp_day + diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index 3b81c7fe..bedf36b0 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -186,7 +186,9 @@ class Paste(object): if the paste doesn't contain any human dictionnary words ..seealso: git@github.com:saffsd/langid.py.git - """ + FIXME: This procedure is using more than 20% of CPU + + """ identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True) return identifier.classify(self.get_p_content()) @@ -196,6 +198,9 @@ class Paste(object): def _get_p_date(self): return self.p_date + def _get_p_size(self): + return self.p_size + def _get_hash_lines(self, min=1, start=1, jump=10): """ Returning all the lines of the paste hashed. diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py index 3c065ed0..b2cf418b 100644 --- a/bin/packages/lib_words.py +++ b/bin/packages/lib_words.py @@ -81,13 +81,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month to keep the timeline of the curve correct. """ + threshold = 50 first_day = date(year, month, 01) last_day = date(year, month, calendar.monthrange(year, month)[1]) words = [] with open(feederfilename, 'rb') as f: # words of the files - words = sorted([word.strip() for word in f]) + words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' ]) headers = ['Date'] + words with open(csvfilename+'.csv', 'wb') as f: @@ -102,6 +103,47 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month # from the 1srt day to the last of the list for word in words: value = r_serv.hget(word, curdate) + if value is None: + row.append(0) + else: + # if the word have a value for the day + # FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold + if value >= threshold: + row.append(value) + writer.writerow(row) + +def create_curve_with_list(server, csvfilename, to_plot, year, month): + """Create a csv file used with dygraph. + + :param r_serv: -- connexion to redis database + :param csvfilename: -- the path to the .csv file created + :param to_plot: -- the list which contain a words to plot. + :param year: -- (integer) The year to process + :param month: -- (integer) The month to process + + This function create a .csv file using datas in redis. + It's checking if the words contained in to_plot and + their respectives values by days exists. + + """ + + first_day = date(year, month, 01) + last_day = date(year, month, calendar.monthrange(year, month)[1]) + words = sorted(to_plot) + + headers = ['Date'] + words + with open(csvfilename+'.csv', 'wb') as f: + writer = csv.writer(f) + writer.writerow(headers) + + # for each days + for dt in rrule(DAILY, dtstart=first_day, until=last_day): + row = [] + curdate = dt.strftime("%Y%m%d") + row.append(curdate) + # from the 1srt day to the last of the list + for word in words: + value = server.hget(word, curdate) if value is None: row.append(0) else: diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 9c152cab..018608f1 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -7,7 +7,9 @@ import json from flask import Flask, render_template, jsonify, request import flask import os - +import sys +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Paste # CONFIG # configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') @@ -19,6 +21,9 @@ if not os.path.exists(configfile): cfg = ConfigParser.ConfigParser() cfg.read(configfile) +max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip +max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal + # REDIS # r_serv = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), @@ -49,6 +54,29 @@ def get_queues(r): r.hgetall("queues").iteritems()] +def list_len(s): + return len(s) +app.jinja_env.filters['list_len'] = list_len + + +def showpaste(content_range): + requested_path = request.args.get('paste', '') + paste = Paste.Paste(requested_path) + p_date = str(paste._get_p_date()) + p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] + p_source = paste.p_source + p_encoding = paste._get_p_encoding() + p_language = paste._get_p_language() + p_size = paste.p_size + p_mime = paste.p_mime + p_lineinfo = paste.get_lines_info() + p_content = paste.get_p_content().decode('utf-8', 'ignore') + if content_range != 0: + p_content = p_content[0:content_range] + + return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content)) + + @app.route("/_logs") def logs(): return flask.Response(event_stream(), mimetype="text/event-stream") @@ -64,7 +92,10 @@ def search(): query = request.form['query'] q = [] q.append(query) - r = [] + r = [] #complete path + c = [] #preview of the paste content + paste_date = [] + paste_size = [] # Search from whoosh import index from whoosh.fields import Schema, TEXT, ID @@ -78,7 +109,16 @@ def search(): results = searcher.search(query, limit=None) for x in results: r.append(x.items()[0][1]) - return render_template("search.html", r=r) + paste = Paste.Paste(x.items()[0][1]) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + c.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + paste_date.append(curr_date) + paste_size.append(paste._get_p_size()) + return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal) + @app.route("/") def index(): @@ -93,16 +133,39 @@ def monitoring(): @app.route("/wordstrending/") def wordstrending(): - return render_template("Wordstrending.html") + default_display = cfg.get("Flask", "default_display") + return render_template("Wordstrending.html", default_display = default_display) @app.route("/protocolstrending/") def protocolstrending(): - return render_template("Protocolstrending.html") + default_display = cfg.get("Flask", "default_display") + return render_template("Protocolstrending.html", default_display = default_display) -@app.route("/tldstrending/") -def tldstrending(): - return render_template("Tldstrending.html") + +@app.route("/trending/") +def trending(): + default_display = cfg.get("Flask", "default_display") + return render_template("Trending.html", default_display = default_display) + + +@app.route("/showsavedpaste/") #completely shows the paste in a new tab +def showsavedpaste(): + return showpaste(0) + + +@app.route("/showpreviewpaste/") +def showpreviewpaste(): + return showpaste(max_preview_modal) + + +@app.route("/getmoredata/") +def getmoredata(): + requested_path = request.args.get('paste', '') + paste = Paste.Paste(requested_path) + p_content = paste.get_p_content().decode('utf-8', 'ignore') + to_return = p_content[max_preview_modal-1:] + return to_return if __name__ == "__main__": diff --git a/var/www/static/image/loading.gif b/var/www/static/image/loading.gif new file mode 100644 index 00000000..b60c1615 Binary files /dev/null and b/var/www/static/image/loading.gif differ diff --git a/var/www/static/js/plot-graph.js b/var/www/static/js/plot-graph.js new file mode 100644 index 00000000..6daad3ba --- /dev/null +++ b/var/www/static/js/plot-graph.js @@ -0,0 +1,141 @@ +function Graph(id_pannel, path, header_size){ + this.path = path; + this.id_pannel = id_pannel; + + // Hide every header during initialisation + var false_tab = []; + for(i=0; i max_data_x) { + end_x_highlight = max_data_x; + } + highlight_period(start_x_highlight,end_x_highlight); + // calculate start of highlight for next Saturday + w += 7*24*3600*1000; + } + }, + visibility: false_tab + }); + this.graph = g2; + this.set_Visibility = setVis; + this.set_Visibility_andHide = setVis_andHide; + + onclick = function(ev) { + if (g2.isSeriesLocked()) { + g2.clearSelection(); + } + else { + g2.setSelection(g2.getSelection(), g2.getHighlightSeries(), true); + } + }; + g2.updateOptions({clickCallback: onclick}, true); + + var linear = document.getElementById("linear"); + var log = document.getElementById("log"); + linear.onclick = function() { setLog(false); } + log.onclick = function() { setLog(true); } + var setLog = function(val) { + g2.updateOptions({ logscale: val }); + linear.disabled = !val; + log.disabled = val; + } + function unzoomGraph() { + g2.updateOptions({ + dateWindow:null, + valueRange:null + }); + } + + // display the top headers + function setVis(max_display){ + headings = this.graph.getLabels(); + headings.splice(0,1); + var sorted_list = new Array(); + today = new Date().getDate()-1; // Take the top from yesterday so that we can see the current evolution + for( i=0; i - + + + +
+ + + + +
+
@@ -93,102 +105,44 @@
- - var min_data_x = g.getValue(0,0); - var max_data_x = g.getValue(g.numRows()-1,0); + - var w = min_data_x; - // starting on Sunday is a special case - if (dow == 0) { - highlight_period(w,w+12*3600*1000); - } - // find first saturday - while (dow != 5) { - w += 24*3600*1000; - d = new Date(w); - dow = d.getUTCDay(); - } + - // shift back 1/2 day to center highlight around the point for the day - w -= 12*3600*1000; - while (w < max_data_x) { - var start_x_highlight = w; - var end_x_highlight = w + 2*24*3600*1000; - // make sure we don't try to plot outside the graph - if (start_x_highlight < min_data_x) { - start_x_highlight = min_data_x; - } - if (end_x_highlight > max_data_x) { - end_x_highlight = max_data_x; - } - highlight_period(start_x_highlight,end_x_highlight); - // calculate start of highlight for next Saturday - w += 7*24*3600*1000; - } - } -}); -onclick = function(ev) { - if (g2.isSeriesLocked()) { - g2.clearSelection(); - } - else { - g2.setSelection(g2.getSelection(), g2.getHighlightSeries(), true); - } -}; -g2.updateOptions({clickCallback: onclick}, true); -var linear = document.getElementById("linear"); -var log = document.getElementById("log"); -linear.onclick = function() { setLog(false); } -log.onclick = function() { setLog(true); } -var setLog = function(val) { - g2.updateOptions({ logscale: val }); - linear.disabled = !val; - log.disabled = val; - } -function unzoomGraph() { - g2.updateOptions({ - dateWindow:null, - valueRange:null - }); -} - diff --git a/var/www/templates/Tldstrending.html b/var/www/templates/Tldstrending.html deleted file mode 100644 index e2706f21..00000000 --- a/var/www/templates/Tldstrending.html +++ /dev/null @@ -1,196 +0,0 @@ - - - - - - - - Analysis Information Leak framework Dashboard - - - - - - - - - - - - - - -
- -
-
-
-

Top Level Domain Trending

-
- -
- -
-
-
-
- Top Level Domain Trending -
-
- - -
-
-
- -
- -
-
- -
-
- -
- - -
- - - - diff --git a/var/www/templates/Trending.html b/var/www/templates/Trending.html new file mode 100644 index 00000000..dca69e51 --- /dev/null +++ b/var/www/templates/Trending.html @@ -0,0 +1,199 @@ + + + + + + + + Analysis Information Leak framework Dashboard + + + + + + + + + + + + + + + +
+ +
+
+
+

Trending charts

+
+ +
+ +
+ + + +
+ +
+
+
+
+ Top Level Domain Trending + +
+
+ + +
+
+
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+ Top Domain Trending +
+
+ + +
+
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+ +
+ + + + + + + + + + +
+ + + + diff --git a/var/www/templates/Wordstrending.html b/var/www/templates/Wordstrending.html index d00376dc..22603b07 100644 --- a/var/www/templates/Wordstrending.html +++ b/var/www/templates/Wordstrending.html @@ -16,15 +16,19 @@ - + + + +
+ + + + +
+
@@ -93,101 +105,42 @@
- - var min_data_x = g.getValue(0,0); - var max_data_x = g.getValue(g.numRows()-1,0); + - var w = min_data_x; - // starting on Sunday is a special case - if (dow == 0) { - highlight_period(w,w+12*3600*1000); - } - // find first saturday - while (dow != 5) { - w += 24*3600*1000; - d = new Date(w); - dow = d.getUTCDay(); - } + - // shift back 1/2 day to center highlight around the point for the day - w -= 12*3600*1000; - while (w < max_data_x) { - var start_x_highlight = w; - var end_x_highlight = w + 2*24*3600*1000; - // make sure we don't try to plot outside the graph - if (start_x_highlight < min_data_x) { - start_x_highlight = min_data_x; - } - if (end_x_highlight > max_data_x) { - end_x_highlight = max_data_x; - } - highlight_period(start_x_highlight,end_x_highlight); - // calculate start of highlight for next Saturday - w += 7*24*3600*1000; - } - } -}); - -onclick = function(ev) { - if (g2.isSeriesLocked()) { - g2.clearSelection(); - } - else { - g2.setSelection(g2.getSelection(), g2.getHighlightSeries(), true); - } -}; -g2.updateOptions({clickCallback: onclick}, true); - -var linear = document.getElementById("linear"); -var log = document.getElementById("log"); -linear.onclick = function() { setLog(false); } -log.onclick = function() { setLog(true); } -var setLog = function(val) { - g2.updateOptions({ logscale: val }); - linear.disabled = !val; - log.disabled = val; - } -function unzoomGraph() { - g2.updateOptions({ - dateWindow:null, - valueRange:null - }); -}
diff --git a/var/www/templates/index.html b/var/www/templates/index.html index 108e1a63..70d314b0 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -35,7 +35,7 @@