Merge pull request #113 from mokaddem/master

Improvement of terms - Behavior and webpage
This commit is contained in:
Alexandre Dulaunoy 2017-03-29 22:32:18 +02:00 committed by GitHub
commit 8435520d8b
7 changed files with 137 additions and 67 deletions

View file

@ -117,12 +117,18 @@ if __name__ == "__main__":
r_serv1.hincrby(low_word, date, int(score))
# Update redis
#consider the num of occurence of this term
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
#1 term per paste
curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1)))
# Add in set only if term is not in the blacklist
if low_word not in server_term.smembers(BlackListTermsSet_Name):
#consider the num of occurence of this term
server_term.zincrby(curr_set, low_word, float(score))
#1 term per paste
server_term.zincrby("per_paste_" + curr_set, low_word, float(1))
#Add more info for tracked terms
check_if_tracked_term(low_word, filename)

View file

@ -48,11 +48,13 @@ def manage_top_set():
blacklist_size = int(server_term.scard(BlackListTermsSet_Name))
dico = {}
dico_per_paste = {}
# Retreive top data (max_card + blacklist_size) from days sets
for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
curr_set = top_termFreq_setName_day[0] + str(timestamp)
array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
array_top_day_per_paste = server_term.zrevrangebyscore("per_paste_" + curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
for word, value in array_top_day:
if word not in server_term.smembers(BlackListTermsSet_Name):
@ -61,8 +63,16 @@ def manage_top_set():
else:
dico[word] = value
for word, value in array_top_day_per_paste:
if word not in server_term.smembers(BlackListTermsSet_Name):
if word in dico_per_paste.keys():
dico_per_paste[word] += value
else:
dico_per_paste[word] = value
if timestamp == startDate - num_day_week*oneDay:
dico_week = copy.deepcopy(dico)
dico_week_per_paste = copy.deepcopy(dico_per_paste)
# convert dico into sorted array
array_month = []
@ -77,17 +87,37 @@ def manage_top_set():
array_week.sort(key=lambda tup: -tup[1])
array_week = array_week[0:20]
# convert dico_per_paste into sorted array
array_month_per_paste = []
for w, v in dico_per_paste.iteritems():
array_month_per_paste.append((w, v))
array_month_per_paste.sort(key=lambda tup: -tup[1])
array_month_per_paste = array_month_per_paste[0:20]
array_week_per_paste = []
for w, v in dico_week_per_paste.iteritems():
array_week_per_paste.append((w, v))
array_week_per_paste.sort(key=lambda tup: -tup[1])
array_week_per_paste = array_week_per_paste[0:20]
# suppress every terms in top sets
for curr_set, curr_num_day in top_termFreq_set_array[1:3]:
for w in server_term.zrange(curr_set, 0, -1):
server_term.zrem(curr_set, w)
for w in server_term.zrange("per_paste_" + curr_set, 0, -1):
server_term.zrem("per_paste_" + curr_set, w)
# Add top term from sorted array in their respective sorted sets
for elem in array_week:
server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0])
for elem in array_week_per_paste:
server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0])
for elem in array_month:
server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0])
for elem in array_month_per_paste:
server_term.zadd("per_paste_" + top_termFreq_setName_month[0], float(elem[1]), elem[0])
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
value = str(timestamp) + ", " + "-"

View file

@ -1,31 +0,0 @@
Attributes
BrowseWarningPaste
Categ
Credential
CreditCards
Curve
CurveManageTopSets
Cve
DomClassifier
Duplicates
Global
Indexer
Keys
Lines
Mail
Mixer
ModuleInformation
Keys
Lines
Mail
Mixer
ModuleInformation
ModuleStats
Onion
Phone
Release
SentimentAnalysis
SQLInjectionDetection
Tokenize
Web
WebStats

View file

@ -20,13 +20,13 @@ cfg = Flask_config.cfg
r_serv_term = Flask_config.r_serv_term
# ============ FUNCTIONS ============
def Term_getValueOverRange(word, startDate, num_day):
def Term_getValueOverRange(word, startDate, num_day, per_paste=""):
passed_days = 0
oneDay = 60*60*24
to_return = []
curr_to_return = 0
for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay):
value = r_serv_term.hget(timestamp, word)
value = r_serv_term.hget(per_paste+str(timestamp), word)
curr_to_return += int(value) if value is not None else 0
for i in num_day:
if passed_days == i-1:
@ -39,6 +39,14 @@ def Term_getValueOverRange(word, startDate, num_day):
@app.route("/terms_management/")
def terms_management():
per_paste = request.args.get('per_paste')
if per_paste == "1" or per_paste is None:
per_paste_text = "per_paste_"
per_paste = 1
else:
per_paste_text = ""
per_paste = 0
TrackedTermsSet_Name = "TrackedSetTermSet"
BlackListTermsSet_Name = "BlackListSetTermSet"
TrackedTermsDate_Name = "TrackedTermDate"
@ -53,7 +61,7 @@ def terms_management():
track_list_num_of_paste = []
for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name):
track_list.append(tracked_term)
value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31])
value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31], per_paste=per_paste_text)
term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term)
@ -70,7 +78,7 @@ def terms_management():
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
black_list.append([blacked_term, term_date])
return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste)
return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste, per_paste=per_paste)
@app.route("/terms_management_query_paste/")
@ -182,12 +190,19 @@ def terms_plot_tool_data():
range_end = calendar.timegm(range_end.timetuple())
term = request.args.get('term')
per_paste = request.args.get('per_paste')
if per_paste == "1" or per_paste is None:
per_paste = "per_paste_"
else:
per_paste = ""
if term is None:
return "None"
else:
value_range = []
for timestamp in range(range_start, range_end+oneDay, oneDay):
value = r_serv_term.hget(timestamp, term)
value = r_serv_term.hget(per_paste+str(timestamp), term)
curr_value_range = int(value) if value is not None else 0
value_range.append([timestamp, curr_value_range])
value_range.insert(0,term)
@ -196,7 +211,9 @@ def terms_plot_tool_data():
@app.route("/terms_plot_top/")
def terms_plot_top():
return render_template("terms_plot_top.html")
per_paste = request.args.get('per_paste')
per_paste = per_paste if per_paste is not None else 1
return render_template("terms_plot_top.html", per_paste=per_paste)
@app.route("/terms_plot_top_data/")
@ -206,17 +223,24 @@ def terms_plot_top_data():
today = today.replace(hour=0, minute=0, second=0, microsecond=0)
today_timestamp = calendar.timegm(today.timetuple())
set_day = "TopTermFreq_set_day_" + str(today_timestamp)
set_week = "TopTermFreq_set_week";
set_month = "TopTermFreq_set_month";
per_paste = request.args.get('per_paste')
if per_paste == "1" or per_paste is None:
per_paste = "per_paste_"
else:
per_paste = ""
the_set = request.args.get('set')
set_day = per_paste + "TopTermFreq_set_day_" + str(today_timestamp)
set_week = per_paste + "TopTermFreq_set_week";
set_month = per_paste + "TopTermFreq_set_month";
the_set = per_paste + request.args.get('set')
num_day = int(request.args.get('num_day'))
if the_set is None:
return "None"
else:
to_return = []
if the_set == "TopTermFreq_set_day":
if "TopTermFreq_set_day" in the_set:
the_set += "_" + str(today_timestamp)
for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20):
@ -229,7 +253,7 @@ def terms_plot_top_data():
position['month'] = position['month']+1 if position['month'] is not None else "<20"
value_range = []
for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
value = r_serv_term.hget(timestamp, term)
value = r_serv_term.hget(per_paste+str(timestamp), term)
curr_value_range = int(value) if value is not None else 0
value_range.append([timestamp, curr_value_range])

View file

@ -12,6 +12,7 @@
<link href="{{ url_for('static', filename='font-awesome/css/font-awesome.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/sb-admin-2.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.css') }}" rel="stylesheet" type="text/css" />
<link href="{{ url_for('static', filename='css/switch_checkbox.css') }}" rel="stylesheet" type="text/css" />
<script language="javascript" src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js') }}"></script>
@ -91,6 +92,13 @@
<div class="col-lg-12">
<div class="row">
<div class="col-lg-12">
<label class="switch">
<input id="per_paste" class="switch-input" value="per_paste" type="checkbox" onclick="reload_per_paste()">
<span class="switch-label" data-on="On" data-off="Off"></span>
<span class="switch-handle"></span>
</label>
<strong style="top: 3px; position: relative;">1 term per paste</strong>
<div id="panel-today" class="panel panel-success">
<div class="panel-heading">
<strong>Manage tracked terms</strong>
@ -116,21 +124,19 @@
</tr>
</thead>
<tbody>
{% set i = 0 %}
{% for term in track_list %}
<tr>
<td>{{ term }}</td>
<td>{{ track_list_values[i][3] }}</td>
<td>{{ track_list_values[i][0] }}</td>
<td>{{ track_list_values[i][1] }}</td>
<td>{{ track_list_values[i][2] }}</td>
<td>{{ track_list_num_of_paste[i] }}</td>
<td>{{ track_list_values[loop.index0][3] }}</td>
<td>{{ track_list_values[loop.index0][0] }}</td>
<td>{{ track_list_values[loop.index0][1] }}</td>
<td>{{ track_list_values[loop.index0][2] }}</td>
<td>{{ track_list_num_of_paste[loop.index0] }}</td>
<td><p style="margin: 0px;">
<span data-toggle="modal" data-target="#mymodal" data-term="{{ term }}" ><button class="btn-link" data-toggle="tooltip" data-placement="right" title="Show concerned paste(s)"><span class="glyphicon glyphicon-info-sign"></span></button></span>
<button class="btn-link btn-interaction" data-toggle="tooltip" data-placement="left" title="Remove this term" data-content="{{ term }}" data-section="followTerm" data-action="delete"><span class="glyphicon glyphicon-trash"></span></button>
</p></td>
</tr>
{% set i = i + 1 %}
{% endfor %}
</tbody>
</table>
@ -162,22 +168,20 @@
<table class="table table-striped table-bordered table-hover" id="myTable2">
<thead>
<tr>
<th style="max-width: 800px;">Termx</th>
<th style="max-width: 800px;">Term</th>
<th>Added date</th>
<th>Action</th>
</tr>
</thead>
<tbody>
{% set i = 0 %}
{% for term in black_list %}
{% for term, date in black_list %}
<tr>
<td>{{ black_list[i][0] }}</td>
<td>{{ black_list[i][1] }}</td>
<td>{{ term }}</td>
<td>{{ date }}</td>
<td><p style="margin: 0px;">
<button class="btn-link btn-interaction" data-toggle="tooltip" data-placement="right" title="Remove this term" data-content="{{ black_list[i][0] }}" data-section="blacklistTerm" data-action="delete"><span class="glyphicon glyphicon-trash"></span></button>
<button class="btn-link btn-interaction" data-toggle="tooltip" data-placement="right" title="Remove this term" data-content="{{ term }}" data-section="blacklistTerm" data-action="delete"><span class="glyphicon glyphicon-trash"></span></button>
</p></td>
</tr>
{% set i = i + 1 %}
{% endfor %}
</tbody>
</table>
@ -198,13 +202,21 @@
<!-- import graph function -->
<script>
function reload_per_paste() {
var checked = $("#per_paste").prop( "checked" ) ? 1 : 0;
window.location.href = {{ url_for('terms_management') }}+"?per_paste="+checked;
}
var table_track;
var table_black;
$(document).ready(function(){
activePage = $('h1.page-header').attr('data-page');
$("#"+activePage).addClass("active");
if({{ per_paste }} == 1) {
$("#per_paste").attr('checked', true)
}
$('[data-toggle="tooltip"]').tooltip();
table_track = $('#myTable').DataTable();

View file

@ -13,6 +13,7 @@
<link href="{{ url_for('static', filename='css/sb-admin-2.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.css') }}" rel="stylesheet" type="text/css" />
<link href="{{ url_for('static', filename='css/jquery-ui.min.css') }}" rel="stylesheet" type="text/css" />
<link href="{{ url_for('static', filename='css/switch_checkbox.css') }}" rel="stylesheet" type="text/css" />
<script language="javascript" src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js') }}"></script>
@ -81,6 +82,15 @@
<button id="plot-btn-add" class="btn btn-success" style="margin-left: 6px;"><span class="fa fa-plus"></span> Add the term to the chart</button>
</div>
<div class='pull_right' style="float: right;">
<label class="switch">
<input id="per_paste" class="switch-input" value="per_paste" type="checkbox" checked>
<span class="switch-label" data-on="On" data-off="Off"></span>
<span class="switch-handle"></span>
</label>
<strong style="top: 3px; position: relative;">1 term per paste</strong>
</div>
</div>
<!-- /.panel-body -->
</div>
@ -202,8 +212,9 @@ function plotData() {
plotted_terms = [term]
var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000;
var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000;
var checked = $("#per_paste").prop( "checked" ) ? 1 : 0;
$.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term }, function(data, status){
$.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term, per_paste: checked }, function(data, status){
graph_data = [];
var to_plot = [];
var curr_data = [];
@ -239,8 +250,9 @@ function addData() {
plotted_terms.push(term)
var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000;
var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000;
var checked = $("#per_paste").prop( "checked" ) ? 1 : 0;
$.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term }, function(data, status){
$.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term, per_paste: checked }, function(data, status){
var curr_data = [];
for(i=1; i<data.length; i++) {
curr_data.push([data[i][0]*1000, data[i][1]]);
@ -259,8 +271,9 @@ function replot() {
var term = plotted_terms[i];
var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000;
var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000;
var checked = $("#per_paste").prop( "checked" ) ? 1 : 0;
promises.push($.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term }, function(data, status){
promises.push($.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term, per_paste: checked }, function(data, status){
var curr_data = [];
for(i=1; i<data.length; i++) {
curr_data.push([data[i][0]*1000, data[i][1]]);
@ -279,5 +292,3 @@ function replot() {
</script>
</body>
</html>

View file

@ -12,6 +12,7 @@
<link href="{{ url_for('static', filename='font-awesome/css/font-awesome.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/sb-admin-2.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.css') }}" rel="stylesheet" type="text/css" />
<link href="{{ url_for('static', filename='css/switch_checkbox.css') }}" rel="stylesheet" type="text/css" />
<script language="javascript" src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js') }}"></script>
@ -67,6 +68,14 @@
<div class="col-lg-12">
<div class="row">
<div class="col-lg-12">
<div class='pull_right'>
<label class="switch">
<input id="per_paste" class="switch-input" value="per_paste" type="checkbox" onclick="reload_per_paste()">
<span class="switch-label" data-on="On" data-off="Off"></span>
<span class="switch-handle"></span>
</label>
<strong style="top: 3px; position: relative;">1 term per paste</strong>
<div>
<div id="panel-today" class="panel panel-info">
<div class="panel-heading">
<strong>Today</strong>
@ -281,7 +290,15 @@
$(document).ready(function(){
activePage = $('h1.page-header').attr('data-page');
$("#"+activePage).addClass("active");
if({{ per_paste }} == 1) {
$("#per_paste").attr('checked', true)
}
});
function reload_per_paste() {
var checked = $("#per_paste").prop( "checked" ) ? 1 : 0;
window.location.href = {{ url_for('terms_plot_top') }}+"?per_paste="+checked;
}
</script>
@ -332,6 +349,7 @@ set_today = "TopTermFreq_set_day";
set_week = "TopTermFreq_set_week";
set_month = "TopTermFreq_set_month";
default_num_curves = 8;
per_paste = {{ per_paste }}
var plot_today;
var plot_week;
@ -339,7 +357,7 @@ var plot_month;
var promises = []; // Used to know when everything has been received
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today, num_day: 5 }, function(data, status){
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today, num_day: 5, per_paste: per_paste }, function(data, status){
data.sort(function(a, b){return b[2]-a[2];});
// Sort data
var table_today = $("#table-today")
@ -380,7 +398,7 @@ promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today
}));
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week, num_day: 7 }, function(data, status){
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week, num_day: 7, per_paste: per_paste }, function(data, status){
data.sort(function(a, b){return b[2]-a[2];});
// Sort data
var table = $("#table-week")
@ -420,7 +438,7 @@ promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week,
});
}));
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_month, num_day: 31 }, function(data, status){
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_month, num_day: 31, per_paste: per_paste }, function(data, status){
data.sort(function(a, b){return b[2]-a[2];});
// Sort data
var table = $("#table-month")