Added sentiment analyser module (draft)
7 changed files with 486 additions and 88 deletions
@ -151,6 +151,8 @@ function launching_scripts {
screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
sleep 0.1
screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
#If no params, display the help
Normal file → Executable file
Normal file → Executable file
@ -1,7 +1,10 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
Template for new modules
Sentiment analyser module.
It takes its inputs from 'shortLine' and 'longLine'.
Pastes containing source code are analysed as well, since their comments may carry sentiment.
A paste that contains only source code will simply end up with a neutral value.
nltk.sentiment.vader module:
Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
@ -9,27 +12,87 @@ nltk.sentiment.vader module:
import time
import datetime
import calendar
import redis
from pubsublogger import publisher
from Helper import Process
from packages import Paste
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize
def Analyse(message, server):
    """Score the sentiment of one paste and store the averages in Redis.

    The paste content is split into sentences, every sentence is scored by
    the VADER analyser, and the per-sentence scores are averaged.  The
    'compound' score is averaged separately for sentences that lean negative
    ('neg' > 'pos') and for all the others.

    Args:
        message: Path of the paste to analyse, as received from the queue.
        server: Redis client used to persist the result.

    Stored layout ({} = set, () = key/value):
        {Provider_set -> provider_i}
        {Provider_TimestampInHour_i -> UniqID_i}_j
        (UniqID_i -> PasteValue_i)
    """
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    provider = paste.p_source
    p_date = str(paste._get_p_date())

    # Hourly bucket: the paste's date combined with the current hour.
    the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
    the_time = datetime.time(datetime.datetime.now().hour, 0, 0)
    combined_datetime = datetime.datetime.combine(the_date, the_time)
    timestamp = calendar.timegm(combined_datetime.timetuple())

    sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))

    avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
    neg_line = 0
    pos_line = 0
    sid = SentimentIntensityAnalyzer()
    for sentence in sentences:
        ss = sid.polarity_scores(sentence)
        for k in ss:
            if k != 'compound':
                avg_score[k] += ss[k]
            elif ss['neg'] > ss['pos']:
                avg_score['compoundNeg'] += ss[k]
                neg_line += 1
            else:
                avg_score['compoundPos'] += ss[k]
                pos_line += 1

    # Every divisor falls back to 1 so that an empty paste (no sentences)
    # yields all-zero averages instead of raising ZeroDivisionError.
    divisors = {'compoundPos': pos_line or 1, 'compoundNeg': neg_line or 1}
    default_divisor = len(sentences) or 1
    for k in avg_score:
        avg_score[k] = avg_score[k] / divisors.get(k, default_divisor)

    server.sadd('Provider_set', provider)

    provider_timestamp = provider + '_' + str(timestamp)
    # INCR atomically bumps the counter and returns the new value, so each
    # paste gets its own ID instead of re-using (and overwriting) the last one.
    UniqID = server.incr('UniqID')
    print('{0} -> {1}'.format(provider_timestamp, UniqID))
    server.sadd(provider_timestamp, UniqID)
    # Serialise explicitly: the value is stored as the dict's string form.
    server.set(UniqID, str(avg_score))
if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@ -39,7 +102,7 @@ if __name__ == '__main__':
publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg
config_section = '<section name>'
config_section = 'SentimentAnalyser'
# Setup the I/O queues
p = Process(config_section)
@ -47,6 +110,12 @@ if __name__ == '__main__':
# Send a description of the module to the logging system
publisher.info("<description of the module>")
# Redis connection handed to Analyse() for storing the sentiment scores.
# Host, port and db are read from the "Redis_Level_DB_Sentiment" config section.
server = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Sentiment", "host"),
port=p.config.get("Redis_Level_DB_Sentiment", "port"),
db=p.config.get("Redis_Level_DB_Sentiment", "db"))
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
@ -57,4 +126,4 @@ if __name__ == '__main__':
# Do something with the message from the queue
Analyse(message, server)
@ -82,6 +82,7 @@ def compute_progression(server, field_name, num_day, url_parsed):
member_set = []
for keyw in server.smembers(redis_progression_name_set):
member_set += (keyw, int(server.hget(redis_progression_name, keyw)))
print member_set
member_set.sort(key=lambda tup: tup[1])
if member_set[0] < keyword_increase:
#remove min from set and add the new one
@ -66,6 +66,9 @@ subscribe = Redis_BrowseWarningPaste
#subscribe = Redis_Cve
#publish = Redis_BrowseWarningPaste
subscribe = Redis_LinesLong
subscribe = Redis_Global
@ -5,6 +5,7 @@ import redis
import ConfigParser
import json
import datetime
import calendar
from flask import Flask, render_template, jsonify, request
import flask
import os
@ -49,6 +50,12 @@ r_serv_db = redis.StrictRedis(
port=cfg.getint("Redis_Level_DB", "port"),
db=cfg.getint("Redis_Level_DB", "db"))
# Redis connection used by the sentiment-analysis views to read stored scores.
# Host, port and db are read from the "Redis_Level_DB_Sentiment" config section.
r_serv_sentiment = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Sentiment", "host"),
port=cfg.getint("Redis_Level_DB_Sentiment", "port"),
db=cfg.getint("Redis_Level_DB_Sentiment", "db"))
app = Flask(__name__, static_url_path='/static/')
@ -433,6 +440,36 @@ def moduletrending():
def sentiment_analysis_trending():
    """Render the sentiment-analysis trending page."""
    template_name = "sentiment_analysis_trending.html"
    return render_template(template_name)
def sentiment_analysis_getplotdata():
    """Return the stored sentiment values of the last seven days as JSON.

    The response maps every provider to a dict keyed by hourly timestamp
    (current hour back to seven days ago, inclusive); each value is the list
    of score entries stored in Redis for that provider and hour.
    """
    oneHour = 60*60
    sevenDays = oneHour*24*7
    dateStart = datetime.datetime.now().replace(minute=0, second=0, microsecond=0)
    dateStart_timestamp = int(calendar.timegm(dateStart.timetuple()))

    to_return = {}
    # NOTE(review): providers come from 'providers_set' on r_serv_charts, while
    # the analyser registers them in 'Provider_set' on the sentiment DB --
    # confirm that this is the intended source.
    for cur_provider in r_serv_charts.smembers('providers_set'):
        cur_provider_name = cur_provider + '_'
        list_date = {}
        for cur_timestamp in range(dateStart_timestamp, dateStart_timestamp-sevenDays-oneHour, -oneHour):
            cur_set_name = cur_provider_name + str(cur_timestamp)
            list_value = []
            for cur_id in r_serv_sentiment.smembers(cur_set_name):
                cur_value = r_serv_sentiment.get(cur_id)
                # Collect the value (it used to be fetched and then dropped);
                # skip IDs whose key no longer exists.
                if cur_value is not None:
                    list_value.append(cur_value)
            list_date[cur_timestamp] = list_value
        to_return[cur_provider] = list_date

    return jsonify(to_return)
def sentiment_analysis_plot_tool():
    """Render the sentiment-analysis plot tool page."""
    template_name = "sentiment_analysis_plot_tool.html"
    return render_template(template_name)
@ -1,4 +1,117 @@
/* ---------- Sparkline Charts ---------- */
// Generate a random integer used as placeholder data for the charts.
// Math.random() values above 0.5 are shifted by -1, so after scaling by 101
// and flooring the result lies between -51 and 50.
randNum = function(){
var num = Math.random();
if(num > 0.5)
num = -1+num;
return Math.floor(num*101);
//return (Math.floor( Math.random()* (1+40-20) ) ) + 20;
// Options object passed to every .sparkline() call in this file.
// barWidth is temporarily overridden (and then restored) by the callers
// when they draw the companion '...b' chart.
var sparklineOptions = {
height: 80,//Height of the chart - Defaults to 'auto' (line height of the containing tag)
// Fixed chart range of -1..1
chartRangeMin: -1,
chartRangeMax: 1,
type: 'bar',
barSpacing: 0,
barWidth: 2,
barColor: '#00bf5f',
negBarColor: '#f22929',
zeroColor: '#ffff00'
// Callback receiving the plot data: an object keyed by provider, each value
// being an object keyed by hourly timestamp that holds an array of score strings.
// For the first 8 providers it averages the scores of every hour over one week
// and draws the '.sparkLineStatsWeekN' charts.
function(data) {
var all_plot_data = [];
var plot_data = [];
var array_provider = Object.keys(data);
// The first key of the first provider is taken as the start of the time range
var dates_providers = Object.keys(data[array_provider[0]]);
var dateStart = parseInt(dates_providers[0]);
var oneHour = 60*60;
var oneWeek = oneHour*24*7;
// NOTE(review): assumes at least 8 providers are present; with fewer,
// curr_provider is undefined and data[curr_provider] fails -- confirm.
for (graphNum=0; graphNum<8; graphNum++) {
var graph_data = [];
var spark_data = [];
var curr_provider = array_provider[graphNum];
// Walk the week hour by hour
for(curr_date=dateStart; curr_date<dateStart+oneWeek+oneHour; curr_date+=oneHour){
var data_array = data[curr_provider][curr_date];
if (data_array.length == 0){
// No entry for this hour: push an all-zero record
graph_data.push({'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0});
} else { //compute avg
var compPosAvg = 0;
var compNegAvg = 0;
var pos = 0;
var neg = 0;
var neu = 0;
for(i=0; i<data_array.length; i++){
//console.log(data_array[i].replace(/\'/g, '\"'));
// Entries use single quotes; swap them for double quotes so they parse as JSON
var curr_data = jQuery.parseJSON(data_array[i].replace(/\'/g, '\"'));
compPosAvg += curr_data['compoundPos'];
compNegAvg += curr_data['compoundNeg'];
pos += curr_data['pos'];
neg += curr_data['neg'];
neu += curr_data['neu'];
// Turn the sums into means over the entries of this hour
compPosAvg = compPosAvg/data_array.length;
compNegAvg = compNegAvg/data_array.length;
pos = pos/data_array.length;
neg = neg/data_array.length;
neu = neu/data_array.length;
graph_data.push({'neg': neg, 'neu': neu, 'pos': pos, 'compoundPos': compPosAvg, 'compoundNeg': compNegAvg});
// Chart placeholders are numbered from 1
var num = graphNum + 1;
placeholder = '.sparkLineStatsWeek' + num;
//$(placeholder).sparkline([1,2,3,9], sparklineOptions);
// NOTE(review): plot_data is never filled in the visible code (graph_data and
// spark_data are not copied into it), so plot_data[graphNum] looks undefined
// here -- confirm what should be plotted.
$(placeholder).sparkline(plot_data[graphNum], sparklineOptions);
// Draw the companion '...b' chart with a wider bar, then restore the width
sparklineOptions.barWidth = 7;
$(placeholder+'b').sparkline([0.7], sparklineOptions);
sparklineOptions.barWidth = 2;
}//for loop
// Sparklines for the "today" charts: fill '.sparkLineStatsToday1' to
// '.sparkLineStatsToday9' with random placeholder data.
for (i=1; i<10; i++) {
var data = [3+randNum(), 5+randNum(), 8+randNum()];
placeholder = '.sparkLineStatsToday' + i;
$(placeholder).sparkline(data, sparklineOptions);
// Draw the companion '...b' chart with a wider bar, then restore the width
sparklineOptions.barWidth = 7;
$(placeholder+'b').sparkline([0.7], sparklineOptions);
sparklineOptions.barWidth = 2;
/* ---------------- Gauge ---------------- */
var gaugeOptions = {
@ -30,7 +143,7 @@ var gaugeOptions3 = jQuery.extend(true, {}, gaugeOptions);
gaugeOptions.appendTo = '#gauge_today_last_hour';
gaugeOptions.dialLabel = 'Last hour';
gaugeOptions.elementId = 'gauge1';
gaugeOptions.inc = -0.7;
gaugeOptions.inc = -0.9;
var gauge_today_last_hour = new FlexGauge(gaugeOptions);
gaugeOptions2.appendTo = '#gauge_today_last_days';
@ -52,53 +165,6 @@ var gauge_today_last_days = new FlexGauge(gaugeOptions3);
/* ---------- Sparkline Charts ---------- */
//generate random number for charts
randNum = function(){
var num = Math.random();
if(num > 0.5)
num = -1+num;
return Math.floor(num*101);
//return (Math.floor( Math.random()* (1+40-20) ) ) + 20;
var sparklineOptions = {
width: 250,//Width of the chart - Defaults to 'auto' - May be any valid css width - 1.5em, 20px, etc (using a number without a unit specifier won't do what you want) - This option does nothing for bar and tristate chars (see barWidth)
height: 80,//Height of the chart - Defaults to 'auto' (line height of the containing tag)
type: 'bar',
barSpacing: 0,
barWidth: 10,
barColor: '#00bf5f',
negBarColor: '#f22929',
zeroColor: '#ffff00'
//sparklines (making loop with random data for all 10 sparkline)
for (i=1; i<10; i++) {
var data = [3+randNum(), 5+randNum(), 8+randNum(), 11+randNum(),14+randNum(),17+randNum(),20+randNum(),15+randNum(),18+randNum(),22+randNum()];
placeholder = '.sparkLineStatsToday' + i;
$(placeholder).sparkline(data, sparklineOptions);
//sparklines (making loop with random data for all 10 sparkline)
for (i=1; i<10; i++) {
var data = [3+randNum(), 5+randNum(), 8+randNum(), 11+randNum(),14+randNum(),17+randNum(),20+randNum(),15+randNum(),18+randNum(),22+randNum()];
placeholder = '.sparkLineStatsWeek' + i;
$(placeholder).sparkline(data, sparklineOptions);
/* ----------- CanvasJS ------------ */
var options_canvasJS = {
@ -21,12 +21,16 @@
strong {
font-size: 16px;
.table {
margin-bottom: 0px;
.sparkLineStats ul {
.sparkLineStats {
position: relative;
margin-bottom: -4px;
@ -46,9 +50,8 @@
margin-right: 5px;
.wellInside {
background-color: #dedede;
padding: 12px;
.panelInside {
padding: 5px;
.fg-dial-label {
@ -107,26 +110,134 @@
<div class="panel-body">
<!-- left column -->
<div class="col-lg-9 well">
<div class="col-lg-9" style="padding-left: 0px;">
<!-- providers charts -->
<div class="col-lg-6">
<div class="sparkLineStats">
<li class="well wellInside"><div class="sparkLineStatsToday1"></div><div id="line_today_graph1"></div><strong id="providerToday1">Graph 1</strong></li>
<li class="well wellInside"><div class="sparkLineStatsToday2"></div><div id="line_today_graph2"></div><strong id="providerToday2">Graph 2</strong></li>
<li class="well wellInside"><div class="sparkLineStatsToday3"></div><div id="line_today_graph3"></div><strong id="providerToday3">Graph 3</strong></li>
<li class="well wellInside"><div class="sparkLineStatsToday4"></div><div id="line_today_graph4"></div><strong id="providerToday4">Graph 4</strong></li>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 1</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday1"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday1b"></div></td>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 2</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday2"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday2b"></div></td>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 3</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday3"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday3b"></div></td>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 4</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday4"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday4b"></div></td>
<div class="col-lg-6">
<div class="sparkLineStats">
<li class="well wellInside"><div class="sparkLineStatsToday6"></div><div id="line_today_graph5"></div><strong id="providerToday5">Graph 5</strong></li>
<li class="well wellInside"><div class="sparkLineStatsToday7"></div><div id="line_today_graph6"></div><strong id="providerToday6">Graph 6</strong></li>
<li class="well wellInside"><div class="sparkLineStatsToday8"></div><div id="line_today_graph7"></div><strong id="providerToday7">Graph 7</strong></li>
<li class="well wellInside"><div class="sparkLineStatsToday9"></div><div id="line_today_graph8"></div><strong id="providerToday8">Graph 8</strong></li>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 5</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday5"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday5b"></div></td>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 6</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday6"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday6b"></div></td>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 7</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday7"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday7b"></div></td>
<div id="panel-today" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 8</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday8"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsToday8b"></div></td>
@ -167,26 +278,134 @@
<div class="panel-body">
<!-- left column -->
<div class="col-lg-9 well">
<div class="col-lg-9" style="padding-left: 0px;">
<!-- providers charts -->
<div class="col-lg-6">
<div class="sparkLineStats">
<li class="well wellInside"><div class="sparkLineStatsWeek1"></div><div id="line_week_graph1"></div><strong id="providerWeek1">Graph 1</strong></li>
<li class="well wellInside"><div class="sparkLineStatsWeek2"></div><div id="line_week_graph2"></div><strong id="providerWeek2">Graph 2</strong></li>
<li class="well wellInside"><div class="sparkLineStatsWeek3"></div><div id="line_week_graph3"></div><strong id="providerWeek3">Graph 3</strong></li>
<li class="well wellInside"><div class="sparkLineStatsWeek4"></div><div id="line_week_graph4"></div><strong id="providerWeek4">Graph 4</strong></li>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 1</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek1"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek1b"></div></td>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 2</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek2"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek2b"></div></td>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 3</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek3"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek3b"></div></td>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 4</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek4"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek4b"></div></td>
<div class="col-lg-6">
<div class="sparkLineStats">
<li class="well wellInside"><div class="sparkLineStatsWeek5"></div><div id="line_week_graph5"></div><strong id="providerWeek5">Graph 5</strong></li>
<li class="well wellInside"><div class="sparkLineStatsWeek6"></div><div id="line_week_graph6"></div><strong id="providerWeek6">Graph 6</strong></li>
<li class="well wellInside"><div class="sparkLineStatsWeek7"></div><div id="line_week_graph7"></div><strong id="providerWeek7">Graph 7</strong></li>
<li class="well wellInside"><div class="sparkLineStatsWeek8"></div><div id="line_week_graph8"></div><strong id="providerWeek8">Graph 8</strong></li>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 5</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek5"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek5b"></div></td>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 6</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek6"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek6b"></div></td>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 7</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek7"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek7b"></div></td>
<div id="panel-week" class="panel panel-default">
<div class="panel-heading">
<strong>Graph 8</strong>
<div class="panel-body panelInside">
<table class="table">
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek8"></div></td>
<td style="border-top: 0px solid #ddd;"><div class="sparkLineStatsWeek8b"></div></td>
@ -252,6 +471,7 @@
<!-- import graph function -->
<script src="{{ url_for('static', filename='js/sentiment_trending.js') }}"></script>
activePage = $('h1.page-header').attr('data-page');
