Added draft filter in sentiment analysis (discard syntactical languages) + added nice tooltip for sparkline. Trending now displays the average as a function of the number of elements processed, not over the complete week + fixed bug in gauge and canvasjs (was computing the average with only 1 graph instead of all 8).

This commit is contained in:
Mokaddem 2016-08-16 16:33:02 +02:00
parent ecd834ffb6
commit 1084e45f1b
4 changed files with 156 additions and 91 deletions

View file

@ -15,6 +15,7 @@ import time
import datetime import datetime
import calendar import calendar
import redis import redis
import json
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
from packages import Paste from packages import Paste
@ -22,6 +23,8 @@ from packages import Paste
from nltk.sentiment.vader import SentimentIntensityAnalyzer from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize from nltk import tokenize
# Config Variables
accepted_Mime_type = ['text/plain']
def Analyse(message, server): def Analyse(message, server):
#print 'analyzing' #print 'analyzing'
@ -31,68 +34,84 @@ def Analyse(message, server):
content = paste.get_p_content() content = paste.get_p_content()
provider = paste.p_source provider = paste.p_source
p_date = str(paste._get_p_date()) p_date = str(paste._get_p_date())
#print provider, date p_MimeType = paste._get_p_encoding()
the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8])) # Perform further analysis
#print 'pastedate: ', the_date if p_MimeType == "text/plain":
the_time = datetime.datetime.now() if isJSON(content):
the_time = datetime.time(getattr(the_time, 'hour'), 0, 0) p_MimeType = "JSON"
#print 'now: ', the_time
combined_datetime = datetime.datetime.combine(the_date, the_time)
#print 'combined: ', combined_datetime
timestamp = calendar.timegm(combined_datetime.timetuple())
#print 'timestamp: ', timestamp
sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore')) if p_MimeType in accepted_Mime_type:
#print len(sentences) print 'Processing', path
the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} #print 'pastedate: ', the_date
neg_line = 0 the_time = datetime.datetime.now()
pos_line = 0 the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
sid = SentimentIntensityAnalyzer() #print 'now: ', the_time
for sentence in sentences: combined_datetime = datetime.datetime.combine(the_date, the_time)
ss = sid.polarity_scores(sentence) #print 'combined: ', combined_datetime
for k in sorted(ss): timestamp = calendar.timegm(combined_datetime.timetuple())
if k == 'compound': #print 'timestamp: ', timestamp
if ss['neg'] > ss['pos']:
avg_score['compoundNeg'] += ss[k] sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))
neg_line += 1 #print len(sentences)
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
neg_line = 0
pos_line = 0
sid = SentimentIntensityAnalyzer()
for sentence in sentences:
ss = sid.polarity_scores(sentence)
for k in sorted(ss):
if k == 'compound':
if ss['neg'] > ss['pos']:
avg_score['compoundNeg'] += ss[k]
neg_line += 1
else:
avg_score['compoundPos'] += ss[k]
pos_line += 1
else: else:
avg_score['compoundPos'] += ss[k] avg_score[k] += ss[k]
pos_line += 1
else: #print('{0}: {1}, '.format(k, ss[k]))
avg_score[k] += ss[k]
for k in avg_score:
if k == 'compoundPos':
avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
elif k == 'compoundNeg':
avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
else:
avg_score[k] = avg_score[k] / len(sentences)
# In redis-levelDB: {} = set, () = K-V
# {Provider_set -> provider_i}
# {Provider_TimestampInHour_i -> UniqID_i}_j
# (UniqID_i -> PasteValue_i)
server.sadd('Provider_set', provider)
#print 'Provider_set', provider
provider_timestamp = provider + '_' + str(timestamp)
#print provider_timestamp
server.incr('UniqID')
UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID
server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score)
print avg_score
#print UniqID, '->', avg_score
else:
print 'Dropped:', p_MimeType
#print('{0}: {1}, '.format(k, ss[k])) def isJSON(content):
try:
json.loads(content)
return True
for k in avg_score: except Exception,e:
if k == 'compoundPos': return False
avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
elif k == 'compoundNeg':
avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
else:
avg_score[k] = avg_score[k] / len(sentences)
# In redis-levelDB: {} = set, () = K-V
# {Provider_set -> provider_i}
# {Provider_TimestampInHour_i -> UniqID_i}_j
# (UniqID_i -> PasteValue_i)
server.sadd('Provider_set', provider)
#print 'Provider_set', provider
provider_timestamp = provider + '_' + str(timestamp)
#print provider_timestamp
server.incr('UniqID')
UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID
server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score)
#print UniqID, '->', avg_score
#print '(', provider, timestamp, str(avg_score) , ')'
#server.hset(provider, timestamp, str(avg_score))
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@ -102,7 +121,7 @@ if __name__ == '__main__':
publisher.channel = 'Script' publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg # Section name in bin/packages/modules.cfg
config_section = 'SentimentAnalyser' config_section = 'SentimentAnalysis'
# Setup the I/O queues # Setup the I/O queues
p = Process(config_section) p = Process(config_section)

View file

@ -66,8 +66,8 @@ subscribe = Redis_BrowseWarningPaste
#subscribe = Redis_Cve #subscribe = Redis_Cve
#publish = Redis_BrowseWarningPaste #publish = Redis_BrowseWarningPaste
[SentimentAnalyser] [SentimentAnalysis]
subscribe = Redis_LinesLong subscribe = Redis_Global
[Release] [Release]
subscribe = Redis_Global subscribe = Redis_Global

View file

@ -497,7 +497,7 @@ def sentiment_analysis_plot_tool_getdata():
timestamp1 = calendar.timegm(date1.timetuple()) timestamp1 = calendar.timegm(date1.timetuple())
timestamp2 = calendar.timegm(date2.timetuple()) timestamp2 = calendar.timegm(date2.timetuple())
print timestamp2
oneHour = 60*60 oneHour = 60*60
oneDay = oneHour*24 oneDay = oneHour*24

View file

@ -1,4 +1,26 @@
function generate_offset_to_time(num){
var to_ret = {};
for(i=0; i<=num; i++)
to_ret[i] = new Date().getHours()-(23-i)+'h';
return to_ret;
};
function generate_offset_to_date(day){
var now = new Date();
var to_ret = {};
for(i=0; i<day; i++){
for(j=0; j<24; j++){
var t1 =now.getDate()-i + ":";
var t2 =now.getHours()-(23-j)+"h";
to_ret[j+24*i] = t1+t2;
}
}
return to_ret;
};
var offset_to_time = generate_offset_to_time(23);
var offset_to_date = generate_offset_to_date(7);
var sparklineOptions = { var sparklineOptions = {
height: 80,//Height of the chart - Defaults to 'auto' (line height of the containing tag) height: 80,//Height of the chart - Defaults to 'auto' (line height of the containing tag)
@ -13,6 +35,7 @@
negBarColor: '#f22929', negBarColor: '#f22929',
zeroColor: '#ffff00', zeroColor: '#ffff00',
tooltipFormat: '<span style="color: {{color}}">&#9679;</span> {{offset:names}}, {{value}} </span>',
}; };
@ -37,7 +60,9 @@ $.getJSON("/sentiment_analysis_getplotdata/",
var spark_data = []; var spark_data = [];
var curr_provider = array_provider[graphNum]; var curr_provider = array_provider[graphNum];
var curr_sum = 0.0; var curr_sum = 0.0;
var curr_sum_elem = 0.0;
var day_sum = 0.0; var day_sum = 0.0;
var day_sum_elem = 0.0;
var hour_sum = 0.0; var hour_sum = 0.0;
for(curr_date=dateStart; curr_date<dateStart+oneWeek; curr_date+=oneHour){ for(curr_date=dateStart; curr_date<dateStart+oneWeek; curr_date+=oneHour){
@ -71,10 +96,12 @@ $.getJSON("/sentiment_analysis_getplotdata/",
graph_data.push({'neg': neg, 'neu': neu, 'pos': pos, 'compoundPos': compPosAvg, 'compoundNeg': compNegAvg}); graph_data.push({'neg': neg, 'neu': neu, 'pos': pos, 'compoundPos': compPosAvg, 'compoundNeg': compNegAvg});
spark_data.push(pos-neg); spark_data.push(pos-neg);
curr_sum += (pos-neg); curr_sum += (pos-neg);
curr_sum_elem++;
max_value = Math.abs(pos-neg) > max_value ? Math.abs(pos-neg) : max_value; max_value = Math.abs(pos-neg) > max_value ? Math.abs(pos-neg) : max_value;
if(curr_date >= dateStart+oneWeek-24*oneHour){ if(curr_date >= dateStart+oneWeek-24*oneHour){
day_sum += (pos-neg); day_sum += (pos-neg);
day_sum_elem++;
} }
if(curr_date >= dateStart+oneWeek-oneHour){ if(curr_date >= dateStart+oneWeek-oneHour){
hour_sum += (pos-neg); hour_sum += (pos-neg);
@ -85,7 +112,8 @@ $.getJSON("/sentiment_analysis_getplotdata/",
all_graph_day_sum += day_sum; all_graph_day_sum += day_sum;
all_graph_hour_sum += hour_sum; all_graph_hour_sum += hour_sum;
var curr_avg = curr_sum / (oneWeek/oneHour); var curr_avg = curr_sum / (curr_sum_elem);
//var curr_avg = curr_sum / (oneWeek/oneHour);
//var curr_avg = curr_sum / (spark_data.length); //var curr_avg = curr_sum / (spark_data.length);
graph_avg.push([curr_provider, curr_avg]); graph_avg.push([curr_provider, curr_avg]);
plot_data.push(spark_data); plot_data.push(spark_data);
@ -94,6 +122,8 @@ $.getJSON("/sentiment_analysis_getplotdata/",
sparklineOptions.chartRangeMax = max_value; sparklineOptions.chartRangeMax = max_value;
sparklineOptions.chartRangeMin = -max_value; sparklineOptions.chartRangeMin = -max_value;
sparklineOptions.tooltipValueLookups = { names: offset_to_date};
// print week // print week
var num = graphNum + 1; var num = graphNum + 1;
var placeholder = '.sparkLineStatsWeek' + num; var placeholder = '.sparkLineStatsWeek' + num;
@ -102,12 +132,15 @@ $.getJSON("/sentiment_analysis_getplotdata/",
$(placeholder+'s').text(curr_avg.toFixed(5)); $(placeholder+'s').text(curr_avg.toFixed(5));
sparklineOptions.barWidth = 18; sparklineOptions.barWidth = 18;
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> Avg: {{value}} </span>'
$(placeholder+'b').sparkline([curr_avg], sparklineOptions); $(placeholder+'b').sparkline([curr_avg], sparklineOptions);
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> {{offset:names}}, {{value}} </span>'
sparklineOptions.barWidth = 2; sparklineOptions.barWidth = 2;
sparklineOptions.tooltipValueLookups = { names: offset_to_time};
// print today // print today
var data_length = plot_data[graphNum].length; var data_length = plot_data[graphNum].length;
var data_today = plot_data[graphNum].slice(data_length-24, data_length-1); var data_today = plot_data[graphNum].slice(data_length-24, data_length);
placeholder = '.sparkLineStatsToday' + num; placeholder = '.sparkLineStatsToday' + num;
sparklineOptions.barWidth = 14; sparklineOptions.barWidth = 14;
@ -115,9 +148,13 @@ $.getJSON("/sentiment_analysis_getplotdata/",
$(placeholder+'t').text(curr_provider); $(placeholder+'t').text(curr_provider);
sparklineOptions.barWidth = 18; sparklineOptions.barWidth = 18;
$(placeholder+'b').sparkline([day_sum/24], sparklineOptions); sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> Avg: {{value}} </span>'
//var day_avg = day_sum/24;
var day_avg = day_sum/day_sum_elem;
$(placeholder+'b').sparkline([day_avg], sparklineOptions);
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> {{offset:names}}, {{value}} </span>'
sparklineOptions.barWidth = 2; sparklineOptions.barWidth = 2;
$(placeholder+'s').text((day_sum/24).toFixed(5)); $(placeholder+'s').text((day_avg).toFixed(5));
}//for loop }//for loop
@ -153,13 +190,15 @@ $.getJSON("/sentiment_analysis_getplotdata/",
gaugeOptions.appendTo = '#gauge_today_last_hour'; gaugeOptions.appendTo = '#gauge_today_last_hour';
gaugeOptions.dialLabel = 'Last hour'; gaugeOptions.dialLabel = 'Last hour';
gaugeOptions.elementId = 'gauge1'; gaugeOptions.elementId = 'gauge1';
gaugeOptions.inc = all_graph_hour_sum / 8; var piePercent = (all_graph_hour_sum / 8) / max_value;
gaugeOptions.inc = piePercent;
var gauge_today_last_hour = new FlexGauge(gaugeOptions); var gauge_today_last_hour = new FlexGauge(gaugeOptions);
gaugeOptions2.appendTo = '#gauge_today_last_days'; gaugeOptions2.appendTo = '#gauge_today_last_days';
gaugeOptions2.dialLabel = 'Today'; gaugeOptions2.dialLabel = 'Today';
gaugeOptions2.elementId = 'gauge2'; gaugeOptions2.elementId = 'gauge2';
gaugeOptions2.inc = all_graph_day_sum / 8; piePercent = (all_graph_day_sum / (8*24)) / max_value;
gaugeOptions2.inc = piePercent;
var gauge_today_last_days = new FlexGauge(gaugeOptions2); var gauge_today_last_days = new FlexGauge(gaugeOptions2);
gaugeOptions3.appendTo = '#gauge_week'; gaugeOptions3.appendTo = '#gauge_week';
@ -167,10 +206,14 @@ $.getJSON("/sentiment_analysis_getplotdata/",
gaugeOptions3.elementId = 'gauge3'; gaugeOptions3.elementId = 'gauge3';
var graph_avg_sum = 0.0; var graph_avg_sum = 0.0;
for (i=0; i<graph_avg.length; i++) var temp_max_val = 0.0;
for (i=0; i<graph_avg.length; i++){
graph_avg_sum += graph_avg[i][1]; graph_avg_sum += graph_avg[i][1];
temp_max_val = Math.abs(graph_avg[i][1]) > temp_max_val ? Math.abs(graph_avg[i][1]) : temp_max_val;
}
gaugeOptions3.inc = graph_avg_sum / graph_avg.length; piePercent = (graph_avg_sum / graph_avg.length) / temp_max_val;
gaugeOptions3.inc = piePercent;
var gauge_today_last_days = new FlexGauge(gaugeOptions3); var gauge_today_last_days = new FlexGauge(gaugeOptions3);
@ -185,21 +228,24 @@ $.getJSON("/sentiment_analysis_getplotdata/",
/* ----------- CanvasJS ------------ */ /* ----------- CanvasJS ------------ */
var gauge_data = graph_data.slice(graph_data.length-24*2, graph_data.length-24*1);
var comp_sum_day_pos = 0.0; var comp_sum_day_pos = 0.0;
var comp_sum_day_neg = 0.0; var comp_sum_day_neg = 0.0;
var comp_sum_hour_pos = 0.0; var comp_sum_hour_pos = 0.0;
var comp_sum_hour_neg = 0.0; var comp_sum_hour_neg = 0.0;
for (i=1; i< gauge_data.length; i++){ for(graphNum=0; graphNum<8; graphNum++){
comp_sum_day_pos += gauge_data[i].compoundPos; curr_graphData = all_data[graphNum];
comp_sum_day_neg += gauge_data[i].compoundNeg; var gauge_data = curr_graphData.slice(curr_graphData.length-24, curr_graphData.length);
for (i=1; i< gauge_data.length; i++){
comp_sum_day_pos += gauge_data[i].compoundPos;
comp_sum_day_neg += gauge_data[i].compoundNeg;
if(i >= 24){ if(i == 23){
comp_sum_hour_pos += gauge_data[i].compoundPos; comp_sum_hour_pos += gauge_data[i].compoundPos;
comp_sum_hour_neg += gauge_data[i].compoundNeg; comp_sum_hour_neg += gauge_data[i].compoundNeg;
}
} }
}
}
var options_canvasJS_1 = { var options_canvasJS_1 = {
@ -216,20 +262,20 @@ $.getJSON("/sentiment_analysis_getplotdata/",
labelFontSize: 0.1, labelFontSize: 0.1,
}, },
data: [ data: [
{ {
type: "bar", type: "bar",
color: "green", color: "green",
dataPoints: [ dataPoints: [
{y: comp_sum_hour_pos/8} {y: comp_sum_hour_pos/8}
] ]
}, },
{ {
type: "bar", type: "bar",
color: "red", color: "red",
dataPoints: [ dataPoints: [
{y: comp_sum_hour_neg/8} {y: comp_sum_hour_neg/8}
] ]
} }
] ]
}; };