Added draft of filter in sentiment analysis (Discard syntaxical languages) + Added nice tooltip for sparkline. Trending displays avg in function of the number of elements processed and not for the complete week + fixed bug in gauge and canvasjs (was performing avg with only 1 graph instead of all 8).

This commit is contained in:
Mokaddem 2016-08-16 16:33:02 +02:00
parent ecd834ffb6
commit 1084e45f1b
4 changed files with 156 additions and 91 deletions

View file

@ -15,6 +15,7 @@ import time
import datetime
import calendar
import redis
import json
from pubsublogger import publisher
from Helper import Process
from packages import Paste
@ -22,6 +23,8 @@ from packages import Paste
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize
# Config Variables
accepted_Mime_type = ['text/plain']
def Analyse(message, server):
#print 'analyzing'
@ -31,68 +34,84 @@ def Analyse(message, server):
content = paste.get_p_content()
provider = paste.p_source
p_date = str(paste._get_p_date())
#print provider, date
p_MimeType = paste._get_p_encoding()
the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
#print 'pastedate: ', the_date
the_time = datetime.datetime.now()
the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
#print 'now: ', the_time
combined_datetime = datetime.datetime.combine(the_date, the_time)
#print 'combined: ', combined_datetime
timestamp = calendar.timegm(combined_datetime.timetuple())
#print 'timestamp: ', timestamp
# Perform further analysis
if p_MimeType == "text/plain":
if isJSON(content):
p_MimeType = "JSON"
sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))
#print len(sentences)
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
neg_line = 0
pos_line = 0
sid = SentimentIntensityAnalyzer()
for sentence in sentences:
ss = sid.polarity_scores(sentence)
for k in sorted(ss):
if k == 'compound':
if ss['neg'] > ss['pos']:
avg_score['compoundNeg'] += ss[k]
neg_line += 1
if p_MimeType in accepted_Mime_type:
print 'Processing', path
the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
#print 'pastedate: ', the_date
the_time = datetime.datetime.now()
the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
#print 'now: ', the_time
combined_datetime = datetime.datetime.combine(the_date, the_time)
#print 'combined: ', combined_datetime
timestamp = calendar.timegm(combined_datetime.timetuple())
#print 'timestamp: ', timestamp
sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))
#print len(sentences)
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
neg_line = 0
pos_line = 0
sid = SentimentIntensityAnalyzer()
for sentence in sentences:
ss = sid.polarity_scores(sentence)
for k in sorted(ss):
if k == 'compound':
if ss['neg'] > ss['pos']:
avg_score['compoundNeg'] += ss[k]
neg_line += 1
else:
avg_score['compoundPos'] += ss[k]
pos_line += 1
else:
avg_score['compoundPos'] += ss[k]
pos_line += 1
else:
avg_score[k] += ss[k]
avg_score[k] += ss[k]
#print('{0}: {1}, '.format(k, ss[k]))
for k in avg_score:
if k == 'compoundPos':
avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
elif k == 'compoundNeg':
avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
else:
avg_score[k] = avg_score[k] / len(sentences)
# In redis-levelDB: {} = set, () = K-V
# {Provider_set -> provider_i}
# {Provider_TimestampInHour_i -> UniqID_i}_j
# (UniqID_i -> PasteValue_i)
server.sadd('Provider_set', provider)
#print 'Provider_set', provider
provider_timestamp = provider + '_' + str(timestamp)
#print provider_timestamp
server.incr('UniqID')
UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID
server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score)
print avg_score
#print UniqID, '->', avg_score
else:
print 'Dropped:', p_MimeType
#print('{0}: {1}, '.format(k, ss[k]))
def isJSON(content):
try:
json.loads(content)
return True
for k in avg_score:
if k == 'compoundPos':
avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
elif k == 'compoundNeg':
avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
else:
avg_score[k] = avg_score[k] / len(sentences)
# In redis-levelDB: {} = set, () = K-V
# {Provider_set -> provider_i}
# {Provider_TimestampInHour_i -> UniqID_i}_j
# (UniqID_i -> PasteValue_i)
server.sadd('Provider_set', provider)
#print 'Provider_set', provider
provider_timestamp = provider + '_' + str(timestamp)
#print provider_timestamp
server.incr('UniqID')
UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID
server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score)
#print UniqID, '->', avg_score
#print '(', provider, timestamp, str(avg_score) , ')'
#server.hset(provider, timestamp, str(avg_score))
except Exception,e:
return False
if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@ -102,7 +121,7 @@ if __name__ == '__main__':
publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg
config_section = 'SentimentAnalyser'
config_section = 'SentimentAnalysis'
# Setup the I/O queues
p = Process(config_section)

View file

@ -66,8 +66,8 @@ subscribe = Redis_BrowseWarningPaste
#subscribe = Redis_Cve
#publish = Redis_BrowseWarningPaste
[SentimentAnalyser]
subscribe = Redis_LinesLong
[SentimentAnalysis]
subscribe = Redis_Global
[Release]
subscribe = Redis_Global

View file

@ -497,7 +497,7 @@ def sentiment_analysis_plot_tool_getdata():
timestamp1 = calendar.timegm(date1.timetuple())
timestamp2 = calendar.timegm(date2.timetuple())
print timestamp2
oneHour = 60*60
oneDay = oneHour*24

View file

@ -1,4 +1,26 @@
function generate_offset_to_time(num){
var to_ret = {};
for(i=0; i<=num; i++)
to_ret[i] = new Date().getHours()-(23-i)+'h';
return to_ret;
};
function generate_offset_to_date(day){
var now = new Date();
var to_ret = {};
for(i=0; i<day; i++){
for(j=0; j<24; j++){
var t1 =now.getDate()-i + ":";
var t2 =now.getHours()-(23-j)+"h";
to_ret[j+24*i] = t1+t2;
}
}
return to_ret;
};
var offset_to_time = generate_offset_to_time(23);
var offset_to_date = generate_offset_to_date(7);
var sparklineOptions = {
height: 80,//Height of the chart - Defaults to 'auto' (line height of the containing tag)
@ -13,6 +35,7 @@
negBarColor: '#f22929',
zeroColor: '#ffff00',
tooltipFormat: '<span style="color: {{color}}">&#9679;</span> {{offset:names}}, {{value}} </span>',
};
@ -37,7 +60,9 @@ $.getJSON("/sentiment_analysis_getplotdata/",
var spark_data = [];
var curr_provider = array_provider[graphNum];
var curr_sum = 0.0;
var curr_sum_elem = 0.0;
var day_sum = 0.0;
var day_sum_elem = 0.0;
var hour_sum = 0.0;
for(curr_date=dateStart; curr_date<dateStart+oneWeek; curr_date+=oneHour){
@ -71,10 +96,12 @@ $.getJSON("/sentiment_analysis_getplotdata/",
graph_data.push({'neg': neg, 'neu': neu, 'pos': pos, 'compoundPos': compPosAvg, 'compoundNeg': compNegAvg});
spark_data.push(pos-neg);
curr_sum += (pos-neg);
curr_sum_elem++;
max_value = Math.abs(pos-neg) > max_value ? Math.abs(pos-neg) : max_value;
if(curr_date >= dateStart+oneWeek-24*oneHour){
day_sum += (pos-neg);
day_sum_elem++;
}
if(curr_date >= dateStart+oneWeek-oneHour){
hour_sum += (pos-neg);
@ -85,7 +112,8 @@ $.getJSON("/sentiment_analysis_getplotdata/",
all_graph_day_sum += day_sum;
all_graph_hour_sum += hour_sum;
var curr_avg = curr_sum / (oneWeek/oneHour);
var curr_avg = curr_sum / (curr_sum_elem);
//var curr_avg = curr_sum / (oneWeek/oneHour);
//var curr_avg = curr_sum / (spark_data.length);
graph_avg.push([curr_provider, curr_avg]);
plot_data.push(spark_data);
@ -94,6 +122,8 @@ $.getJSON("/sentiment_analysis_getplotdata/",
sparklineOptions.chartRangeMax = max_value;
sparklineOptions.chartRangeMin = -max_value;
sparklineOptions.tooltipValueLookups = { names: offset_to_date};
// print week
var num = graphNum + 1;
var placeholder = '.sparkLineStatsWeek' + num;
@ -102,12 +132,15 @@ $.getJSON("/sentiment_analysis_getplotdata/",
$(placeholder+'s').text(curr_avg.toFixed(5));
sparklineOptions.barWidth = 18;
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> Avg: {{value}} </span>'
$(placeholder+'b').sparkline([curr_avg], sparklineOptions);
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> {{offset:names}}, {{value}} </span>'
sparklineOptions.barWidth = 2;
sparklineOptions.tooltipValueLookups = { names: offset_to_time};
// print today
var data_length = plot_data[graphNum].length;
var data_today = plot_data[graphNum].slice(data_length-24, data_length-1);
var data_today = plot_data[graphNum].slice(data_length-24, data_length);
placeholder = '.sparkLineStatsToday' + num;
sparklineOptions.barWidth = 14;
@ -115,9 +148,13 @@ $.getJSON("/sentiment_analysis_getplotdata/",
$(placeholder+'t').text(curr_provider);
sparklineOptions.barWidth = 18;
$(placeholder+'b').sparkline([day_sum/24], sparklineOptions);
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> Avg: {{value}} </span>'
//var day_avg = day_sum/24;
var day_avg = day_sum/day_sum_elem;
$(placeholder+'b').sparkline([day_avg], sparklineOptions);
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">&#9679;</span> {{offset:names}}, {{value}} </span>'
sparklineOptions.barWidth = 2;
$(placeholder+'s').text((day_sum/24).toFixed(5));
$(placeholder+'s').text((day_avg).toFixed(5));
}//for loop
@ -153,13 +190,15 @@ $.getJSON("/sentiment_analysis_getplotdata/",
gaugeOptions.appendTo = '#gauge_today_last_hour';
gaugeOptions.dialLabel = 'Last hour';
gaugeOptions.elementId = 'gauge1';
gaugeOptions.inc = all_graph_hour_sum / 8;
var piePercent = (all_graph_hour_sum / 8) / max_value;
gaugeOptions.inc = piePercent;
var gauge_today_last_hour = new FlexGauge(gaugeOptions);
gaugeOptions2.appendTo = '#gauge_today_last_days';
gaugeOptions2.dialLabel = 'Today';
gaugeOptions2.elementId = 'gauge2';
gaugeOptions2.inc = all_graph_day_sum / 8;
piePercent = (all_graph_day_sum / (8*24)) / max_value;
gaugeOptions2.inc = piePercent;
var gauge_today_last_days = new FlexGauge(gaugeOptions2);
gaugeOptions3.appendTo = '#gauge_week';
@ -167,10 +206,14 @@ $.getJSON("/sentiment_analysis_getplotdata/",
gaugeOptions3.elementId = 'gauge3';
var graph_avg_sum = 0.0;
for (i=0; i<graph_avg.length; i++)
var temp_max_val = 0.0;
for (i=0; i<graph_avg.length; i++){
graph_avg_sum += graph_avg[i][1];
temp_max_val = Math.abs(graph_avg[i][1]) > temp_max_val ? Math.abs(graph_avg[i][1]) : temp_max_val;
}
gaugeOptions3.inc = graph_avg_sum / graph_avg.length;
piePercent = (graph_avg_sum / graph_avg.length) / temp_max_val;
gaugeOptions3.inc = piePercent;
var gauge_today_last_days = new FlexGauge(gaugeOptions3);
@ -185,21 +228,24 @@ $.getJSON("/sentiment_analysis_getplotdata/",
/* ----------- CanvasJS ------------ */
var gauge_data = graph_data.slice(graph_data.length-24*2, graph_data.length-24*1);
var comp_sum_day_pos = 0.0;
var comp_sum_day_neg = 0.0;
var comp_sum_hour_pos = 0.0;
var comp_sum_hour_neg = 0.0;
for (i=1; i< gauge_data.length; i++){
comp_sum_day_pos += gauge_data[i].compoundPos;
comp_sum_day_neg += gauge_data[i].compoundNeg;
for(graphNum=0; graphNum<8; graphNum++){
curr_graphData = all_data[graphNum];
var gauge_data = curr_graphData.slice(curr_graphData.length-24, curr_graphData.length);
for (i=1; i< gauge_data.length; i++){
comp_sum_day_pos += gauge_data[i].compoundPos;
comp_sum_day_neg += gauge_data[i].compoundNeg;
if(i >= 24){
comp_sum_hour_pos += gauge_data[i].compoundPos;
comp_sum_hour_neg += gauge_data[i].compoundNeg;
if(i == 23){
comp_sum_hour_pos += gauge_data[i].compoundPos;
comp_sum_hour_neg += gauge_data[i].compoundNeg;
}
}
}
}
var options_canvasJS_1 = {
@ -216,20 +262,20 @@ $.getJSON("/sentiment_analysis_getplotdata/",
labelFontSize: 0.1,
},
data: [
{
type: "bar",
color: "green",
dataPoints: [
{y: comp_sum_hour_pos/8}
]
},
{
type: "bar",
color: "red",
dataPoints: [
{y: comp_sum_hour_neg/8}
]
}
{
type: "bar",
color: "green",
dataPoints: [
{y: comp_sum_hour_pos/8}
]
},
{
type: "bar",
color: "red",
dataPoints: [
{y: comp_sum_hour_neg/8}
]
}
]
};