Merge pull request #102 from ptitdoc/bugfix1

Bug fixes in Workers and improvement of pystemon-feeder
This commit is contained in:
mokaddem 2017-01-17 16:35:32 +01:00 committed by GitHub
commit b3a3616b0b
6 changed files with 74 additions and 9 deletions

View file

@ -32,6 +32,20 @@ accepted_Mime_type = ['text/plain']
size_threshold = 250 size_threshold = 250
line_max_length_threshold = 1000 line_max_length_threshold = 1000
import os
import ConfigParser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
def Analyse(message, server): def Analyse(message, server):
path = message path = message
paste = Paste.Paste(path) paste = Paste.Paste(path)
@ -61,7 +75,7 @@ def Analyse(message, server):
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
neg_line = 0 neg_line = 0
pos_line = 0 pos_line = 0
sid = SentimentIntensityAnalyzer() sid = SentimentIntensityAnalyzer(sentiment_lexicon_file)
for sentence in sentences: for sentence in sentences:
ss = sid.polarity_scores(sentence) ss = sid.polarity_scores(sentence)
for k in sorted(ss): for k in sorted(ss):

View file

@ -113,7 +113,7 @@ if __name__ == "__main__":
# IP allocation) # IP allocation)
if cc is not None and cc != "EU": if cc is not None and cc != "EU":
print hostl, asn, cc, \ print hostl, asn, cc, \
pycountry.countries.get(alpha2=cc).name pycountry.countries.get(alpha_2=cc).name
if cc == cc_critical: if cc == cc_critical:
to_print = 'Url;{};{};{};Detected {} {}'.format( to_print = 'Url;{};{};{};Detected {} {}'.format(
PST.p_source, PST.p_date, PST.p_name, PST.p_source, PST.p_date, PST.p_name,

View file

@ -39,7 +39,7 @@ def get_date_range(num_day):
return date_list return date_list
# Compute the progression for one keyword # Compute the progression for one keyword
def compute_progression_word(keyword): def compute_progression_word(server, num_day, keyword):
date_range = get_date_range(num_day) date_range = get_date_range(num_day)
# check if this keyword is eligible for progression # check if this keyword is eligible for progression
keyword_total_sum = 0 keyword_total_sum = 0
@ -73,12 +73,12 @@ def compute_progression(server, field_name, num_day, url_parsed):
if keyword is not None: if keyword is not None:
#compute the progression of the current word #compute the progression of the current word
keyword_increase, keyword_total_sum = compute_progression_word(keyword) keyword_increase, keyword_total_sum = compute_progression_word(server, num_day, keyword)
#re-compute the progression of 2*max_set_cardinality #re-compute the progression of 2*max_set_cardinality
current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality) current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
for word, value in array_top_day: for word, value in current_top:
word_inc, word_tot_sum = compute_progression_word(word) word_inc, word_tot_sum = compute_progression_word(server, num_day, word)
server.zrem(redis_progression_name_set, word) server.zrem(redis_progression_name_set, word)
if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase): if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
server.zadd(redis_progression_name_set, float(word_inc), word) server.zadd(redis_progression_name_set, float(word_inc), word)

View file

@ -24,13 +24,28 @@ import sys
import time import time
import redis import redis
import base64 import base64
import os
import ConfigParser
port = "5556" configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
pystemonpath = "/home/pystemon/pystemon/" if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg.read(configfile)
if cfg.has_option("ZMQ_Global", "bind"):
zmq_url = cfg.get("ZMQ_Global", "bind")
else:
zmq_url = "tcp://127.0.0.1:5556"
pystemonpath = cfg.get("Directories", "pystemonpath")
context = zmq.Context() context = zmq.Context()
socket = context.socket(zmq.PUB) socket = context.socket(zmq.PUB)
socket.bind("tcp://*:%s" % port) socket.bind(zmq_url)
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 # check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
r = redis.StrictRedis(host='localhost', db=10) r = redis.StrictRedis(host='localhost', db=10)

View file

@ -14,6 +14,10 @@ tldsfile = faup/src/data/mozilla.tlds
domainstrending_csv = var/www/static/csv/domainstrendingdata domainstrending_csv = var/www/static/csv/domainstrendingdata
pystemonpath = /home/pystemon/pystemon/
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
##### Flask ##### ##### Flask #####
[Flask] [Flask]
#Maximum number of characters to display in the tooltip #Maximum number of characters to display in the tooltip
@ -128,6 +132,7 @@ path = indexdir
#address = tcp://crf.circl.lu:5556 #address = tcp://crf.circl.lu:5556
address = tcp://127.0.0.1:5556 address = tcp://127.0.0.1:5556
channel = 102 channel = 102
bind = tcp://127.0.0.1:5556
[ZMQ_Url] [ZMQ_Url]
address = tcp://127.0.0.1:5004 address = tcp://127.0.0.1:5004

31
doc/all_modules.txt Normal file
View file

@ -0,0 +1,31 @@
Attributes
BrowseWarningPaste
Categ
Credential
CreditCards
Curve
CurveManageTopSets
Cve
DomClassifier
Duplicates
Global
Indexer
Keys
Lines
Mail
Mixer
ModuleInformation
Keys
Lines
Mail
Mixer
ModuleInformation
ModuleStats
Onion
Phone
Release
SentimentAnalysis
SQLInjectionDetection
Tokenize
Web
WebStats