mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
Merge pull request #102 from ptitdoc/bugfix1
Bug fixes in Workers and improvement of pystemon-feeder
This commit is contained in:
commit
b3a3616b0b
6 changed files with 74 additions and 9 deletions
|
@ -32,6 +32,20 @@ accepted_Mime_type = ['text/plain']
|
||||||
size_threshold = 250
|
size_threshold = 250
|
||||||
line_max_length_threshold = 1000
|
line_max_length_threshold = 1000
|
||||||
|
|
||||||
|
import os
|
||||||
|
import ConfigParser
|
||||||
|
|
||||||
|
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||||
|
if not os.path.exists(configfile):
|
||||||
|
raise Exception('Unable to find the configuration file. \
|
||||||
|
Did you set environment variables? \
|
||||||
|
Or activate the virtualenv.')
|
||||||
|
|
||||||
|
cfg = ConfigParser.ConfigParser()
|
||||||
|
cfg.read(configfile)
|
||||||
|
|
||||||
|
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
|
||||||
|
|
||||||
def Analyse(message, server):
|
def Analyse(message, server):
|
||||||
path = message
|
path = message
|
||||||
paste = Paste.Paste(path)
|
paste = Paste.Paste(path)
|
||||||
|
@ -61,7 +75,7 @@ def Analyse(message, server):
|
||||||
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
||||||
neg_line = 0
|
neg_line = 0
|
||||||
pos_line = 0
|
pos_line = 0
|
||||||
sid = SentimentIntensityAnalyzer()
|
sid = SentimentIntensityAnalyzer(sentiment_lexicon_file)
|
||||||
for sentence in sentences:
|
for sentence in sentences:
|
||||||
ss = sid.polarity_scores(sentence)
|
ss = sid.polarity_scores(sentence)
|
||||||
for k in sorted(ss):
|
for k in sorted(ss):
|
||||||
|
|
|
@ -113,7 +113,7 @@ if __name__ == "__main__":
|
||||||
# IP allocation)
|
# IP allocation)
|
||||||
if cc is not None and cc != "EU":
|
if cc is not None and cc != "EU":
|
||||||
print hostl, asn, cc, \
|
print hostl, asn, cc, \
|
||||||
pycountry.countries.get(alpha2=cc).name
|
pycountry.countries.get(alpha_2=cc).name
|
||||||
if cc == cc_critical:
|
if cc == cc_critical:
|
||||||
to_print = 'Url;{};{};{};Detected {} {}'.format(
|
to_print = 'Url;{};{};{};Detected {} {}'.format(
|
||||||
PST.p_source, PST.p_date, PST.p_name,
|
PST.p_source, PST.p_date, PST.p_name,
|
||||||
|
|
|
@ -39,7 +39,7 @@ def get_date_range(num_day):
|
||||||
return date_list
|
return date_list
|
||||||
|
|
||||||
# Compute the progression for one keyword
|
# Compute the progression for one keyword
|
||||||
def compute_progression_word(keyword):
|
def compute_progression_word(server, num_day, keyword):
|
||||||
date_range = get_date_range(num_day)
|
date_range = get_date_range(num_day)
|
||||||
# check if this keyword is eligible for progression
|
# check if this keyword is eligible for progression
|
||||||
keyword_total_sum = 0
|
keyword_total_sum = 0
|
||||||
|
@ -73,12 +73,12 @@ def compute_progression(server, field_name, num_day, url_parsed):
|
||||||
if keyword is not None:
|
if keyword is not None:
|
||||||
|
|
||||||
#compute the progression of the current word
|
#compute the progression of the current word
|
||||||
keyword_increase, keyword_total_sum = compute_progression_word(keyword)
|
keyword_increase, keyword_total_sum = compute_progression_word(server, num_day, keyword)
|
||||||
|
|
||||||
#re-compute the progression of 2*max_set_cardinality
|
#re-compute the progression of 2*max_set_cardinality
|
||||||
current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
|
current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
|
||||||
for word, value in array_top_day:
|
for word, value in current_top:
|
||||||
word_inc, word_tot_sum = compute_progression_word(word)
|
word_inc, word_tot_sum = compute_progression_word(server, num_day, word)
|
||||||
server.zrem(redis_progression_name_set, word)
|
server.zrem(redis_progression_name_set, word)
|
||||||
if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
|
if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
|
||||||
server.zadd(redis_progression_name_set, float(word_inc), word)
|
server.zadd(redis_progression_name_set, float(word_inc), word)
|
||||||
|
|
|
@ -24,13 +24,28 @@ import sys
|
||||||
import time
|
import time
|
||||||
import redis
|
import redis
|
||||||
import base64
|
import base64
|
||||||
|
import os
|
||||||
|
import ConfigParser
|
||||||
|
|
||||||
port = "5556"
|
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||||
pystemonpath = "/home/pystemon/pystemon/"
|
if not os.path.exists(configfile):
|
||||||
|
raise Exception('Unable to find the configuration file. \
|
||||||
|
Did you set environment variables? \
|
||||||
|
Or activate the virtualenv.')
|
||||||
|
|
||||||
|
cfg = ConfigParser.ConfigParser()
|
||||||
|
cfg.read(configfile)
|
||||||
|
|
||||||
|
if cfg.has_option("ZMQ_Global", "bind"):
|
||||||
|
zmq_url = cfg.get("ZMQ_Global", "bind")
|
||||||
|
else:
|
||||||
|
zmq_url = "tcp://127.0.0.1:5556"
|
||||||
|
|
||||||
|
pystemonpath = cfg.get("Directories", "pystemonpath")
|
||||||
|
|
||||||
context = zmq.Context()
|
context = zmq.Context()
|
||||||
socket = context.socket(zmq.PUB)
|
socket = context.socket(zmq.PUB)
|
||||||
socket.bind("tcp://*:%s" % port)
|
socket.bind(zmq_url)
|
||||||
|
|
||||||
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
|
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
|
||||||
r = redis.StrictRedis(host='localhost', db=10)
|
r = redis.StrictRedis(host='localhost', db=10)
|
||||||
|
|
|
@ -14,6 +14,10 @@ tldsfile = faup/src/data/mozilla.tlds
|
||||||
|
|
||||||
domainstrending_csv = var/www/static/csv/domainstrendingdata
|
domainstrending_csv = var/www/static/csv/domainstrendingdata
|
||||||
|
|
||||||
|
pystemonpath = /home/pystemon/pystemon/
|
||||||
|
|
||||||
|
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
|
||||||
|
|
||||||
##### Flask #####
|
##### Flask #####
|
||||||
[Flask]
|
[Flask]
|
||||||
#Maximum number of character to display in the toolip
|
#Maximum number of character to display in the toolip
|
||||||
|
@ -128,6 +132,7 @@ path = indexdir
|
||||||
#address = tcp://crf.circl.lu:5556
|
#address = tcp://crf.circl.lu:5556
|
||||||
address = tcp://127.0.0.1:5556
|
address = tcp://127.0.0.1:5556
|
||||||
channel = 102
|
channel = 102
|
||||||
|
bind = tcp://127.0.0.1:5556
|
||||||
|
|
||||||
[ZMQ_Url]
|
[ZMQ_Url]
|
||||||
address = tcp://127.0.0.1:5004
|
address = tcp://127.0.0.1:5004
|
||||||
|
|
31
doc/all_modules.txt
Normal file
31
doc/all_modules.txt
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
Attributes
|
||||||
|
BrowseWarningPaste
|
||||||
|
Categ
|
||||||
|
Credential
|
||||||
|
CreditCards
|
||||||
|
Curve
|
||||||
|
CurveManageTopSets
|
||||||
|
Cve
|
||||||
|
DomClassifier
|
||||||
|
Duplicates
|
||||||
|
Global
|
||||||
|
Indexer
|
||||||
|
Keys
|
||||||
|
Lines
|
||||||
|
Mail
|
||||||
|
Mixer
|
||||||
|
ModuleInformation
|
||||||
|
Keys
|
||||||
|
Lines
|
||||||
|
Mail
|
||||||
|
Mixer
|
||||||
|
ModuleInformation
|
||||||
|
ModuleStats
|
||||||
|
Onion
|
||||||
|
Phone
|
||||||
|
Release
|
||||||
|
SentimentAnalysis
|
||||||
|
SQLInjectionDetection
|
||||||
|
Tokenize
|
||||||
|
Web
|
||||||
|
WebStats
|
Loading…
Reference in a new issue