mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-30 09:47:17 +00:00
Merge pull request #102 from ptitdoc/bugfix1
Bug fixes in Workers and improvement of pystemon-feeder
This commit is contained in:
commit
b3a3616b0b
6 changed files with 74 additions and 9 deletions
|
@ -32,6 +32,20 @@ accepted_Mime_type = ['text/plain']
|
|||
size_threshold = 250
|
||||
line_max_length_threshold = 1000
|
||||
|
||||
import os
|
||||
import ConfigParser
|
||||
|
||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||
if not os.path.exists(configfile):
|
||||
raise Exception('Unable to find the configuration file. \
|
||||
Did you set environment variables? \
|
||||
Or activate the virtualenv.')
|
||||
|
||||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(configfile)
|
||||
|
||||
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
|
||||
|
||||
def Analyse(message, server):
|
||||
path = message
|
||||
paste = Paste.Paste(path)
|
||||
|
@ -61,7 +75,7 @@ def Analyse(message, server):
|
|||
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
||||
neg_line = 0
|
||||
pos_line = 0
|
||||
sid = SentimentIntensityAnalyzer()
|
||||
sid = SentimentIntensityAnalyzer(sentiment_lexicon_file)
|
||||
for sentence in sentences:
|
||||
ss = sid.polarity_scores(sentence)
|
||||
for k in sorted(ss):
|
||||
|
|
|
@ -113,7 +113,7 @@ if __name__ == "__main__":
|
|||
# IP allocation)
|
||||
if cc is not None and cc != "EU":
|
||||
print hostl, asn, cc, \
|
||||
pycountry.countries.get(alpha2=cc).name
|
||||
pycountry.countries.get(alpha_2=cc).name
|
||||
if cc == cc_critical:
|
||||
to_print = 'Url;{};{};{};Detected {} {}'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name,
|
||||
|
|
|
@ -39,7 +39,7 @@ def get_date_range(num_day):
|
|||
return date_list
|
||||
|
||||
# Compute the progression for one keyword
|
||||
def compute_progression_word(keyword):
|
||||
def compute_progression_word(server, num_day, keyword):
|
||||
date_range = get_date_range(num_day)
|
||||
# check if this keyword is eligible for progression
|
||||
keyword_total_sum = 0
|
||||
|
@ -73,12 +73,12 @@ def compute_progression(server, field_name, num_day, url_parsed):
|
|||
if keyword is not None:
|
||||
|
||||
#compute the progression of the current word
|
||||
keyword_increase, keyword_total_sum = compute_progression_word(keyword)
|
||||
keyword_increase, keyword_total_sum = compute_progression_word(server, num_day, keyword)
|
||||
|
||||
#re-compute the progression of 2*max_set_cardinality
|
||||
current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
|
||||
for word, value in array_top_day:
|
||||
word_inc, word_tot_sum = compute_progression_word(word)
|
||||
for word, value in current_top:
|
||||
word_inc, word_tot_sum = compute_progression_word(server, num_day, word)
|
||||
server.zrem(redis_progression_name_set, word)
|
||||
if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
|
||||
server.zadd(redis_progression_name_set, float(word_inc), word)
|
||||
|
|
|
@ -24,13 +24,28 @@ import sys
|
|||
import time
|
||||
import redis
|
||||
import base64
|
||||
import os
|
||||
import ConfigParser
|
||||
|
||||
port = "5556"
|
||||
pystemonpath = "/home/pystemon/pystemon/"
|
||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||
if not os.path.exists(configfile):
|
||||
raise Exception('Unable to find the configuration file. \
|
||||
Did you set environment variables? \
|
||||
Or activate the virtualenv.')
|
||||
|
||||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(configfile)
|
||||
|
||||
if cfg.has_option("ZMQ_Global", "bind"):
|
||||
zmq_url = cfg.get("ZMQ_Global", "bind")
|
||||
else:
|
||||
zmq_url = "tcp://127.0.0.1:5556"
|
||||
|
||||
pystemonpath = cfg.get("Directories", "pystemonpath")
|
||||
|
||||
context = zmq.Context()
|
||||
socket = context.socket(zmq.PUB)
|
||||
socket.bind("tcp://*:%s" % port)
|
||||
socket.bind(zmq_url)
|
||||
|
||||
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
|
||||
r = redis.StrictRedis(host='localhost', db=10)
|
||||
|
|
|
@ -14,6 +14,10 @@ tldsfile = faup/src/data/mozilla.tlds
|
|||
|
||||
domainstrending_csv = var/www/static/csv/domainstrendingdata
|
||||
|
||||
pystemonpath = /home/pystemon/pystemon/
|
||||
|
||||
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
|
||||
|
||||
##### Flask #####
|
||||
[Flask]
|
||||
#Maximum number of characters to display in the tooltip
|
||||
|
@ -128,6 +132,7 @@ path = indexdir
|
|||
#address = tcp://crf.circl.lu:5556
|
||||
address = tcp://127.0.0.1:5556
|
||||
channel = 102
|
||||
bind = tcp://127.0.0.1:5556
|
||||
|
||||
[ZMQ_Url]
|
||||
address = tcp://127.0.0.1:5004
|
||||
|
|
31
doc/all_modules.txt
Normal file
31
doc/all_modules.txt
Normal file
|
@ -0,0 +1,31 @@
|
|||
Attributes
|
||||
BrowseWarningPaste
|
||||
Categ
|
||||
Credential
|
||||
CreditCards
|
||||
Curve
|
||||
CurveManageTopSets
|
||||
Cve
|
||||
DomClassifier
|
||||
Duplicates
|
||||
Global
|
||||
Indexer
|
||||
Keys
|
||||
Lines
|
||||
Mail
|
||||
Mixer
|
||||
ModuleInformation
|
||||
Keys
|
||||
Lines
|
||||
Mail
|
||||
Mixer
|
||||
ModuleInformation
|
||||
ModuleStats
|
||||
Onion
|
||||
Phone
|
||||
Release
|
||||
SentimentAnalysis
|
||||
SQLInjectionDetection
|
||||
Tokenize
|
||||
Web
|
||||
WebStats
|
Loading…
Reference in a new issue