diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index d49dcb39..8cd71305 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -32,6 +32,20 @@ accepted_Mime_type = ['text/plain'] size_threshold = 250 line_max_length_threshold = 1000 +import os +import ConfigParser + +configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') +if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + +cfg = ConfigParser.ConfigParser() +cfg.read(configfile) + +sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") + def Analyse(message, server): path = message paste = Paste.Paste(path) @@ -61,7 +75,7 @@ def Analyse(message, server): avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} neg_line = 0 pos_line = 0 - sid = SentimentIntensityAnalyzer() + sid = SentimentIntensityAnalyzer(sentiment_lexicon_file) for sentence in sentences: ss = sid.polarity_scores(sentence) for k in sorted(ss): diff --git a/bin/Web.py b/bin/Web.py index 49790185..0fae546d 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -113,7 +113,7 @@ if __name__ == "__main__": # IP allocation) if cc is not None and cc != "EU": print hostl, asn, cc, \ - pycountry.countries.get(alpha2=cc).name + pycountry.countries.get(alpha_2=cc).name if cc == cc_critical: to_print = 'Url;{};{};{};Detected {} {}'.format( PST.p_source, PST.p_date, PST.p_name, diff --git a/bin/WebStats.py b/bin/WebStats.py index 1c41b64d..4cc05b48 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -39,7 +39,7 @@ def get_date_range(num_day): return date_list # Compute the progression for one keyword -def compute_progression_word(keyword): +def compute_progression_word(server, num_day, keyword): date_range = get_date_range(num_day) # check if this keyword is eligible for progression keyword_total_sum = 0 @@ -73,12 +73,12 @@ def compute_progression(server, field_name, num_day, url_parsed): if keyword is not None: #compute the progression of the current word - keyword_increase, keyword_total_sum = compute_progression_word(keyword) + keyword_increase, keyword_total_sum = compute_progression_word(server, num_day, keyword) #re-compute the progression of 2*max_set_cardinality current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality) - for word, value in array_top_day: - word_inc, word_tot_sum = compute_progression_word(word) + for word, value in current_top: + word_inc, word_tot_sum = compute_progression_word(server, num_day, word) server.zrem(redis_progression_name_set, word) if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase): server.zadd(redis_progression_name_set, float(word_inc), word) diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index 1a9088b3..770655bc 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -24,13 +24,28 @@ import sys import time import redis import base64 +import os +import ConfigParser -port = "5556" -pystemonpath = "/home/pystemon/pystemon/" +configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') +if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + +cfg = ConfigParser.ConfigParser() +cfg.read(configfile) + +if cfg.has_option("ZMQ_Global", "bind"): + zmq_url = cfg.get("ZMQ_Global", "bind") +else: + zmq_url = "tcp://127.0.0.1:5556" + +pystemonpath = cfg.get("Directories", "pystemonpath") context = zmq.Context() socket = context.socket(zmq.PUB) -socket.bind("tcp://*:%s" % port) +socket.bind(zmq_url) # check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 r = redis.StrictRedis(host='localhost', db=10) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 4f2899a0..f2e8285d 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -14,6 +14,10 @@ tldsfile = faup/src/data/mozilla.tlds domainstrending_csv = var/www/static/csv/domainstrendingdata +pystemonpath = /home/pystemon/pystemon/ + +sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt + ##### Flask ##### [Flask] #Maximum number of character to display in the toolip @@ -128,6 +132,7 @@ path = indexdir #address = tcp://crf.circl.lu:5556 address = tcp://127.0.0.1:5556 channel = 102 +bind = tcp://127.0.0.1:5556 [ZMQ_Url] address = tcp://127.0.0.1:5004 diff --git a/doc/all_modules.txt b/doc/all_modules.txt new file mode 100644 index 00000000..fabdf4e9 --- /dev/null +++ b/doc/all_modules.txt @@ -0,0 +1,31 @@ +Attributes +BrowseWarningPaste +Categ +Credential +CreditCards +Curve +CurveManageTopSets +Cve +DomClassifier +Duplicates +Global +Indexer +Keys +Lines +Mail +Mixer +ModuleInformation +Keys +Lines +Mail +Mixer +ModuleInformation +ModuleStats +Onion +Phone +Release +SentimentAnalysis +SQLInjectionDetection +Tokenize +Web +WebStats