From 68ca20db8f44751c367c2e30513ec9d6342ec83a Mon Sep 17 00:00:00 2001 From: Olivier MEDOC Date: Tue, 10 Jan 2017 16:45:32 +0100 Subject: [PATCH 1/7] fix to use the new pycountry API --- bin/Web.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/Web.py b/bin/Web.py index 49790185..0fae546d 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -113,7 +113,7 @@ if __name__ == "__main__": # IP allocation) if cc is not None and cc != "EU": print hostl, asn, cc, \ - pycountry.countries.get(alpha2=cc).name + pycountry.countries.get(alpha_2=cc).name if cc == cc_critical: to_print = 'Url;{};{};{};Detected {} {}'.format( PST.p_source, PST.p_date, PST.p_name, From 3dc014dad949b79633e3d6513ee6fea1eb63350d Mon Sep 17 00:00:00 2001 From: Olivier MEDOC Date: Tue, 10 Jan 2017 16:46:46 +0100 Subject: [PATCH 2/7] fix invalid variable propagation --- bin/WebStats.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/WebStats.py b/bin/WebStats.py index 1c41b64d..4cc05b48 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -39,7 +39,7 @@ def get_date_range(num_day): return date_list # Compute the progression for one keyword -def compute_progression_word(keyword): +def compute_progression_word(server, num_day, keyword): date_range = get_date_range(num_day) # check if this keyword is eligible for progression keyword_total_sum = 0 @@ -73,12 +73,12 @@ def compute_progression(server, field_name, num_day, url_parsed): if keyword is not None: #compute the progression of the current word - keyword_increase, keyword_total_sum = compute_progression_word(keyword) + keyword_increase, keyword_total_sum = compute_progression_word(server, num_day, keyword) #re-compute the progression of 2*max_set_cardinality current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality) - for word, value in array_top_day: - word_inc, word_tot_sum = compute_progression_word(word) + for word, value in current_top: + word_inc, word_tot_sum = compute_progression_word(server, num_day, word) server.zrem(redis_progression_name_set, word) if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase): server.zadd(redis_progression_name_set, float(word_inc), word) From 8102ff009dfbc1c6a3133107f0b381ece7dfc064 Mon Sep 17 00:00:00 2001 From: Olivier MEDOC Date: Tue, 10 Jan 2017 16:48:05 +0100 Subject: [PATCH 3/7] add all_modules.txt file so that the WebGUI can do proper cleanup --- doc/all_modules.txt | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 doc/all_modules.txt diff --git a/doc/all_modules.txt b/doc/all_modules.txt new file mode 100644 index 00000000..fabdf4e9 --- /dev/null +++ b/doc/all_modules.txt @@ -0,0 +1,31 @@ +Attributes +BrowseWarningPaste +Categ +Credential +CreditCards +Curve +CurveManageTopSets +Cve +DomClassifier +Duplicates +Global +Indexer +Keys +Lines +Mail +Mixer +ModuleInformation +Keys +Lines +Mail +Mixer +ModuleInformation +ModuleStats +Onion +Phone +Release +SentimentAnalysis +SQLInjectionDetection +Tokenize +Web +WebStats From 83db40104c05e4f3466cfd48a3da3ede627f47ec Mon Sep 17 00:00:00 2001 From: Olivier MEDOC Date: Tue, 10 Jan 2017 18:18:55 +0100 Subject: [PATCH 4/7] import pystemon-feeder configuration from the central configuration file --- bin/feeder/pystemon-feeder.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index 1a9088b3..d36ed66f 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -24,13 +24,24 @@ import sys import time import redis import base64 +import os +import ConfigParser -port = "5556" -pystemonpath = "/home/pystemon/pystemon/" +configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') +if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + +cfg = ConfigParser.ConfigParser() +cfg.read(configfile) + +zmq_url = cfg.get("ZMQ_Global", "address") +pystemonpath = cfg.get("Directories", "pystemonpath") context = zmq.Context() socket = context.socket(zmq.PUB) -socket.bind("tcp://*:%s" % port) +socket.bind(zmq_url) # check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 r = redis.StrictRedis(host='localhost', db=10) From c0fac820319a9d5e337251794feefff487468f1f Mon Sep 17 00:00:00 2001 From: ptitdoc Date: Tue, 10 Jan 2017 18:33:46 +0100 Subject: [PATCH 5/7] Add pystemon path in configuration sample --- bin/packages/config.cfg.sample | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 4f2899a0..5ce73e3f 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -14,6 +14,8 @@ tldsfile = faup/src/data/mozilla.tlds domainstrending_csv = var/www/static/csv/domainstrendingdata +pystemonpath = /home/pystemon/pystemon/ + ##### Flask ##### [Flask] #Maximum number of character to display in the toolip From 9f9c265cb01f0d3ff202a1f9554169de4c53e1ae Mon Sep 17 00:00:00 2001 From: Olivier MEDOC Date: Wed, 11 Jan 2017 11:00:36 +0100 Subject: [PATCH 6/7] SentimentAnalysis: use lexicon file path from the ail configuration file --- bin/SentimentAnalysis.py | 16 +++++++++++++++- bin/packages/config.cfg.sample | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index 09f59e40..e16890e5 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -32,6 +32,20 @@ accepted_Mime_type = ['text/plain'] size_threshold = 250 line_max_length_threshold = 1000 +import os +import ConfigParser + +configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') +if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + +cfg = ConfigParser.ConfigParser() +cfg.read(configfile) + +sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") + def Analyse(message, server): path = message paste = Paste.Paste(path) @@ -61,7 +75,7 @@ def Analyse(message, server): avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} neg_line = 0 pos_line = 0 - sid = SentimentIntensityAnalyzer() + sid = SentimentIntensityAnalyzer(sentiment_lexicon_file) for sentence in sentences: ss = sid.polarity_scores(sentence) for k in sorted(ss): diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 5ce73e3f..79bd402b 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -16,6 +16,8 @@ domainstrending_csv = var/www/static/csv/domainstrendingdata pystemonpath = /home/pystemon/pystemon/ +sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt + ##### Flask ##### [Flask] #Maximum number of character to display in the toolip From 16044d4d369ae8056e206c4306c6b2c05ac9baab Mon Sep 17 00:00:00 2001 From: Olivier MEDOC Date: Fri, 13 Jan 2017 14:54:43 +0100 Subject: [PATCH 7/7] pystemon-feeder: add the option bind in configuration file instead of address This is essentially because multiple feeds can be used in the address configuration variable. --- bin/feeder/pystemon-feeder.py | 6 +++++- bin/packages/config.cfg.sample | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index d36ed66f..770655bc 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -36,7 +36,11 @@ if not os.path.exists(configfile): cfg = ConfigParser.ConfigParser() cfg.read(configfile) -zmq_url = cfg.get("ZMQ_Global", "address") +if cfg.has_option("ZMQ_Global", "bind"): + zmq_url = cfg.get("ZMQ_Global", "bind") +else: + zmq_url = "tcp://127.0.0.1:5556" + pystemonpath = cfg.get("Directories", "pystemonpath") context = zmq.Context() diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 79bd402b..f2e8285d 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -132,6 +132,7 @@ path = indexdir #address = tcp://crf.circl.lu:5556 address = tcp://127.0.0.1:5556 channel = 102 +bind = tcp://127.0.0.1:5556 [ZMQ_Url] address = tcp://127.0.0.1:5004