mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
Merge remote-tracking branch 'upstream/master' into production
This commit is contained in:
commit
c422db6e6d
8 changed files with 129 additions and 13 deletions
35
README.md
35
README.md
|
@ -101,6 +101,37 @@ Eventually you can browse the status of the AIL framework website at the followi
|
|||
|
||||
``http://localhost:7000/``
|
||||
|
||||
How to
|
||||
======
|
||||
|
||||
How to feed the AIL framework
|
||||
-----------------------------
|
||||
|
||||
For the moment, there are two different ways to feed AIL with data:
|
||||
|
||||
1. Be a collaborator of CIRCL and ask to access our feed. It will be sent to the static IP you are using for AIL.
|
||||
|
||||
2. You can set up [pystemon](https://github.com/CIRCL/pystemon) and use the custom feeder provided by AIL (see below).
|
||||
|
||||
### Feeding AIL with pystemon
|
||||
AIL is an analysis tool, not a collector!
|
||||
However, if you want to collect some pastes and feed them to AIL, the procedure is described below.
|
||||
|
||||
Nevertheless, moderate your queries!
|
||||
|
||||
Here are the steps to set up pystemon and feed data to AIL:
|
||||
|
||||
1. Clone [pystemon's git repository](https://github.com/CIRCL/pystemon)
|
||||
|
||||
2. Install its python dependencies inside your virtual environment
|
||||
|
||||
3. Launch pystemon ``` ./pystemon ```
|
||||
|
||||
4. Edit the file ```bin/feeder/pystemon-feeder.py``` and modify the pystemonpath path accordingly
|
||||
|
||||
5. Launch pystemon-feeder ``` ./pystemon-feeder.py ```
|
||||
|
||||
|
||||
How to create a new module
|
||||
--------------------------
|
||||
|
||||
|
@ -117,6 +148,10 @@ Feel free to fork the code, play with it, make some patches or add additional an
|
|||
|
||||
To contribute your module, feel free to open a pull request with your contribution.
|
||||
|
||||
Overview and License
|
||||
====================
|
||||
|
||||
|
||||
Redis and LevelDB overview
|
||||
--------------------------
|
||||
|
||||
|
|
|
@ -32,6 +32,20 @@ accepted_Mime_type = ['text/plain']
|
|||
size_threshold = 250
|
||||
line_max_length_threshold = 1000
|
||||
|
||||
import os
|
||||
import ConfigParser
|
||||
|
||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||
if not os.path.exists(configfile):
|
||||
raise Exception('Unable to find the configuration file. \
|
||||
Did you set environment variables? \
|
||||
Or activate the virtualenv.')
|
||||
|
||||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(configfile)
|
||||
|
||||
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
|
||||
|
||||
def Analyse(message, server):
|
||||
path = message
|
||||
paste = Paste.Paste(path)
|
||||
|
@ -61,7 +75,7 @@ def Analyse(message, server):
|
|||
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
||||
neg_line = 0
|
||||
pos_line = 0
|
||||
sid = SentimentIntensityAnalyzer()
|
||||
sid = SentimentIntensityAnalyzer(sentiment_lexicon_file)
|
||||
for sentence in sentences:
|
||||
ss = sid.polarity_scores(sentence)
|
||||
for k in sorted(ss):
|
||||
|
|
|
@ -28,6 +28,15 @@ from packages import Paste
|
|||
from pubsublogger import publisher
|
||||
|
||||
from Helper import Process
|
||||
import signal
|
||||
|
||||
class TimeoutException(Exception):
|
||||
pass
|
||||
|
||||
def timeout_handler(signum, frame):
|
||||
raise TimeoutException
|
||||
|
||||
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
|
||||
if __name__ == "__main__":
|
||||
publisher.port = 6380
|
||||
|
@ -44,10 +53,17 @@ if __name__ == "__main__":
|
|||
print message
|
||||
if message is not None:
|
||||
paste = Paste.Paste(message)
|
||||
for word, score in paste._get_top_words().items():
|
||||
if len(word) >= 4:
|
||||
msg = '{} {} {}'.format(paste.p_path, word, score)
|
||||
p.populate_set_out(msg)
|
||||
signal.alarm(5)
|
||||
try:
|
||||
for word, score in paste._get_top_words().items():
|
||||
if len(word) >= 4:
|
||||
msg = '{} {} {}'.format(paste.p_path, word, score)
|
||||
p.populate_set_out(msg)
|
||||
except TimeoutException:
|
||||
print ("{0} processing timeout".format(paste.p_path))
|
||||
continue
|
||||
else:
|
||||
signal.alarm(0)
|
||||
else:
|
||||
publisher.debug("Tokeniser is idling 10s")
|
||||
time.sleep(10)
|
||||
|
|
|
@ -113,7 +113,7 @@ if __name__ == "__main__":
|
|||
# IP allocation)
|
||||
if cc is not None and cc != "EU":
|
||||
print hostl, asn, cc, \
|
||||
pycountry.countries.get(alpha2=cc).name
|
||||
pycountry.countries.get(alpha_2=cc).name
|
||||
if cc == cc_critical:
|
||||
to_print = 'Url;{};{};{};Detected {} {}'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name,
|
||||
|
|
|
@ -39,7 +39,7 @@ def get_date_range(num_day):
|
|||
return date_list
|
||||
|
||||
# Compute the progression for one keyword
|
||||
def compute_progression_word(keyword):
|
||||
def compute_progression_word(server, num_day, keyword):
|
||||
date_range = get_date_range(num_day)
|
||||
# check if this keyword is eligible for progression
|
||||
keyword_total_sum = 0
|
||||
|
@ -73,12 +73,12 @@ def compute_progression(server, field_name, num_day, url_parsed):
|
|||
if keyword is not None:
|
||||
|
||||
#compute the progression of the current word
|
||||
keyword_increase, keyword_total_sum = compute_progression_word(keyword)
|
||||
keyword_increase, keyword_total_sum = compute_progression_word(server, num_day, keyword)
|
||||
|
||||
#re-compute the progression of 2*max_set_cardinality
|
||||
current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality)
|
||||
for word, value in array_top_day:
|
||||
word_inc, word_tot_sum = compute_progression_word(word)
|
||||
for word, value in current_top:
|
||||
word_inc, word_tot_sum = compute_progression_word(server, num_day, word)
|
||||
server.zrem(redis_progression_name_set, word)
|
||||
if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase):
|
||||
server.zadd(redis_progression_name_set, float(word_inc), word)
|
||||
|
|
21
bin/feeder/pystemon-feeder.py
Normal file → Executable file
21
bin/feeder/pystemon-feeder.py
Normal file → Executable file
|
@ -24,13 +24,28 @@ import sys
|
|||
import time
|
||||
import redis
|
||||
import base64
|
||||
import os
|
||||
import ConfigParser
|
||||
|
||||
port = "5556"
|
||||
pystemonpath = "/home/pystemon/pystemon/"
|
||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||
if not os.path.exists(configfile):
|
||||
raise Exception('Unable to find the configuration file. \
|
||||
Did you set environment variables? \
|
||||
Or activate the virtualenv.')
|
||||
|
||||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(configfile)
|
||||
|
||||
if cfg.has_option("ZMQ_Global", "bind"):
|
||||
zmq_url = cfg.get("ZMQ_Global", "bind")
|
||||
else:
|
||||
zmq_url = "tcp://127.0.0.1:5556"
|
||||
|
||||
pystemonpath = cfg.get("Directories", "pystemonpath")
|
||||
|
||||
context = zmq.Context()
|
||||
socket = context.socket(zmq.PUB)
|
||||
socket.bind("tcp://*:%s" % port)
|
||||
socket.bind(zmq_url)
|
||||
|
||||
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
|
||||
r = redis.StrictRedis(host='localhost', db=10)
|
||||
|
|
|
@ -14,6 +14,10 @@ tldsfile = faup/src/data/mozilla.tlds
|
|||
|
||||
domainstrending_csv = var/www/static/csv/domainstrendingdata
|
||||
|
||||
pystemonpath = /home/pystemon/pystemon/
|
||||
|
||||
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
|
||||
|
||||
##### Flask #####
|
||||
[Flask]
|
||||
#Maximum number of characters to display in the tooltip
|
||||
|
@ -128,6 +132,7 @@ path = indexdir
|
|||
#address = tcp://crf.circl.lu:5556
|
||||
address = tcp://127.0.0.1:5556
|
||||
channel = 102
|
||||
bind = tcp://127.0.0.1:5556
|
||||
|
||||
[ZMQ_Url]
|
||||
address = tcp://127.0.0.1:5004
|
||||
|
|
31
doc/all_modules.txt
Normal file
31
doc/all_modules.txt
Normal file
|
@ -0,0 +1,31 @@
|
|||
Attributes
|
||||
BrowseWarningPaste
|
||||
Categ
|
||||
Credential
|
||||
CreditCards
|
||||
Curve
|
||||
CurveManageTopSets
|
||||
Cve
|
||||
DomClassifier
|
||||
Duplicates
|
||||
Global
|
||||
Indexer
|
||||
Keys
|
||||
Lines
|
||||
Mail
|
||||
Mixer
|
||||
ModuleInformation
|
||||
Keys
|
||||
Lines
|
||||
Mail
|
||||
Mixer
|
||||
ModuleInformation
|
||||
ModuleStats
|
||||
Onion
|
||||
Phone
|
||||
Release
|
||||
SentimentAnalysis
|
||||
SQLInjectionDetection
|
||||
Tokenize
|
||||
Web
|
||||
WebStats
|
Loading…
Reference in a new issue