mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 00:28:22 +00:00
chg: [pystemon importer] create new PystemonImporter module, refactor pystemon feeder
This commit is contained in:
parent
ccf935700a
commit
ab7b2bdbab
6 changed files with 100 additions and 86 deletions
|
@ -483,7 +483,7 @@ function launch_feeder {
|
|||
screen -dmS "Feeder_Pystemon"
|
||||
sleep 0.1
|
||||
echo -e $GREEN"\t* Launching Pystemon feeder"$DEFAULT
|
||||
screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./feeder/pystemon-feeder.py; read x"
|
||||
screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./importer/PystemonImporter.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Feeder_Pystemon" -X screen -t "Pystemon" bash -c "cd ${AIL_HOME}/../pystemon; ${ENV_PY} ./pystemon.py; read x"
|
||||
else
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# This file is part of AIL framework - Analysis Information Leak framework
|
||||
#
|
||||
# This a simple feeder script feeding data from pystemon to AIL.
|
||||
#
|
||||
# Don't forget to set your pystemonpath and ensure that the
|
||||
# configuration matches this script. Default is Redis DB 10.
|
||||
#
|
||||
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import zmq
|
||||
import random
|
||||
import time
|
||||
import redis
|
||||
import base64
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
|
||||
if config_loader.has_option("ZMQ_Global", "bind"):
|
||||
zmq_url = config_loader.get_config_str("ZMQ_Global", "bind")
|
||||
else:
|
||||
zmq_url = "tcp://127.0.0.1:5556"
|
||||
|
||||
pystemonpath = config_loader.get_config_str("Directories", "pystemonpath")
|
||||
pastes_directory = config_loader.get_config_str("Directories", "pastes")
|
||||
pastes_directory = os.path.join(os.environ['AIL_HOME'], pastes_directory)
|
||||
base_sleeptime = 0.01
|
||||
sleep_inc = 0
|
||||
|
||||
config_loader = None
|
||||
|
||||
context = zmq.Context()
|
||||
socket = context.socket(zmq.PUB)
|
||||
socket.bind(zmq_url)
|
||||
|
||||
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
|
||||
r = redis.StrictRedis(host='localhost', db=10, decode_responses=True)
|
||||
|
||||
# 101 pastes processed feed
|
||||
# 102 raw pastes feed
|
||||
topic = '102'
|
||||
|
||||
while True:
|
||||
time.sleep(base_sleeptime + sleep_inc)
|
||||
item_id = r.lpop("pastes")
|
||||
if item_id is None:
|
||||
continue
|
||||
try:
|
||||
print(item_id)
|
||||
full_item_path = os.path.join(pystemonpath, item_id)
|
||||
if not os.path.isfile(full_item_path):
|
||||
print('Error: {}, file not found'.format(full_item_path))
|
||||
sleep_inc = 1
|
||||
continue
|
||||
|
||||
with open(full_item_path, 'rb') as f: #.read()
|
||||
messagedata = f.read()
|
||||
path_to_send = os.path.join(pastes_directory, item_id)
|
||||
path_to_send = 'pystemon>>' + path_to_send
|
||||
|
||||
s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
|
||||
socket.send(s)
|
||||
sleep_inc = sleep_inc-0.01 if sleep_inc-0.01 > 0 else 0
|
||||
except IOError as e:
|
||||
# file not found, could be a buffering issue -> increase sleeping time
|
||||
print('IOError: Increasing sleep time')
|
||||
sleep_inc += 0.5
|
||||
continue
|
90
bin/importer/PystemonImporter.py
Executable file
90
bin/importer/PystemonImporter.py
Executable file
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# This file is part of AIL framework - Analysis Information Leak framework
|
||||
#
|
||||
# This a simple feeder script feeding data from pystemon to AIL.
|
||||
#
|
||||
# Don't forget to set your pystemonpath and ensure that the
|
||||
# configuration matches this script. Default is Redis DB 10.
|
||||
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L52
|
||||
#
|
||||
|
||||
import base64
|
||||
import os
|
||||
import gzip
|
||||
import sys
|
||||
import redis
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from importer.abstract_importer import AbstractImporter
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
|
||||
class PystemonImporter(AbstractImporter):
|
||||
def __init__(self, pystemon_dir, host='localhost', port=6379, db=10):
|
||||
super().__init__()
|
||||
# Check Pystemon Redis Config:
|
||||
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L54
|
||||
self.r_pystemon = redis.StrictRedis(host=host, port=port, db=db, decode_responses=True)
|
||||
self.dir_pystemon = pystemon_dir
|
||||
|
||||
# # TODO: add exception
|
||||
def encode_and_compress_data(self, content):
|
||||
return base64.b64encode(gzip.compress(content)).decode()
|
||||
|
||||
def importer(self):
|
||||
item_id = self.r_pystemon.lpop("pastes")
|
||||
print(item_id)
|
||||
if item_id:
|
||||
print(item_id)
|
||||
full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
|
||||
# Check if pystemon file exists
|
||||
if not os.path.isfile(full_item_path):
|
||||
print(f'Error: {full_item_path}, file not found')
|
||||
return None
|
||||
# Get Item Content
|
||||
try:
|
||||
with open(full_item_path, 'rb') as f:
|
||||
content = f.read()
|
||||
if not content:
|
||||
return None
|
||||
|
||||
b64_gzipped_content = self.encode_and_compress_data(content)
|
||||
print(item_id, b64_gzipped_content)
|
||||
return f'{item_id} {b64_gzipped_content}'
|
||||
except IOError as e:
|
||||
print(f'Error: {full_item_path}, IOError')
|
||||
return None
|
||||
|
||||
|
||||
class PystemonModuleImporter(AbstractModule):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.pending_seconds = 10
|
||||
config_loader = ConfigLoader()
|
||||
# TODO MIGRATE OLD CONFIG
|
||||
# dir_pystemon = config_loader.get_config_str("Directories", "pystemonpath")
|
||||
# Check Pystemon Redis Config:
|
||||
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L54
|
||||
dir_pystemon = config_loader.get_config_str("Pystemon", "dir")
|
||||
host = config_loader.get_config_str("Pystemon", "redis_host")
|
||||
port = config_loader.get_config_str("Pystemon", "redis_port")
|
||||
db = config_loader.get_config_str("Pystemon", "redis_db")
|
||||
self.importer = PystemonImporter(dir_pystemon, host=host, port=port, db=db)
|
||||
|
||||
def get_message(self):
|
||||
return self.importer.importer()
|
||||
|
||||
def compute(self, message):
|
||||
relay_message = f'pystemon {message}'
|
||||
self.add_message_to_queue(relay_message)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
module = PystemonModuleImporter()
|
||||
module.run()
|
|
@ -18,10 +18,14 @@ tldsfile = faup/src/data/mozilla.tlds
|
|||
|
||||
domainstrending_csv = var/www/static/csv/domainstrendingdata
|
||||
|
||||
pystemonpath = /home/pystemon/pystemon/
|
||||
|
||||
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
|
||||
|
||||
[Pystemon]
|
||||
dir = /home/pystemon/pystemon/
|
||||
redis_host = localhost
|
||||
redis_port = 6379
|
||||
redis_db = 10
|
||||
|
||||
##### Logs ######
|
||||
[Logs]
|
||||
# activate syslog
|
||||
|
|
|
@ -7,6 +7,9 @@ publish = Importers
|
|||
[Importer_Json]
|
||||
publish = Importers,Tags
|
||||
|
||||
[PystemonModuleImporter]
|
||||
publish = Importers
|
||||
|
||||
####################################################
|
||||
|
||||
[Mixer]
|
||||
|
|
Loading…
Reference in a new issue