chg: [core import] add AIL JSON import format, API endpoint added (AIL feeders)

Terrtia 2020-04-28 14:14:45 +02:00
parent 7d59dd883e
commit 8d727cbd31
10 changed files with 339 additions and 8 deletions

bin/Global.py

@@ -119,6 +119,12 @@ if __name__ == '__main__':
                 # decode compressed base64
                 decoded = base64.standard_b64decode(gzip64encoded)
+                try:
+                    new_file_content = gunzip_bytes_obj(decoded)
+                except OSError as e:
+                    print('{}, {}'.format(filename, e))
+                    publisher.warning('Global; Invalid Gzip file: {}, {}'.format(filename, e))
+                    continue
                 # check if file exist
                 if os.path.isfile(filename):
@@ -143,7 +149,6 @@ if __name__ == '__main__':
                     curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()
-                    new_file_content = gunzip_bytes_obj(decoded)
                     new_file_md5 = hashlib.md5(new_file_content).hexdigest()
                     if new_file_md5 != curr_file_md5:
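For context, gunzip_bytes_obj() is a helper that already exists in Global.py and is not part of this diff; the change above only wraps it in a try/except so that an invalid gzip payload is logged and skipped instead of crashing the module, and drops the now-redundant second call further down. A minimal sketch of such a helper, assuming it simply wraps standard-library gzip decompression:

import gzip
import io

def gunzip_bytes_obj(bytes_obj):
    # decompress a gzip-compressed bytes object and return the raw bytes;
    # invalid gzip data raises OSError, which the caller above now catches
    with gzip.GzipFile(fileobj=io.BytesIO(bytes_obj)) as gzipfile:
        return gzipfile.read()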

bin/LAUNCH.sh

@@ -148,6 +148,11 @@ function launching_scripts {
     sleep 0.1
     echo -e $GREEN"\t* Launching scripts"$DEFAULT
+
+    # LAUNCH CORE MODULE
+    screen -S "Script_AIL" -X screen -t "JSON_importer" bash -c "cd ${AIL_BIN}/import; ${ENV_PY} ./JSON_importer.py; read x"
+    sleep 0.1
+
     screen -S "Script_AIL" -X screen -t "ModuleInformation" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModulesInformationV2.py -k 0 -c 1; read x"
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "Mixer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Mixer.py; read x"

bin/import/JSON_importer.py (new executable file, 60 lines)

@@ -0,0 +1,60 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The JSON Receiver Module
================

Receive JSON items (example: Twitter feeder)

"""
import os
import json
import redis
import sys
import time

sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
from pubsublogger import publisher

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader

import importer

if __name__ == '__main__':

    publisher.port = 6380
    publisher.channel = 'Script'

    config_section = 'Importer_Json'
    process = Process(config_section)

    config_loader = ConfigLoader.ConfigLoader()

    # REDIS #
    server_cache = config_loader.get_redis_conn("Redis_Log_submit")
    config_loader = None

    # LOGGING #
    publisher.info("JSON Feed Script started to receive & publish.")

    # OTHER CONFIG #
    DEFAULT_FEEDER_NAME = 'Unknown Feeder'

    while True:
        json_item = importer.get_json_item_to_import()
        if json_item:
            json_item = json.loads(json_item)
            feeder_name = importer.get_json_source(json_item)
            print('importing: {} feeder'.format(feeder_name))

            json_import_class = importer.get_json_receiver_class(feeder_name)
            importer_obj = json_import_class(feeder_name, json_item)

            importer.process_json(importer_obj, process)
        else:
            time.sleep(5)
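Judging from this loop and the feeder classes that follow, an item popped from the import queue is a JSON object of roughly the following shape; the field names come from the diff, the concrete values are illustrative only:

# hypothetical example of a queued JSON item
json_item = {
    # 'source' is capitalized and '-' replaced by '_' to resolve the feeder class,
    # e.g. 'ail_feeder_twitter' -> Ail_feeder_twitter (see importer.py below)
    "source": "ail_feeder_twitter",
    # gzip-compressed, base64-encoded raw content of the item
    "data": "H4sIAAAAAAAA...",
    # optional metadata; the twitter feeder reads meta['twitter:tweet_id']
    "meta": {
        "twitter:tweet_id": "1255112233445566777"
    }
}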

bin/import/ail_json_importer/Ail_feeder_twitter.py (new file)

@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The JSON Receiver Module
================

Receive JSON items (example: Twitter feeder)

"""
import os
import json
import sys
import datetime

# sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
# import item_basic

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
from Default_json import Default_json

class Ail_feeder_twitter(Default_json):
    """Twitter Feeder functions"""

    def __init__(self, name, json_item):
        super().__init__(name, json_item)

    def get_feeder_name(self):
        return 'twitter'

    # define item id
    def get_item_id(self):
        # use twitter timestamp ?
        item_date = datetime.date.today().strftime("%Y/%m/%d")
        item_id = str(self.json_item['meta']['twitter:tweet_id'])
        return os.path.join('twitter', item_date, item_id) + '.gz'

    # # TODO:
    def process_json_meta(self, process):
        '''
        Process JSON meta field.
        '''
        return None
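With the example json_item sketched after JSON_importer.py above, the overridden get_item_id() produces ids of the form twitter/<YYYY/MM/DD>/<tweet_id>.gz, for instance (illustrative date and id):

feeder = Ail_feeder_twitter('ail_feeder_twitter', json_item)
feeder.get_item_id()   # -> 'twitter/2020/04/28/1255112233445566777.gz'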

bin/import/ail_json_importer/Default_json.py (new file)

@@ -0,0 +1,69 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The JSON Receiver Module
================

Receive JSON items (example: Twitter feeder)

"""
import os
import datetime
import json
import redis
import time
import sys
import uuid

#sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
#import ConfigLoader
#import item_basic

class Default_json(object):
    """Default Feeder functions"""

    def __init__(self, feeder_name, json_item):
        self.name = feeder_name
        self.json_item = json_item

    def get_feeder_source(self):
        '''
        Return the original feeder name (json source field).
        '''
        return self.name

    def get_feeder_name(self):
        '''
        Return the feeder name: the first part of the item_id, displayed in the UI.
        '''
        return self.name

    def get_json_file(self):
        '''
        Return the JSON dict.
        '''
        return self.json_item

    def get_feeder_uuid(self):
        pass

    def get_item_gzip64encoded_content(self):
        '''
        Return the item content (gzip-compressed, base64-encoded).
        '''
        return self.json_item['data']

    ## OVERWRITE ME ##
    def get_item_id(self):
        '''
        Return the item id (defines the item id).
        '''
        item_date = datetime.date.today().strftime("%Y/%m/%d")
        return os.path.join(self.get_feeder_name(), item_date, str(uuid.uuid4())) + '.gz'

    ## OVERWRITE ME ##
    def process_json_meta(self, process):
        '''
        Process JSON meta field.
        '''
        return None
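Default_json is the base class that concrete feeders override (see the ## OVERWRITE ME ## markers): a new feeder is added by dropping a module into bin/import/ail_json_importer/ whose file name matches its class name, which is how importer.py resolves it. A hypothetical sketch; the Ail_feeder_example name and its 'example:id' meta key are invented for illustration:

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
# hypothetical file: bin/import/ail_json_importer/Ail_feeder_example.py
import os
import sys
import datetime

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer'))
from Default_json import Default_json

class Ail_feeder_example(Default_json):
    """Example feeder: only the feeder name and item id are customized."""

    def get_feeder_name(self):
        # first path component of the item id, displayed in the UI
        return 'example'

    def get_item_id(self):
        # build <feeder>/<date>/<custom id>.gz instead of a random uuid
        item_date = datetime.date.today().strftime("%Y/%m/%d")
        return os.path.join('example', item_date, str(self.json_item['meta']['example:id'])) + '.gz'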

bin/import/importer.py (new executable file, 109 lines)

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The JSON Receiver Module
================

Receive JSON items (example: Twitter feeder)

"""
import os
import importlib
import json
import redis
import sys
import time

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader

# Import all receivers
#from all_json_receiver import *

#### CONFIG ####
config_loader = ConfigLoader.ConfigLoader()
server_cache = config_loader.get_redis_conn("Redis_Log_submit")
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
config_loader = None

DEFAULT_FEEDER_NAME = 'Default_json'
#### ------ ####

def reload_json_importer_list():
    global importer_list
    importer_json_dir = os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer')
    importer_list = [f[:-3] for f in os.listdir(importer_json_dir) if os.path.isfile(os.path.join(importer_json_dir, f))]

# init importer list
importer_list = []
reload_json_importer_list()

#### FUNCTIONS ####
def get_json_importer_list():
    return importer_list

def add_json_to_json_queue(json_item):
    json_item = json.dumps(json_item)
    r_serv_db.sadd('importer:json:item', json_item)

def get_json_item_to_import():
    return r_serv_db.spop('importer:json:item')

def get_json_receiver_class(feeder_name_in):
    global importer_list
    # sanitize class name
    feeder_name = feeder_name_in[:1].upper() + feeder_name_in[1:]
    feeder_name = feeder_name.replace('-', '_')

    if feeder_name is None or feeder_name not in get_json_importer_list():
        reload_json_importer_list() # add refresh timing ?
        if feeder_name not in get_json_importer_list():
            print('Unknown feeder: {}'.format(feeder_name_in))
            feeder_name = 'Default_json'

    # avoid subpackages
    parts = feeder_name.split('.')
    module = 'ail_json_importer.{}'.format(parts[-1])

    # import the json importer class
    mod = importlib.import_module(module)
    class_name = getattr(mod, feeder_name)
    return class_name

def get_json_source(json_item):
    return json_item.get('source', DEFAULT_FEEDER_NAME)

def process_json(importer_obj, process):
    item_id = importer_obj.get_item_id()
    if 'meta' in importer_obj.get_json_file():
        importer_obj.process_json_meta(process)

    # send data to queue
    send_item_to_ail_queue(item_id, importer_obj.get_item_gzip64encoded_content(), importer_obj.get_feeder_name(), process)

def send_item_to_ail_queue(item_id, gzip64encoded_content, feeder_name, process):
    # send paste to Global (via the Mixer queue)
    relay_message = "{0} {1}".format(item_id, gzip64encoded_content)
    process.populate_set_out(relay_message, 'Mixer')

    # increase nb of pastes by feeder name
    server_cache.hincrby("mixer_cache:list_feeder", feeder_name, 1)
#### ---- ####

#### API ####
def api_import_json_item(data_json):
    if not data_json:
        return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

    # # TODO: add JSON verification
    res = add_json_to_json_queue(data_json)
    if not res:
        return ({'status': 'success'}, 200)
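Taken together, importer.py only json.dumps() an incoming item into the Redis set 'importer:json:item' (add_json_to_json_queue); the JSON_importer.py loop later spop()s it, resolves the feeder class and relays '<item_id> <gzip64encoded>' to the Mixer queue. On the feeder side, the 'data' field therefore has to be gzip-compressed and base64-encoded, matching the decoding done in Global.py. A small sketch, standard library only, with illustrative names:

import base64
import gzip

def prepare_data_field(text):
    # gzip-compress, then base64-encode the raw text, as expected in json_item['data']
    return base64.standard_b64encode(gzip.compress(text.encode())).decode()

json_item = {'source': 'ail_feeder_example', 'data': prepare_data_field('some raw content')}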

bin/lib/item_basic.py (new executable file, 25 lines)

@@ -0,0 +1,25 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader

config_loader = ConfigLoader.ConfigLoader()
# get and sanitize the PASTES directory
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
config_loader = None

def exist_item(item_id):
    filename = get_item_filepath(item_id)
    if os.path.isfile(filename):
        return True
    else:
        return False

def get_item_filepath(item_id):
    filename = os.path.join(PASTES_FOLDER, item_id)
    return os.path.realpath(filename)
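item_basic.py pulls the path helpers out of packages/Item.py (see the next hunk) so lower-level code can resolve item ids without importing the full Item module. A usage sketch, assuming bin/lib/ is on sys.path as in the diff above; the item id is illustrative:

import item_basic

item_id = 'twitter/2020/04/28/1255112233445566777.gz'
# resolve the id to an absolute path under the pastes directory
path = item_basic.get_item_filepath(item_id)
if item_basic.exist_item(item_id):
    print(path)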

bin/packages/Item.py

@@ -15,6 +15,7 @@ import Cryptocurrency
 import Pgp

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
+import item_basic
 import ConfigLoader
 import Correlate_object
 import Decoded
@@ -31,11 +32,7 @@ screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_co
 config_loader = None

 def exist_item(item_id):
-    filename = get_item_filepath(item_id)
-    if os.path.isfile(filename):
-        return True
-    else:
-        return False
+    return item_basic.exist_item(item_id)

 def get_basename(item_id):
     return os.path.basename(item_id)
@@ -44,8 +41,7 @@ def get_item_id(full_path):
     return full_path.replace(PASTES_FOLDER, '', 1)

 def get_item_filepath(item_id):
-    filename = os.path.join(PASTES_FOLDER, item_id)
-    return os.path.realpath(filename)
+    return item_basic.get_item_filepath(item_id)

 def get_item_date(item_id, add_separator=False):
     l_directory = item_id.split('/')

bin/packages/modules.cfg

@@ -2,6 +2,10 @@
 subscribe = ZMQ_Global
 publish = Redis_Mixer,Redis_preProcess1

+[Importer_Json]
+subscribe = ZMQ_JSON
+publish = Redis_Mixer,Redis_Tags
+
 [Global]
 subscribe = Redis_Mixer
 publish = Redis_Global,Redis_ModuleStats

var/www/modules/restApi/Flask_restApi.py

@@ -24,6 +24,10 @@ import Paste
 import Tag
 import Term

+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import'))
+import importer
+
 from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
 from flask_login import login_required
@@ -588,5 +592,17 @@ def import_item_uuid():
         return Response(json.dumps({'status': 'error', 'reason': 'Invalid response'}), mimetype='application/json'), 400

+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+#
+#
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+@restApi.route("api/v1/import/json/item", methods=['POST'])
+@token_required('user')
+def import_json_item():
+    data_json = request.get_json()
+    res = importer.api_import_json_item(data_json)
+    return Response(json.dumps(res[0]), mimetype='application/json'), res[1]
+
 # ========= REGISTRATION =========
 app.register_blueprint(restApi, url_prefix=baseUrl)
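The new route can be exercised like the existing api/v1 import endpoints. A hedged sketch with the python requests library; the host, port and token-header convention are assumptions based on a default AIL install and should be adapted to your deployment:

import json
import requests

AIL_URL = 'https://127.0.0.1:7000'     # illustrative: local AIL web/API instance
API_KEY = '<your user token>'          # token of a user with the 'user' role

payload = {
    'source': 'ail_feeder_example',    # hypothetical feeder name
    'data': 'H4sIAAAAAAAA...',         # gzip-compressed, base64-encoded content
    'meta': {}
}

r = requests.post(AIL_URL + '/api/v1/import/json/item',
                  headers={'Authorization': API_KEY, 'Content-Type': 'application/json'},
                  data=json.dumps(payload),
                  verify=False)        # default install uses a self-signed certificate
print(r.status_code, r.json())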