mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-23 06:37:15 +00:00
321 lines
14 KiB
Python
Executable file
321 lines
14 KiB
Python
Executable file
#!/usr/bin/env python2
|
|
# -*-coding:UTF-8 -*
|
|
|
|
'''

This module can be used to see information about the running modules.
This information is logged in "logs/moduleInfo.log".

It can also try to manage them by killing inactive ones.
However, it does not support multiple occurrences of the same module
(it will kill the first one obtained by get).

'''
|
|
|
|
import time
|
|
import datetime
|
|
import redis
|
|
import os
|
|
import signal
|
|
import argparse
|
|
from subprocess import PIPE, Popen
|
|
import ConfigParser
|
|
import json
|
|
from terminaltables import AsciiTable
|
|
import textwrap
|
|
from colorama import Fore, Back, Style, init
|
|
|
|
# CONFIG VARIABLES

# Seconds a module may spend on its current item before it is reported as
# stuck. 60*10*1 is 600 seconds, i.e. 10 minutes (not one hour). When the file
# is run as a script this default is replaced by the value read from the
# configuration file.
threshold_stucked_module = 60*10*1
# Minimum number of seconds between two kill attempts on the same PID.
kill_retry_threshold = 60  # 1 minute
# File receiving one JSON line per detected stuck module.
log_filename = "../logs/moduleInfo.log"
# Shell pipelines used to look a process up, by script name or by PID.
command_search_pid = "ps a -o pid,cmd | grep {}"
command_search_name = "ps a -o pid,cmd | grep {}"
# Shell command used to start a module again in a new window of the
# "Script" screen session.
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""

init()  # Necessary for colorama
# Fixed-size "Last actions" table: a header row followed by 14 slots. New rows
# are inserted at index 1 and the last row is popped to keep the size constant.
printarrayGlob = [None]*14
printarrayGlob.insert(0, ["Time", "Module", "PID", "Action"])
# Timestamp (epoch seconds) of the last kill attempt, keyed by PID.
lastTimeKillCommand = {}
|
|
|
|
def getPid(module):
    """Return the PID of the running process of *module*, or None.

    The process list is obtained with ``command_search_pid`` (``ps`` piped
    into ``grep`` on ``<module>.py``). A line is accepted only when it holds
    an interpreter token (``python2`` or ``python``) and the exact script
    token ``./<module>.py``, so that a module whose name is the suffix of
    another one (e.g. ``Stats`` vs ``ModuleStats``) is not confused with it.

    Only the first matching line is used: several instances of the same
    module are not supported.

    :param module: module name, without the ``.py`` extension
    :return: the PID as an int, or None when no matching process is found
    """
    script_name = module + ".py"
    proc = Popen(command_search_pid.format(script_name), stdin=PIPE, stdout=PIPE, shell=True)
    # communicate() drains the pipe, closes it and reaps the child, so that
    # repeated calls do not leave zombie processes or open descriptors behind.
    output = proc.communicate()[0]

    script_token = "./" + script_name
    for line in output.splitlines():
        splittedLine = line.split()
        if not splittedLine:
            continue
        is_python = 'python2' in splittedLine or 'python' in splittedLine
        if is_python and script_token in splittedLine:
            try:
                # First column of "ps -o pid,cmd" is the PID.
                return int(splittedLine[0])
            except ValueError:
                continue
    return None
|
|
|
|
def clearRedisModuleInfo():
    """Delete every "MODULE_*" key from Redis and record it in the action table."""
    stale_keys = server.keys("MODULE_*")
    for stale_key in stale_keys:
        server.delete(stale_key)

    now = datetime.datetime.fromtimestamp(int(time.time()))
    # Put the new row right under the header, then drop the last row so the
    # table keeps its fixed length.
    printarrayGlob[1:1] = [[now, "*", "-", "Cleared redis module info"]]
    del printarrayGlob[-1]
|
|
|
|
def cleanRedis():
    """Remove from Redis the PIDs that no longer match a running module.

    For every "MODULE_TYPE_*" set, each stored PID is looked up in the process
    list (``command_search_name``). The PID is kept only if a process line
    whose PID column is exactly that PID is a python process running
    ``./<module>.py``; otherwise it is removed from the set and the removal is
    recorded in the action table.
    """
    for k in server.keys("MODULE_TYPE_*"):
        # Drop the 12-character "MODULE_TYPE_" prefix and keep what precedes
        # the first underscore.
        # NOTE(review): a module name containing '_' is truncated here and
        # would never match its script - confirm that no such name exists.
        moduleName = k[12:].split('_')[0]
        script_token = "./" + moduleName + ".py"

        for pid in server.smembers(k):
            proc = Popen(command_search_name.format(pid), stdin=PIPE, stdout=PIPE, shell=True)
            # communicate() drains the pipe and reaps the child process.
            output = proc.communicate()[0]

            flag_pid_valid = False
            for line in output.splitlines():
                splittedLine = line.split()
                # grep matches the PID as a substring anywhere on the line
                # (pid 12 matches 4123): require the PID column itself to match.
                if not splittedLine or splittedLine[0] != str(pid):
                    continue
                is_python = 'python2' in splittedLine or 'python' in splittedLine
                if is_python and script_token in splittedLine:
                    flag_pid_valid = True
                    break

            if not flag_pid_valid:
                print('{} cleaning {} in {}'.format(flag_pid_valid, pid, k))
                server.srem(k, pid)
                inst_time = datetime.datetime.fromtimestamp(int(time.time()))
                # Insert under the header and drop the last row (fixed size).
                printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
                printarrayGlob.pop()
|
|
|
|
|
|
def _record_kill_action(module, pid, action):
    """Add one row under the header of the action table, dropping the last row."""
    inst_time = datetime.datetime.fromtimestamp(int(time.time()))
    printarrayGlob.insert(1, [inst_time, module, pid, action])
    printarrayGlob.pop()


def _restart_killed_module(module, pid):
    """Start *module* again through ``command_restart_module`` and log it."""
    print('{} has been killed'.format(module))
    print('restarting {} ...'.format(module))
    proc = Popen(command_restart_module.format(module, module), stdin=PIPE, stdout=PIPE, shell=True)
    # Drain the pipes and reap the launcher so it does not stay as a zombie.
    proc.communicate()
    inst_time = datetime.datetime.fromtimestamp(int(time.time()))
    printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
    printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"])
    printarrayGlob.pop()
    printarrayGlob.pop()


def kill_module(module, pid):
    """Try to stop *module* with SIGUSR1 and restart it.

    When *pid* is None the PID is looked up with ``getPid``. When a PID is
    given, nothing is done unless the key ``MODULE_<module>_<pid>`` exists in
    Redis. The signal is sent at most twice, with one-second pauses; the
    module is restarted as soon as ``getPid`` no longer finds it. Every step
    is recorded in the action table, and ``cleanRedis`` is called at the end
    (except on the early returns).

    :param module: module name, without the ``.py`` extension
    :param pid: PID of the module as an int, or None to look it up
    """
    print('')
    print('-> trying to kill module: {}'.format(module))

    if pid is None:
        print('pid was None')
        _record_kill_action(module, pid, "PID was None")
        pid = getPid(module)
    else:  # Verify that the pid is at least in redis
        if server.exists("MODULE_"+module+"_"+str(pid)) == 0:
            return

    if pid is not None:
        # Remember the attempt so the caller can throttle retries.
        lastTimeKillCommand[pid] = int(time.time())
        try:
            os.kill(pid, signal.SIGUSR1)
        except OSError:
            print('{} already killed'.format(pid))
            _record_kill_action(module, pid, "Already killed")
            return

        time.sleep(1)
        if getPid(module) is None:
            _restart_killed_module(module, pid)
        else:
            print('killing failed, retrying...')
            _record_kill_action(module, pid, "Killing #1 failed.")

            time.sleep(1)
            try:
                os.kill(pid, signal.SIGUSR1)
            except OSError:
                # The process exited between the two attempts; the check
                # below will see that it is gone and restart it.
                pass
            time.sleep(1)
            if getPid(module) is None:
                _restart_killed_module(module, pid)
            else:
                print('killing failed!')
                _record_kill_action(module, pid, "Killing failed!")
    else:
        print('Module does not exist')
        _record_kill_action(module, pid, "Killing failed, module not found")

    cleanRedis()
|
|
|
|
def get_color(time, idle):
    """Return the colorama escape sequence matching a processing duration.

    :param time: duration as "H:M:S", or as "D day(s), H:M:S" (the form
        ``str(timedelta)`` takes from one day on), or None
    :param idle: True when the row describes an idle queue
    :return: ``Style.RESET_ALL`` when *time* is None; otherwise a bright
        background colour: red (magenta when *idle*) at or above
        ``threshold_stucked_module``, yellow above half of it, green below
    """
    # NOTE: the parameter name shadows the ``time`` module inside this
    # function; it is kept because it is part of the signature.
    if time is None:
        return Style.RESET_ALL

    days = 0
    clock = time
    if ',' in time:
        # "1 day, 2:03:04" -> days = 1, clock = "2:03:04". Without this the
        # hour field would be "1 day, 2" and int() would raise ValueError.
        day_part, clock = time.split(',', 1)
        days = int(day_part.split()[0])

    temp = clock.strip().split(':')
    elapsed = days*86400 + int(temp[0])*3600 + int(temp[1])*60 + int(temp[2])

    if elapsed >= threshold_stucked_module:
        if not idle:
            return Back.RED + Style.BRIGHT
        return Back.MAGENTA + Style.BRIGHT
    if elapsed > threshold_stucked_module/2:
        return Back.YELLOW + Style.BRIGHT
    return Back.GREEN + Style.BRIGHT
|
|
|
|
|
|
def _elapsed_as_hms(delta):
    """Render a timedelta as "H:M:S", letting the hours exceed 23.

    ``str(timedelta)`` switches to "1 day, H:M:S" after 24 hours; this form
    stays a plain colon-separated triple. Negative durations are clamped to 0.
    """
    total = max(0, int(delta.total_seconds()))
    return "{}:{:02d}:{:02d}".format(total // 3600, (total % 3600) // 60, total % 60)


def _wrap_longest_column(table):
    """Wrap, in place, the cells of the widest column of *table*.

    Nothing is done when ``table.ok`` is true or when ``column_max_width``
    reports no room at all for that column.
    """
    if table.ok:
        return
    longest_col = table.column_widths.index(max(table.column_widths))
    max_length_col = table.column_max_width(longest_col)
    if max_length_col <= 0:
        return
    for i, content in enumerate(table.table_data):
        if len(content[longest_col]) > max_length_col:
            pieces = []
            for l in content[longest_col].splitlines():
                if len(l) > max_length_col:
                    pieces.append('\n'.join(textwrap.wrap(l, max_length_col)))
                else:
                    pieces.append(l)
            content[longest_col] = '\n'.join(pieces).strip()
            table.table_data[i] = content


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.')
    parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
    parser.add_argument('-k', '--autokill', type=int, required=False, default=0, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
    parser.add_argument('-c', '--clear', type=int, required=False, default=0, help='Clear the current module information (Used to clear data from old launched modules)')

    args = parser.parse_args()

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. '
                        'Did you set environment variables? '
                        'Or activate the virtualenv.')

    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Replaces the module-level default with the configured value.
    threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module")

    # REDIS #
    server = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"))

    if args.clear == 1:
        clearRedisModuleInfo()

    lastTime = datetime.datetime.now()

    module_file_array = set()
    # Module name -> epoch seconds at which it was first seen without data.
    no_info_modules = {}
    path_allmod = os.path.join(os.environ['AIL_HOME'], 'doc/all_modules.txt')
    with open(path_allmod, 'r') as module_file:
        for line in module_file:
            # strip() instead of line[:-1]: the last line may lack a newline,
            # and blank lines must not become an empty module name.
            module_name = line.strip()
            if module_name:
                module_file_array.add(module_name)

    cleanRedis()

    while True:

        all_queue = set()
        printarray1 = []  # working queues
        printarray2 = []  # idling queues
        printarray3 = []  # queues without a running module / without data
        for queue, card in server.hgetall("queues").items():
            all_queue.add(queue)
            key = "MODULE_" + queue + "_"
            keySet = "MODULE_TYPE_" + queue
            array_module_type = []

            for moduleNum in server.smembers(keySet):
                value = server.get(key + str(moduleNum))
                if value is None:
                    continue
                # Split on the first separator only, the path may contain ", ".
                timestamp, path = value.split(", ", 1)
                startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
                elapsed = datetime.datetime.now() - startTime_readable
                processed_time_readable = str(elapsed).split('.')[0]
                # get_color() is given an hours-based H:M:S string so that a
                # duration of one day or more still parses.
                color_time = _elapsed_as_hms(elapsed)

                if int(card) > 0:
                    if int(elapsed.total_seconds()) > threshold_stucked_module:
                        with open(log_filename, 'a') as log:
                            log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
                        try:
                            # kill_module() stores the attempt under the int
                            # PID, so the lookup must use an int as well.
                            last_kill_try = time.time() - lastTimeKillCommand[int(moduleNum)]
                        except KeyError:
                            last_kill_try = kill_retry_threshold+1
                        if args.autokill == 1 and last_kill_try > kill_retry_threshold:
                            kill_module(queue, int(moduleNum))

                    array_module_type.append([get_color(color_time, False) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, False)])

                else:
                    printarray2.append([get_color(color_time, True) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, True)])

            # Order by the processing-time text, descending (string order).
            array_module_type.sort(key=lambda x: x[4], reverse=True)
            printarray1.extend(array_module_type)

        for curr_queue in module_file_array:
            if curr_queue not in all_queue:
                printarray3.append([curr_queue, "Not running"])
            elif not server.smembers('MODULE_TYPE_'+curr_queue):
                if curr_queue not in no_info_modules:
                    no_info_modules[curr_queue] = int(time.time())
                    printarray3.append([curr_queue, "No data"])
                elif args.autokill == 1:
                    # If no info since long time, try to kill
                    if int(time.time()) - no_info_modules[curr_queue] > threshold_stucked_module:
                        kill_module(curr_queue, None)
                        no_info_modules[curr_queue] = int(time.time())
                    printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(threshold_stucked_module - (int(time.time()) - no_info_modules[curr_queue])) + "s"])
                else:
                    # No restart will happen: do not show a countdown.
                    printarray3.append([curr_queue, "Stuck or idle, restarting disabled"])

        # The first 9 characters of column 0 are skipped when sorting -
        # presumably the colour prefix added by get_color(); verify if the
        # colour codes change.
        printarray1.sort(key=lambda x: x[0][9:], reverse=False)
        printarray2.sort(key=lambda x: x[0][9:], reverse=False)
        printarray1.insert(0, ["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
        printarray2.insert(0, ["Queue", "PID", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
        printarray3.insert(0, ["Queue", "State"])

        os.system('clear')
        t1 = AsciiTable(printarray1, title="Working queues")
        t1.column_max_width(1)  # return value unused, kept from the original
        _wrap_longest_column(t1)

        t2 = AsciiTable(printarray2, title="Idling queues")
        t2.column_max_width(1)
        _wrap_longest_column(t2)

        t3 = AsciiTable(printarray3, title="Not running queues")
        t3.column_max_width(1)

        # Skip the unused slots of the fixed-size action table.
        printarray4 = [elem for elem in printarrayGlob if elem is not None]

        t4 = AsciiTable(printarray4, title="Last actions")
        t4.column_max_width(1)

        print(t1.table)
        print('\n')
        print(t2.table)
        print('\n')
        print(t3.table)
        print('\n')
        print(t4.table)

        # Purge stale PIDs from Redis once every five refresh periods.
        if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
            lastTime = datetime.datetime.now()
            cleanRedis()
        time.sleep(args.refresh)
|