Improved module + Added support of click and keyboard navigation for killing and starting modules

This commit is contained in:
Mokaddem 2017-01-17 14:31:06 +01:00
parent 3b8149d840
commit 692f13d18b

View file

@ -8,12 +8,14 @@ from asciimatics.scene import Scene
from asciimatics.screen import Screen
from asciimatics.exceptions import ResizeScreenError, NextScene, StopApplication
from asciimatics.event import Event
from asciimatics.event import KeyboardEvent, MouseEvent
import sys, os
import time, datetime
import argparse, ConfigParser
import json
import redis
import psutil
from subprocess import PIPE, Popen
# CONFIG VARIABLES
kill_retry_threshold = 60 #1m
@ -23,9 +25,17 @@ command_search_name = "ps a -o pid,cmd | grep {}"
command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
printarrayGlob = [None]*14
printarrayGlob.insert(0, ["Time", "Module", "PID", "Action"])
lastTimeKillCommand = {}
TABLES = {"running": [("fetching information...",0)], "idle": [("fetching information...",0)], "notRunning": [("fetching information...",0)], "logs": [("No events recorded yet", 0)]}
current_selected_value = 0
current_selected_queue = ""
PID_NAME_DICO = {}
TABLES = {"running": [], "idle": [], "notRunning": [], "logs": [("No events recorded yet", 0)]}
TABLES_TITLES = {"running": "", "idle": "", "notRunning": "", "logs": ""}
TABLES_PADDING = {"running": [12, 23, 8, 8, 23, 10, 55, 11, 11, 12], "idle": [9, 23, 8, 12, 50], "notRunning": [9, 23, 35], "logs": [15, 23, 8, 50]}
QUEUE_STATUS = {}
CPU_TABLE = {}
CPU_OBJECT_TABLE = {}
@ -63,14 +73,7 @@ class CListBox(ListBox):
self._start_line = max(0, max(self._line - height + 1,
min(self._start_line, self._line)))
for i, (text, pid) in enumerate(self._options):
if i == 0:
colour, attr, bg = self._frame.palette["title"]
self._frame.canvas.print_at(
"{:{width}}".format(text, width=width),
self._x + self._offset + dx,
self._y + i + dy - self._start_line,
colour, attr, bg)
elif self._start_line <= i < self._start_line + height:
if self._start_line <= i < self._start_line + height:
colour, attr, bg = self._pick_colours("field", i == self._line)
self._frame.canvas.print_at(
"{:{width}}".format(text, width=width),
@ -91,23 +94,67 @@ class CListBox(ListBox):
colour, attr, queueStatus)
def process_event(self, event):
    """Handle keyboard and mouse input for this list box.

    Keyboard: UP/DOWN move the selection by one row; SPACE records the
    selected value and queue in the module-level ``current_selected_*``
    globals and switches to the "confirm" scene.

    Mouse: a click on a row selects it; a click on the action button
    columns of that row kills (running/idle queues) or starts
    (notRunning queue) the module immediately, without confirmation.

    :param event: the asciimatics event to process.
    :returns: ``None`` when the event was consumed, otherwise the event
        itself so that other widgets may handle it.
    :raises NextScene: when SPACE is pressed on an actionable row.
    """
    global current_selected_value, current_selected_queue

    killable_queues = ("running", "idle")
    startable_queues = ("notRunning",)
    has_options = len(self._options) > 0

    if isinstance(event, KeyboardEvent):
        if has_options and event.key_code == Screen.KEY_UP:
            # Move up one line in text - use value to trigger on_select.
            self._line = max(0, self._line - 1)
            self.value = self._options[self._line][1]
        elif has_options and event.key_code == Screen.KEY_DOWN:
            # Move down one line in text - use value to trigger on_select.
            self._line = min(len(self._options) - 1, self._line + 1)
            self.value = self._options[self._line][1]
        elif (has_options and event.key_code == ord(' ') and
                self.queue_name in killable_queues + startable_queues):
            # Only rows that map to a module can be confirmed; rows of
            # other lists (e.g. the logs) carry no module to act on.
            current_selected_value = self.value
            current_selected_queue = self.queue_name
            raise NextScene("confirm")
        else:
            # Ignore any other key press.
            return event
    elif isinstance(event, MouseEvent):
        # Mouse event - rebase coordinates to Frame context.
        new_event = self._frame.rebase_event(event)
        if event.buttons != 0:
            if has_options and self.is_mouse_over(new_event,
                                                  include_label=False):
                # Rows are drawn shifted up by ``_start_line`` once the
                # list has scrolled, so add it back to find the option
                # that is really under the cursor.
                # NOTE(review): any extra vertical offset applied by
                # update() is not visible here - confirm none is needed.
                clicked_line = new_event.y - self._y + self._start_line
                # Use property to trigger events.
                self._line = min(clicked_line, len(self._options) - 1)
                self.value = self._options[self._line][1]

                # Clicked on the <K>/<S> button columns: act right away.
                if self._x + 2 <= new_event.x < self._x + 4:
                    if self.queue_name in killable_queues:
                        kill_module(PID_NAME_DICO[self.value], self.value)
                    elif self.queue_name in startable_queues:
                        restart_module(self.value)
                return
        # Ignore other mouse events.
        return event
    else:
        # Ignore other events
        return event
class CLabel(Label):
def __init__(self, label):
def __init__(self, label, listTitle=False):
super(Label, self).__init__(None, tab_stop=False)
# Although this is a label, we don't want it to contribute to the layout
# tab calculations, so leave internal `_label` value as None.
self._text = label
self.listTitle = listTitle
def set_layout(self, x, y, offset, w, h):
# Do the usual layout work. then recalculate exact x/w values for the
# rendered button.
super(Label, self).set_layout(x, y, offset, w, h)
self._x += max(0, (self._w - self._offset - len(self._text)) // 2)
self._x += max(0, (self._w - self._offset - len(self._text)) // 2) if not self.listTitle else 0
self._w = min(self._w, len(self._text))
def update(self, frame_no):
(colour, attr, bg) = self._frame.palette["title"]
colour = Screen.COLOUR_YELLOW
colour = Screen.COLOUR_YELLOW if not self.listTitle else colour
self._frame.canvas.print_at(
self._text, self._x, self._y, colour, attr, bg)
@ -116,33 +163,34 @@ class ListView(Frame):
super(ListView, self).__init__(screen,
screen.height,
screen.width,
on_load=self._reload_list,
hover_focus=True,
reduce_cpu=True)
self._list_view_run_queue = CListBox(
"running",
screen.height // 2,
[], name="LIST", on_change=self._on_pick)
[], name="LIST")
self._list_view_idle_queue = CListBox(
"idle",
screen.height // 2,
[], name="LIST", on_change=self._on_pick)
[], name="LIST")
self._list_view_noRunning = CListBox(
"notRunning",
screen.height // 4,
[], name="LIST", on_change=self._on_pick)
screen.height // 5,
[], name="LIST")
self._list_view_Log = CListBox(
"logs",
screen.height // 4,
[], name="LIST", on_change=self._on_pick)
self._list_view_Log.disabled = True
[], name="LIST")
#self._list_view_Log.disabled = True
#Running Queues
layout = Layout([100])
self.add_layout(layout)
text_rq = CLabel("Running Queues")
layout.add_widget(text_rq)
layout.add_widget(CLabel(TABLES_TITLES["running"], listTitle=True))
layout.add_widget(self._list_view_run_queue)
layout.add_widget(Divider())
@ -150,36 +198,86 @@ class ListView(Frame):
layout2 = Layout([1,1])
self.add_layout(layout2)
text_iq = CLabel("Idling Queues")
layout2.add_widget(text_iq)
layout2.add_widget(text_iq, 0)
layout2.add_widget(CLabel(TABLES_TITLES["idle"], listTitle=True), 0)
layout2.add_widget(self._list_view_idle_queue, 0)
#Non Running Queues
text_nq = CLabel("No Running Queues")
layout2.add_widget(text_nq, 1)
layout2.add_widget(CLabel(TABLES_TITLES["notRunning"], listTitle=True), 1)
layout2.add_widget(self._list_view_noRunning, 1)
layout2.add_widget(Divider(), 1)
#Log
text_l = CLabel("Logs")
layout2.add_widget(text_l, 1)
layout2.add_widget(CLabel(TABLES_TITLES["logs"], listTitle=True), 1)
layout2.add_widget(self._list_view_Log, 1)
self.fix()
self._on_pick()
def _on_pick(self):
return
def _reload_list(self):
self._list_view_run_queue = [(time.time(), 123)]
@staticmethod
def _quit():
raise StopApplication("User pressed quit")
class Confirm(Frame):
    """Small pop-up frame asking the user to confirm a kill/start action.

    The action to confirm is read from the module-level globals
    ``current_selected_value`` and ``current_selected_queue`` each time the
    frame is loaded.
    """

    # Label template, filled with (action, module name, pid).
    _LABEL_TEMPLATE = "Confirm {} module {} {}?"

    def __init__(self, screen):
        """Build the confirmation frame: one label plus Ok/Cancel buttons.

        :param screen: the asciimatics Screen the frame is drawn on.
        """
        super(Confirm, self).__init__(screen,
                                      screen.height * 1 // 8,
                                      screen.width * 1 // 4,
                                      hover_focus=True,
                                      on_load=self._setValue,
                                      title="Confirm action",
                                      reduce_cpu=True)

        # Label describing the pending action.
        layout = Layout([100], fill_frame=True)
        self.add_layout(layout)
        self.label = CLabel(self._LABEL_TEMPLATE)
        layout.add_widget(self.label)

        # Ok / Cancel buttons, side by side.
        layout2 = Layout([1, 1])
        self.add_layout(layout2)
        layout2.add_widget(Button("Ok", self._ok), 0)
        layout2.add_widget(Button("Cancel", self._cancel), 1)

        self.fix()

    def _ok(self):
        """Perform the confirmed action, clear the selection and go back.

        :raises NextScene: always, to return to the "dashboard" scene.
        """
        global current_selected_value, current_selected_queue
        if current_selected_queue in ["running", "idle"]:
            kill_module(PID_NAME_DICO[current_selected_value],
                        current_selected_value)
        else:
            restart_module(current_selected_value)

        # Clear both halves of the selection (the queue was previously
        # left set because the value was reset twice).
        current_selected_value = 0
        current_selected_queue = ""
        self.label._text = self._LABEL_TEMPLATE
        raise NextScene("dashboard")

    def _cancel(self):
        """Drop the pending action and return to the dashboard.

        :raises NextScene: always, to return to the "dashboard" scene.
        """
        global current_selected_value
        current_selected_value = 0
        self.label._text = self._LABEL_TEMPLATE
        raise NextScene("dashboard")

    def _setValue(self):
        """Fill the label with the action about to be confirmed.

        Used as the frame's ``on_load`` callback. The text is always built
        from the pristine template, so it does not depend on the label
        having been reset by a previous Ok/Cancel.
        """
        if current_selected_queue in ["running", "idle"]:
            action = "KILL"
            modulename = PID_NAME_DICO[current_selected_value]
            pid = current_selected_value
        else:
            action = "START"
            modulename = current_selected_value
            pid = ""
        self.label._text = self._LABEL_TEMPLATE.format(action, modulename, pid)
def demo(screen):
LV = ListView(screen)
dashboard = ListView(screen)
confirm = Confirm(screen)
scenes = [
Scene([LV], -1)
Scene([dashboard], -1, name="dashboard"),
Scene([confirm], -1, name="confirm"),
]
# screen.play(scenes)
@ -187,14 +285,16 @@ def demo(screen):
time_cooldown = time.time()
global TABLES
while True:
LV._update(None)
screen.draw_next_frame()
if time.time() - time_cooldown > args.refresh:
cleanRedis()
for key, val in fetchQueueData().iteritems():
TABLES[key] = val
TABLES["logs"] = format_logs(printarrayGlob)
TABLES["logs"] = format_string(printarrayGlob, TABLES_PADDING["logs"])
if current_selected_value == 0:
dashboard._update(None)
screen.refresh()
time_cooldown = time.time()
screen.draw_next_frame()
time.sleep(0.02)
@ -211,7 +311,7 @@ def clearRedisModuleInfo():
for k in server.keys("MODULE_*"):
server.delete(k)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, "*", "-", "Cleared redis module info"])
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], "*", "-", "Cleared redis module info"], 0))
printarrayGlob.pop()
def cleanRedis():
@ -226,21 +326,29 @@ def cleanRedis():
flag_pid_valid = True
if not flag_pid_valid:
print flag_pid_valid, 'cleaning', pid, 'in', k
#print flag_pid_valid, 'cleaning', pid, 'in', k
server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0))
printarrayGlob.pop()
#time.sleep(5)
def restart_module(module):
    """Launch `module` through the restart command and log the action.

    The shell command is built from ``command_restart_module`` with the
    module name substituted twice (window title and script name). A
    "Restarted" entry is then pushed at the head of ``printarrayGlob`` and
    the last entry is dropped so the log keeps a constant length.

    :param module: name of the module to start.
    """
    launch_command = command_restart_module.format(module, module)
    Popen([launch_command], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)

    # Whole-second timestamp, rendered as HH:MM:SS for the log table.
    now = datetime.datetime.fromtimestamp(int(time.time()))
    log_entry = ([now.strftime("%H:%M:%S"), module, "?", "Restarted"], 0)
    printarrayGlob.insert(0, log_entry)
    printarrayGlob.pop()
def kill_module(module, pid):
print ''
print '-> trying to kill module:', module
#print ''
#print '-> trying to kill module:', module
if pid is None:
print 'pid was None'
printarrayGlob.insert(1, [0, module, pid, "PID was None"])
#print 'pid was None'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], module, pid, "PID was None"], 0))
printarrayGlob.pop()
pid = getPid(module)
else: #Verify that the pid is at least in redis
@ -251,54 +359,48 @@ def kill_module(module, pid):
if pid is not None:
try:
#os.kill(pid, signal.SIGUSR1)
p = psutil.Process(pid)
p = psutil.Process(int(pid))
p.terminate()
except Exception:
print pid, 'already killed'
except Exception as e:
#print pid, 'already killed'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"])
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], module, pid, "Already killed"], 0))
printarrayGlob.pop()
return
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
time.sleep(0.2)
if not p.is_running():
#print module, 'has been killed'
#print 'restarting', module, '...'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"])
printarrayGlob.pop()
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], module, pid, "Killed"], 0))
printarrayGlob.pop()
restart_module(module)
else:
print 'killing failed, retrying...'
#print 'killing failed, retrying...'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."])
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], module, pid, "Killing #1 failed."], 0))
printarrayGlob.pop()
time.sleep(1)
#os.kill(pid, signal.SIGUSR1)
p = psutil.Process(pid)
#time.sleep(1)
p.terminate()
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
if not p.is_running():
#print module, 'has been killed'
#print 'restarting', module, '...'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"])
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], module, pid, "Killed"], 0))
printarrayGlob.pop()
restart_module(module)
else:
#print 'killing failed!'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], module, pid, "Killing failed!"], 0))
printarrayGlob.pop()
else:
print 'killing failed!'
#print 'Module does not exist'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"])
printarrayGlob.pop()
else:
print 'Module does not exist'
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"])
printarrayGlob.insert(0, ([str(inst_time).split(' ')[1], module, pid, "Killing failed, module not found"], 0))
printarrayGlob.pop()
#time.sleep(5)
cleanRedis()
@ -347,112 +449,68 @@ def fetchQueueData():
cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent()
CPU_TABLE[moduleNum].insert(1, cpu_percent)
cpu_avg = sum(CPU_TABLE[moduleNum])/len(CPU_TABLE[moduleNum])
if len(CPU_TABLE[moduleNum]) > args.refresh*10:
CPU_TABLE[moduleNum].pop()
mem_percent = CPU_OBJECT_TABLE[int(moduleNum)].memory_percent()
except KeyError:
try:
CPU_OBJECT_TABLE[int(moduleNum)] = psutil.Process(int(moduleNum))
cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent()
CPU_TABLE[moduleNum] = []
cpu_avg = cpu_percent
if len(CPU_TABLE[moduleNum]) > args.refresh*10:
CPU_TABLE[moduleNum].pop()
mem_percent = CPU_OBJECT_TABLE[int(moduleNum)].memory_percent()
except psutil.NoSuchProcess:
cpu_percent = 0
cpu_avg = cpu_percent
mem_percent = 0
array_module_type.append( ([" <K> [ ]", str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path), "{0:.2f}".format(cpu_percent)+"%", "{0:.2f}".format(mem_percent)+"%", "{0:.2f}".format(cpu_avg)+"%"], moduleNum) )
else:
printarray2.append( ([" <K> ", str(queue), str(moduleNum), str(processed_time_readable), str(path)], moduleNum) )
PID_NAME_DICO[moduleNum] = str(queue)
array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True)
for e in array_module_type:
printarray1.append(e)
for curr_queue in module_file_array:
if curr_queue not in all_queue:
printarray3.append( ([" <S> ", curr_queue, "Not running"], len(printarray3)+1) )
printarray3.append( ([" <S> ", curr_queue, "Not running by default"], curr_queue) )
else:
if len(list(server.smembers('MODULE_TYPE_'+curr_queue))) == 0:
if curr_queue not in no_info_modules:
no_info_modules[curr_queue] = int(time.time())
printarray3.append( ([" <S> ", curr_queue, "No data"], len(printarray3)+1) )
printarray3.append( ([" <S> ", curr_queue, "No data"], curr_queue) )
else:
#If no info since long time, try to kill
if args.autokill == 1:
if int(time.time()) - no_info_modules[curr_queue] > args.treshold:
kill_module(curr_queue, None)
no_info_modules[curr_queue] = int(time.time())
printarray3.append( ([" <S> ", curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"], len(printarray3)+1) )
printarray3.append( ([" <S> ", curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"], curr_queue) )
else:
printarray3.append( ([" <S> ", curr_queue, "Stuck or idle, restarting disabled"], len(printarray3)+1) )
printarray3.append( ([" <S> ", curr_queue, "Stuck or idle, restarting disabled"], curr_queue) )
## FIXME To add:
## Button KILL Process using Curses
printarray1.sort(key=lambda x: x[0], reverse=False)
printarray2.sort(key=lambda x: x[0], reverse=False)
printarray1.insert(0,([" Action", "Queue name", "PID", "#", "S Time", "R Time", "Processed element", "CPU %", "Mem %", "Avg CPU%"], 0) )
printarray2.insert(0,([" Action", "Queue", "PID", "Idle Time", "Last paste hash"], 0) )
printarray3.insert(0,([" Action", "Queue", "State"], 0) )
padding_row = [12, 23, 8,
8, 23, 10,
55, 11, 11, 12]
printstring1 = []
for row in printarray1:
the_array = row[0]
the_pid = row[1]
text=""
for ite, elem in enumerate(the_array):
if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5
else:
text += elem
padd_off = " "*0
text += (padding_row[ite] - len(elem))*" " + padd_off
printstring1.append( (text, the_pid) )
padding_row = [9, 23, 8,
12, 50]
printstring2 = []
for row in printarray2:
the_array = row[0]
the_pid = row[1]
text=""
for ite, elem in enumerate(the_array):
if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5
else:
text += elem
padd_off = " "*0
text += (padding_row[ite] - len(elem))*" " + padd_off
printstring2.append( (text, the_pid) )
padding_row = [9, 23, 35]
printstring3 = []
for row in printarray3:
the_array = row[0]
the_pid = row[1]
text=""
for ite, elem in enumerate(the_array):
if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5
else:
text += elem
padd_off = " "*0
text += (padding_row[ite] - len(elem))*" " + padd_off
printstring3.append( (text, the_pid) )
printstring1 = format_string(printarray1, TABLES_PADDING["running"])
printstring2 = format_string(printarray2, TABLES_PADDING["idle"])
printstring3 = format_string(printarray3, TABLES_PADDING["notRunning"])
return {"running": printstring1, "idle": printstring2, "notRunning": printstring3}
def format_logs(logs):
printstring4 = []
padding_row = [12, 23, 8, 50]
text=""
for row in logs:
def format_string(tab, padding_row):
printstring = []
for row in tab:
if row is None:
continue
for ite, elem in enumerate(row):
the_array = row[0]
the_pid = row[1]
text=""
for ite, elem in enumerate(the_array):
if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5
@ -460,15 +518,15 @@ def format_logs(logs):
text += elem
padd_off = " "*0
text += (padding_row[ite] - len(elem))*" " + padd_off
printstring4.append( (text, len(printstring4)+1) )
return printstring4
printstring.append( (text, the_pid) )
return printstring
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.')
parser.add_argument('-r', '--refresh', type=int, required=False, default=2, help='Refresh rate')
parser.add_argument('-r', '--refresh', type=int, required=False, default=5, help='Refresh rate')
parser.add_argument('-t', '--treshold', type=int, required=False, default=60*10*1, help='Refresh rate')
parser.add_argument('-k', '--autokill', type=int, required=False, default=0, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
parser.add_argument('-c', '--clear', type=int, required=False, default=0, help='Clear the current module information (Used to clear data from old launched modules)')
@ -502,9 +560,14 @@ if __name__ == "__main__":
for line in module_file:
module_file_array.add(line[:-1])
#cleanRedis()
cleanRedis()
TABLES_TITLES["running"] = format_string([([" Action", "Queue name", "PID", "#", "S Time", "R Time", "Processed element", "CPU %", "Mem %", "Avg CPU%"],0)], TABLES_PADDING["running"])[0][0]
TABLES_TITLES["idle"] = format_string([([" Action", "Queue", "PID", "Idle Time", "Last paste hash"],0)], TABLES_PADDING["idle"])[0][0]
TABLES_TITLES["notRunning"] = format_string([([" Action", "Queue", "State"],0)], TABLES_PADDING["notRunning"])[0][0]
TABLES_TITLES["logs"] = format_string([(["Time", "Module", "PID", "Info"],0)], TABLES_PADDING["logs"])[0][0]
while True:
Screen.wrapper(demo)