2016-08-24 13:21:41 +00:00
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
2016-08-26 08:07:06 +00:00
'''
This module can be used to see information about running modules.
This information is logged in "logs/moduleInfo.log".
It can also try to manage them by killing inactive ones.
However, it does not support multiple occurrences of the same module
(it will kill the first one obtained by get).
'''
2016-08-24 13:21:41 +00:00
import time
import datetime
import redis
import os
2016-08-24 14:52:01 +00:00
import signal
2016-08-25 08:43:04 +00:00
import argparse
2016-08-24 14:52:01 +00:00
from subprocess import PIPE , Popen
2016-08-24 13:21:41 +00:00
import ConfigParser
2016-08-24 13:53:00 +00:00
import json
2016-08-25 08:43:04 +00:00
from terminaltables import AsciiTable
import textwrap
2016-11-25 13:12:09 +00:00
from colorama import Fore , Back , Style , init
2016-08-24 13:21:41 +00:00
2016-08-24 13:53:00 +00:00
# CONFIG VARIABLES
# Seconds after which a module is considered stuck: 600 s, i.e. 10 minutes.
# NOTE(review): the previous comment said "1 hour", which does not match the
# value. The __main__ section of this file rebinds this name from the config.
threshold_stucked_module = 10 * 60
# Minimum number of seconds between two kill attempts (1 minute).
kill_retry_threshold = 60
log_filename = " ../logs/moduleInfo.log "
# Shell pipelines; "{}" is filled in with str.format() by the callers.
command_search_pid = " ps a -o pid,cmd | grep {} "
command_search_name = " ps a -o pid,cmd | grep {} "
command_restart_module = " screen -S \" Script \" -X screen -t \" {} \" bash -c \" ./ {} .py; read x \" "

init()  # Necessary for colorama

# "Last actions" table: a header row followed by 14 empty slots. Functions
# insert new rows at index 1 and pop the last element, so the length is fixed.
printarrayGlob = [[" Time ", " Module ", " PID ", " Action "]] + [None] * 14
# Time of the last kill command, keyed by pid.
lastTimeKillCommand = {}
2016-08-24 14:52:01 +00:00
2016-08-24 15:28:39 +00:00
def getPid ( module ) :
2016-08-24 14:52:01 +00:00
p = Popen ( [ command_search_pid . format ( module + " .py " ) ] , stdin = PIPE , stdout = PIPE , bufsize = 1 , shell = True )
for line in p . stdout :
2016-11-24 15:58:32 +00:00
print line
2016-08-24 14:52:01 +00:00
splittedLine = line . split ( )
if ' python2 ' in splittedLine :
2016-08-24 15:28:39 +00:00
return int ( splittedLine [ 0 ] )
2016-11-24 15:58:32 +00:00
return None
2016-08-24 15:28:39 +00:00
2016-08-25 14:07:47 +00:00
def clearRedisModuleInfo():
    """Delete every key matching " MODULE_* " from ``server`` and log the purge.

    One row is added to the "last actions" table (``printarrayGlob``).
    """
    stale_keys = server.keys(" MODULE_* ")
    for stale_key in stale_keys:
        server.delete(stale_key)

    now = datetime.datetime.fromtimestamp(int(time.time()))
    row = [now, " * ", " - ", " Cleared redis module info "]
    # The new row goes right below the header and the last element is dropped,
    # so the table keeps a constant length.
    printarrayGlob[1:1] = [row]
    del printarrayGlob[-1]
2016-08-24 15:28:39 +00:00
2016-08-26 13:27:37 +00:00
def cleanRedis ( ) :
for k in server . keys ( " MODULE_TYPE_* " ) :
moduleName = k [ 12 : ] . split ( ' _ ' ) [ 0 ]
for pid in server . smembers ( k ) :
flag_pid_valid = False
proc = Popen ( [ command_search_name . format ( pid ) ] , stdin = PIPE , stdout = PIPE , bufsize = 1 , shell = True )
for line in proc . stdout :
splittedLine = line . split ( )
if ( ' python2 ' in splittedLine or ' python ' in splittedLine ) and " ./ " + moduleName + " .py " in splittedLine :
flag_pid_valid = True
if not flag_pid_valid :
print flag_pid_valid , ' cleaning ' , pid , ' in ' , k
server . srem ( k , pid )
2016-11-25 10:54:16 +00:00
inst_time = datetime . datetime . fromtimestamp ( int ( time . time ( ) ) )
printarrayGlob . insert ( 1 , [ inst_time , moduleName , pid , " Cleared invalid pid in " + k ] )
printarrayGlob . pop ( )
#time.sleep(5)
2016-08-26 13:27:37 +00:00
2016-11-25 10:54:16 +00:00
def kill_module ( module , pid ) :
2016-08-24 15:28:39 +00:00
print ' '
print ' -> trying to kill module: ' , module
2016-11-25 10:54:16 +00:00
if pid is None :
print ' pid was None '
printarrayGlob . insert ( 1 , [ 0 , module , pid , " PID was None " ] )
printarrayGlob . pop ( )
pid = getPid ( module )
else : #Verify that the pid is at least in redis
if server . exists ( " MODULE_ " + module + " _ " + str ( pid ) ) == 0 :
return
lastTimeKillCommand [ pid ] = int ( time . time ( ) )
2016-08-24 15:28:39 +00:00
if pid is not None :
2016-11-25 10:54:16 +00:00
try :
os . kill ( pid , signal . SIGUSR1 )
except OSError :
print pid , ' already killed '
inst_time = datetime . datetime . fromtimestamp ( int ( time . time ( ) ) )
printarrayGlob . insert ( 1 , [ inst_time , module , pid , " Already killed " ] )
printarrayGlob . pop ( )
return
2016-08-24 15:28:39 +00:00
time . sleep ( 1 )
if getPid ( module ) is None :
print module , ' has been killed '
print ' restarting ' , module , ' ... '
2016-08-24 14:52:01 +00:00
p2 = Popen ( [ command_restart_module . format ( module , module ) ] , stdin = PIPE , stdout = PIPE , bufsize = 1 , shell = True )
2016-11-25 10:54:16 +00:00
inst_time = datetime . datetime . fromtimestamp ( int ( time . time ( ) ) )
printarrayGlob . insert ( 1 , [ inst_time , module , pid , " Killed " ] )
printarrayGlob . insert ( 1 , [ inst_time , module , " ? " , " Restarted " ] )
printarrayGlob . pop ( )
printarrayGlob . pop ( )
2016-08-24 13:53:00 +00:00
2016-08-24 15:28:39 +00:00
else :
print ' killing failed, retrying... '
2016-11-25 10:54:16 +00:00
inst_time = datetime . datetime . fromtimestamp ( int ( time . time ( ) ) )
printarrayGlob . insert ( 1 , [ inst_time , module , pid , " Killing #1 failed. " ] )
printarrayGlob . pop ( )
time . sleep ( 1 )
2016-08-24 15:28:39 +00:00
os . kill ( pid , signal . SIGUSR1 )
time . sleep ( 1 )
if getPid ( module ) is None :
print module , ' has been killed '
print ' restarting ' , module , ' ... '
p2 = Popen ( [ command_restart_module . format ( module , module ) ] , stdin = PIPE , stdout = PIPE , bufsize = 1 , shell = True )
2016-11-25 10:54:16 +00:00
inst_time = datetime . datetime . fromtimestamp ( int ( time . time ( ) ) )
printarrayGlob . insert ( 1 , [ inst_time , module , pid , " Killed " ] )
printarrayGlob . insert ( 1 , [ inst_time , module , " ? " , " Restarted " ] )
printarrayGlob . pop ( )
printarrayGlob . pop ( )
2016-08-24 15:28:39 +00:00
else :
print ' killing failed! '
2016-11-25 10:54:16 +00:00
inst_time = datetime . datetime . fromtimestamp ( int ( time . time ( ) ) )
printarrayGlob . insert ( 1 , [ inst_time , module , pid , " Killing failed! " ] )
printarrayGlob . pop ( )
2016-11-24 15:58:32 +00:00
else :
print ' Module does not exist '
2016-11-25 10:54:16 +00:00
inst_time = datetime . datetime . fromtimestamp ( int ( time . time ( ) ) )
printarrayGlob . insert ( 1 , [ inst_time , module , pid , " Killing failed, module not found " ] )
printarrayGlob . pop ( )
#time.sleep(5)
cleanRedis ( )
2016-08-24 13:53:00 +00:00
2016-11-25 13:12:09 +00:00
def get_color(time, idle):
    """Return the colorama prefix matching an elapsed time.

    time -- elapsed time as text in the ``str(datetime.timedelta)`` layout,
            "[D day[s], ]H:MM:SS[.ffffff]", or None.
    idle -- truthy when the module is idle; it only changes the colour used
            once the threshold is reached.

    Returns ``Style.RESET_ALL`` when *time* is None. Otherwise, with the
    elapsed time converted to seconds:
      * >= threshold_stucked_module: magenta if *idle*, red if not
      * >  threshold_stucked_module / 2: yellow
      * anything else: green
    each combined with ``Style.BRIGHT``.

    Raises ValueError when *time* does not follow the layout above.
    """
    if time is None:
        return Style.RESET_ALL

    # Spans of one day or more are rendered as "N day(s), H:MM:SS". The day
    # prefix used to end up inside int() and raise, so it is split off first.
    days = 0
    clock = time
    if ',' in time:
        day_part, clock = time.split(',', 1)
        days = int(day_part.split()[0])

    hours, minutes, seconds = clock.strip().split(':')
    # int(float(...)) tolerates a fractional seconds part.
    elapsed = (days * 86400 + int(hours) * 3600
               + int(minutes) * 60 + int(float(seconds)))

    if elapsed >= threshold_stucked_module:
        if idle:
            return Back.MAGENTA + Style.BRIGHT
        return Back.RED + Style.BRIGHT
    if elapsed > threshold_stucked_module / 2:
        return Back.YELLOW + Style.BRIGHT
    return Back.GREEN + Style.BRIGHT
2016-08-24 13:53:00 +00:00
2016-08-24 13:21:41 +00:00
if __name__ == " __main__ " :
2016-08-25 08:43:04 +00:00
parser = argparse . ArgumentParser ( description = ' Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one. ' )
parser . add_argument ( ' -r ' , ' --refresh ' , type = int , required = False , default = 1 , help = ' Refresh rate ' )
2016-08-26 13:27:37 +00:00
parser . add_argument ( ' -k ' , ' --autokill ' , type = int , required = False , default = 0 , help = ' Enable auto kill option (1 for TRUE, anything else for FALSE) ' )
parser . add_argument ( ' -c ' , ' --clear ' , type = int , required = False , default = 0 , help = ' Clear the current module information (Used to clear data from old launched modules) ' )
2016-08-25 08:43:04 +00:00
args = parser . parse_args ( )
2016-08-24 13:21:41 +00:00
configfile = os . path . join ( os . environ [ ' AIL_BIN ' ] , ' packages/config.cfg ' )
if not os . path . exists ( configfile ) :
raise Exception ( ' Unable to find the configuration file. \
Did you set environment variables ? \
Or activate the virtualenv . ' )
2016-08-25 08:43:04 +00:00
2016-08-24 13:21:41 +00:00
cfg = ConfigParser . ConfigParser ( )
cfg . read ( configfile )
2016-11-25 10:54:16 +00:00
threshold_stucked_module = cfg . getint ( " Module_ModuleInformation " , " threshold_stucked_module " )
2016-08-24 13:21:41 +00:00
# REDIS #
server = redis . StrictRedis (
host = cfg . get ( " Redis_Queues " , " host " ) ,
port = cfg . getint ( " Redis_Queues " , " port " ) ,
db = cfg . getint ( " Redis_Queues " , " db " ) )
2016-08-25 14:07:47 +00:00
if args . clear == 1 :
clearRedisModuleInfo ( )
2016-08-26 13:27:37 +00:00
lastTime = datetime . datetime . now ( )
2016-08-26 07:21:02 +00:00
module_file_array = set ( )
2016-11-24 15:58:32 +00:00
no_info_modules = { }
2016-10-14 12:26:33 +00:00
path_allmod = os . path . join ( os . environ [ ' AIL_HOME ' ] , ' doc/all_modules.txt ' )
with open ( path_allmod , ' r ' ) as module_file :
2016-08-26 07:21:02 +00:00
for line in module_file :
module_file_array . add ( line [ : - 1 ] )
2016-11-25 10:54:16 +00:00
cleanRedis ( )
2016-08-26 07:21:02 +00:00
while True :
all_queue = set ( )
printarray1 = [ ]
printarray2 = [ ]
printarray3 = [ ]
for queue , card in server . hgetall ( " queues " ) . iteritems ( ) :
all_queue . add ( queue )
key = " MODULE_ " + queue + " _ "
2016-08-26 13:27:37 +00:00
keySet = " MODULE_TYPE_ " + queue
2016-11-25 13:44:27 +00:00
array_module_type = [ ]
2016-08-26 07:21:02 +00:00
2016-08-26 13:27:37 +00:00
for moduleNum in server . smembers ( keySet ) :
2016-08-26 07:21:02 +00:00
value = server . get ( key + str ( moduleNum ) )
if value is not None :
timestamp , path = value . split ( " , " )
if timestamp is not None and path is not None :
startTime_readable = datetime . datetime . fromtimestamp ( int ( timestamp ) )
processed_time_readable = str ( ( datetime . datetime . now ( ) - startTime_readable ) ) . split ( ' . ' ) [ 0 ]
if int ( card ) > 0 :
if int ( ( datetime . datetime . now ( ) - startTime_readable ) . total_seconds ( ) ) > threshold_stucked_module :
log = open ( log_filename , ' a ' )
log . write ( json . dumps ( [ queue , card , str ( startTime_readable ) , str ( processed_time_readable ) , path ] ) + " \n " )
2016-11-25 10:54:16 +00:00
try :
last_kill_try = time . time ( ) - lastTimeKillCommand [ moduleNum ]
except KeyError :
last_kill_try = kill_retry_threshold + 1
if args . autokill == 1 and last_kill_try > kill_retry_threshold :
kill_module ( queue , int ( moduleNum ) )
2016-08-26 07:21:02 +00:00
2016-11-25 13:44:27 +00:00
array_module_type . append ( [ get_color ( processed_time_readable , False ) + str ( queue ) , str ( moduleNum ) , str ( card ) , str ( startTime_readable ) , str ( processed_time_readable ) , str ( path ) + get_color ( None , False ) ] )
2016-08-26 07:21:02 +00:00
else :
2016-11-25 13:12:09 +00:00
printarray2 . append ( [ get_color ( processed_time_readable , True ) + str ( queue ) , str ( moduleNum ) , str ( card ) , str ( startTime_readable ) , str ( processed_time_readable ) , str ( path ) + get_color ( None , True ) ] )
2016-11-25 13:44:27 +00:00
array_module_type . sort ( lambda x , y : cmp ( x [ 4 ] , y [ 4 ] ) , reverse = True )
for e in array_module_type :
printarray1 . append ( e )
2016-08-26 07:21:02 +00:00
for curr_queue in module_file_array :
if curr_queue not in all_queue :
2016-11-24 15:58:32 +00:00
printarray3 . append ( [ curr_queue , " Not running " ] )
else :
if len ( list ( server . smembers ( ' MODULE_TYPE_ ' + curr_queue ) ) ) == 0 :
if curr_queue not in no_info_modules :
no_info_modules [ curr_queue ] = int ( time . time ( ) )
printarray3 . append ( [ curr_queue , " No data " ] )
else :
#If no info since long time, try to kill
2016-11-25 15:00:46 +00:00
if args . autokill == 1 and int ( time . time ( ) ) - no_info_modules [ curr_queue ] > threshold_stucked_module :
2016-11-25 10:54:16 +00:00
kill_module ( curr_queue , None )
2016-11-24 15:58:32 +00:00
no_info_modules [ curr_queue ] = int ( time . time ( ) )
printarray3 . append ( [ curr_queue , " Stuck or idle, restarting in " + str ( threshold_stucked_module - ( int ( time . time ( ) ) - no_info_modules [ curr_queue ] ) ) + " s " ] )
2016-08-26 07:21:02 +00:00
2016-11-25 13:44:27 +00:00
#printarray1.sort(lambda x,y: cmp(x[0], y[0]), reverse=False)
printarray1 . sort ( key = lambda x : x [ 0 ] [ 9 : ] , reverse = False )
#printarray2.sort(lambda x,y: cmp(x[0], y[0]), reverse=False)
printarray2 . sort ( key = lambda x : x [ 0 ] [ 9 : ] , reverse = False )
2016-08-26 13:27:37 +00:00
printarray1 . insert ( 0 , [ " Queue " , " PID " , " Amount " , " Paste start time " , " Processing time for current paste (H:M:S) " , " Paste hash " ] )
printarray2 . insert ( 0 , [ " Queue " , " PID " , " Amount " , " Paste start time " , " Time since idle (H:M:S) " , " Last paste hash " ] )
2016-08-26 07:21:02 +00:00
printarray3 . insert ( 0 , [ " Queue " , " State " ] )
os . system ( ' clear ' )
t1 = AsciiTable ( printarray1 , title = " Working queues " )
t1 . column_max_width ( 1 )
if not t1 . ok :
longest_col = t1 . column_widths . index ( max ( t1 . column_widths ) )
max_length_col = t1 . column_max_width ( longest_col )
if max_length_col > 0 :
for i , content in enumerate ( t1 . table_data ) :
if len ( content [ longest_col ] ) > max_length_col :
temp = ' '
for l in content [ longest_col ] . splitlines ( ) :
if len ( l ) > max_length_col :
temp + = ' \n ' . join ( textwrap . wrap ( l , max_length_col ) ) + ' \n '
else :
temp + = l + ' \n '
content [ longest_col ] = temp . strip ( )
t1 . table_data [ i ] = content
t2 = AsciiTable ( printarray2 , title = " Idling queues " )
t2 . column_max_width ( 1 )
if not t2 . ok :
longest_col = t2 . column_widths . index ( max ( t2 . column_widths ) )
max_length_col = t2 . column_max_width ( longest_col )
if max_length_col > 0 :
for i , content in enumerate ( t2 . table_data ) :
if len ( content [ longest_col ] ) > max_length_col :
temp = ' '
for l in content [ longest_col ] . splitlines ( ) :
if len ( l ) > max_length_col :
temp + = ' \n ' . join ( textwrap . wrap ( l , max_length_col ) ) + ' \n '
else :
temp + = l + ' \n '
content [ longest_col ] = temp . strip ( )
t2 . table_data [ i ] = content
t3 = AsciiTable ( printarray3 , title = " Not running queues " )
t3 . column_max_width ( 1 )
2016-11-25 10:54:16 +00:00
printarray4 = [ ]
for elem in printarrayGlob :
if elem is not None :
printarray4 . append ( elem )
t4 = AsciiTable ( printarray4 , title = " Last actions " )
t4 . column_max_width ( 1 )
2016-08-26 07:21:02 +00:00
print t1 . table
print ' \n '
print t2 . table
print ' \n '
print t3 . table
2016-11-25 10:54:16 +00:00
print ' \n '
print t4 . table
2016-08-26 07:21:02 +00:00
2016-08-26 13:27:37 +00:00
if ( datetime . datetime . now ( ) - lastTime ) . total_seconds ( ) > args . refresh * 5 :
lastTime = datetime . datetime . now ( )
cleanRedis ( )
2016-08-26 07:21:02 +00:00
time . sleep ( args . refresh )