寫一個python的服務監控程序
前言:
Redhat下安裝Python2.7
RHEL 6.4 自帶的是 Python 2.6,有的機器上甚至還是 Python 2.4。到 Python 官方站點下載 2.7 的源碼,解壓到 Redhat 機器上,然後執行以下命令:
# ./configure --prefix=/usr/local/python27
# make
# make install
這樣安裝之後默認不會啟用 Python 2.7,需要使用 /usr/local/python27/bin/python2.7 來調用新版本的 python。
而以下的安裝方式(不指定 --prefix,默認安裝到 /usr/local)會直接接管現有的 python 命令:
# ./configure
# make
# make install
開始:服務子進程由監控主進程創建並監控,當子進程異常關閉時,主進程能夠再次啟動它。這裡使用了 python 的 subprocess 模塊。就這麼簡單的代碼,互聯網上竟然沒有現成可用的樣例。
沒辦法,我寫好了貢獻出來吧。
首先是主進程代碼:service_mgr.py
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# cheungmine
#
# Service manager: starts a worker as a child process, relays the child's
# output and starts the worker again every time it exits.
#
# Notes on the stream arguments of subprocess.Popen:
# stdin, stdout and stderr are the child's standard input, standard output
# and standard error. Accepted values are:
#   subprocess.PIPE - create a new pipe to the child.
#   a valid file descriptor (a positive integer)
#   a file object
#   None - no redirection; the child inherits the parent's descriptors.
#
# stderr may additionally be subprocess.STDOUT, which sends the child's
# standard error to the same destination as its standard output.
################################################################################
import os
import sys
import getopt
import time
import datetime
import codecs
import optparse
try:
    import ConfigParser
except ImportError:  # the module is named configparser on Python 3
    import configparser as ConfigParser
import signal
import subprocess
import select
import threading

# logging
# require python2.6.6 and later
import logging
from logging.handlers import RotatingFileHandler

## log settings: SHOULD BE CONFIGURED BY config
LOG_PATH_FILE = "./my_service_mgr.log"
LOG_MODE = 'a'
LOG_MAX_SIZE = 4*1024*1024  # 4M per file
LOG_MAX_FILES = 4           # 4 Files: my_service_mgr.log.1, my_service_mgr.log.2, ...
LOG_LEVEL = logging.DEBUG

LOG_FORMAT = "%(asctime)s %(levelname)-10s[%(filename)s:%(lineno)d(%(funcName)s)] %(message)s"

handler = RotatingFileHandler(LOG_PATH_FILE, LOG_MODE, LOG_MAX_SIZE, LOG_MAX_FILES)
formatter = logging.Formatter(LOG_FORMAT)
handler.setFormatter(formatter)

Logger = logging.getLogger()
Logger.setLevel(LOG_LEVEL)
Logger.addHandler(handler)

# color output
#
pid = os.getpid()


def _write_console(text):
    """Write `text` plus a newline to stdout with a single write call.

    Console messages are produced by the main thread and by the pipe reader
    threads; one write per message keeps a message and its newline together.
    """
    sys.stdout.write(text + '\n')


def print_error(s):
    """Print `s` to the console in red, tagged with this process id."""
    _write_console('\033[31m[%d: ERROR] %s\033[31;m' % (pid, s))


def print_info(s):
    """Print `s` to the console in green, tagged with this process id."""
    _write_console('\033[32m[%d: INFO] %s\033[32;m' % (pid, s))


def print_warning(s):
    """Print `s` to the console in yellow, tagged with this process id."""
    _write_console('\033[33m[%d: WARNING] %s\033[33;m' % (pid, s))


def start_child_proc(command, merged):
    """Start `command` as a child process with its output captured in pipes.

    command -- argument list handed to subprocess.Popen.
    merged  -- when true, the child's stderr is sent to the stdout pipe and
               `child.stderr` is None; otherwise stderr gets its own pipe.

    The pipes are opened in text mode (universal newlines), so reads return
    str with '\\n' line endings.

    Returns the subprocess.Popen object.
    Raises OSError when `command` is empty/None or the process cannot be
    started; the underlying error text is included in the message.
    """
    if not command:
        raise OSError("Invalid command")

    if merged:
        stderr_target = subprocess.STDOUT   # stderr goes to the stdout pipe
    else:
        stderr_target = subprocess.PIPE     # stderr gets a pipe of its own

    try:
        return subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=stderr_target,
                                universal_newlines=True)
    except (OSError, ValueError) as err:
        # OSError: e.g. executable not found; ValueError: invalid Popen arguments
        raise OSError("Failed to run command! (%s)" % err)


def _report_stdout(line):
    """Log and print one line read from the child's stdout."""
    Logger.info(line)
    print_info(line)


def _report_stderr(line):
    """Log and print one line read from the child's stderr."""
    Logger.info(line)
    print_error(line)


def _pump_lines(pipe, handle_line):
    """Pass every line of `pipe` to `handle_line` until EOF, then close the pipe.

    A final line without a trailing newline is delivered too.
    """
    try:
        for raw in iter(pipe.readline, ''):
            handle_line(raw.rstrip('\n'))
    finally:
        pipe.close()


def _start_pump(pipe, handle_line):
    """Run _pump_lines(pipe, handle_line) in a daemon thread and return the thread."""
    worker = threading.Thread(target=_pump_lines, args=(pipe, handle_line))
    worker.daemon = True    # must not keep the manager alive on its own
    worker.start()
    return worker


def _run_child_once(command, merged):
    """Start the child, relay its output until it exits, and return its exit code.

    Each pipe is read by its own thread, so a child that writes to only one
    of its streams cannot stall the reading of the other.
    """
    child = start_child_proc(command, merged)

    pumps = [_start_pump(child.stdout, _report_stdout)]
    if child.stderr is not None:    # None when stderr was merged into stdout
        pumps.append(_start_pump(child.stderr, _report_stderr))

    try:
        returncode = child.wait()
    except BaseException:
        # The manager itself is going down (e.g. KeyboardInterrupt):
        # stop the child instead of leaving it running unsupervised.
        if child.poll() is None:
            child.terminate()
        raise

    # Let the readers reach EOF so output written just before exit is kept.
    for worker in pumps:
        worker.join()
    return returncode


def run_forever(command, merged=False, restart_delay=1.0):
    """Run `command` as a child process and restart it whenever it exits.

    command       -- argument list for the child process.
    merged        -- when true, the child's stderr is merged into its stdout.
    restart_delay -- seconds to wait before each restart, so that a command
                     which fails immediately is not respawned in a tight
                     loop; 0 or None disables the wait.

    Every line the child writes is logged and printed (stdout in green,
    stderr in red). This function does not return normally.
    Raises OSError when the child cannot be started.
    """
    print_info("start child process with command: " + ' '.join(command))
    Logger.info("start child process with command: " + ' '.join(command))

    failover = 0
    while True:
        returncode = _run_child_once(command, merged)

        failover = failover + 1
        print_warning("child process shutdown with return code: " + str(returncode))
        Logger.critical("child process shutdown with return code: " + str(returncode))

        if restart_delay:
            time.sleep(restart_delay)

        print_warning("restart child process again, times=%d" % failover)
        Logger.info("restart child process again, times=%d" % failover)


if __name__ == "__main__":
    run_forever(["python", "./testpipe.py"])
然后是子進程代碼:testpipe.py
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# cheungmine
# Simulates a worker process: it alternately writes a random number to stdout
# and to stderr, pausing half a second before each write, and after 11 rounds
# (about 11 seconds) exits with a failure status.
import os
import sys
import time
import random

ROUNDS = 11     # same count as a counter running from 10 down to 0 inclusive

# (stream, line template) pairs written in this order during every round
OUTPUTS = (
    (sys.stdout, "OUT: %s\n"),
    (sys.stderr, "ERR: %s\n"),
)

for _ in range(ROUNDS):
    for stream, template in OUTPUTS:
        time.sleep(0.5)
        stream.write(template % str(random.randint(1, 100000)))
        stream.flush()  # push the line out immediately so the parent sees it

# Non-zero exit status: the simulated worker "crashes".
sys.exit(-1)
Linux上執行非常easy:
$ python service_mgr.py
Windows上以后台進程執行:
> start pythonw service_mgr.py
同時需要把代碼最後的啟動命令改為(去掉腳本路徑前的 ./):
run_forever(["python", "testpipe.py"])