一、簡介
守護進程最重要的特性是後台運行;它必須與其運行前的環境隔離開來,這些環境包括未關閉的文件描述符、控制終端、會話和進程組、工作目錄以及文件創建掩碼等;它可以在系統啟動時從啟動腳本/etc/rc.d中啟動,可以由inetd守護進程啟動,也可以由作業規劃進程crond啟動,還可以由用戶終端(通常是shell)執行。
Python有時需要保證只運行一個腳本實例,以避免數據的沖突。
二、Python守護進程
1、函數實現
- #!/usr/bin/env python
- #coding: utf-8
- import sys, os
def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
    """Turn the current process into a daemon using the double-fork technique.

    Do not call this when the process is started by inetd: inetd already
    does everything required, including redirecting the standard file
    descriptors, so only chdir() and umask() would remain to be done.

    Args:
        stdin: path opened for reading and dup2()'d onto sys.stdin's descriptor.
        stdout: path opened for appending and dup2()'d onto sys.stdout's descriptor.
        stderr: path opened unbuffered for appending and dup2()'d onto
            sys.stderr's descriptor.

    Both intermediate parents leave through sys.exit(0). A failed fork is
    reported on stderr and ends the process with status 1.
    """
    try:
        pid = os.fork()
        # The parent (the session leader) exits; the surviving child is not
        # a session leader.
        if pid > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write("fork #1 failed: (%d) %s\n" % (e.errno, e.strerror))
        sys.exit(1)

    # Detach from the environment inherited from the parent.
    os.chdir("/")   # keep no directory busy, otherwise its filesystem cannot be unmounted
    os.umask(0)     # the inherited umask is unknown; reset it for full control over created files
    os.setsid()     # become leader of a new session and process group, leaving the login session

    # Second fork: a non-session-leader can never re-acquire a controlling terminal.
    try:
        pid = os.fork()
        if pid > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write("fork #2 failed: (%d) %s\n" % (e.errno, e.strerror))
        sys.exit(1)

    # The process is a daemon now; redirect the standard file descriptors.
    for f in sys.stdout, sys.stderr:
        f.flush()
    si = open(stdin, 'r')
    so = open(stdout, 'a+')
    # Unbuffered I/O is only permitted in binary mode on Python 3; only the
    # descriptor is used below, so the mode of the Python object is irrelevant.
    se = open(stderr, 'ab+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())    # dup2 closes and duplicates atomically
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
# Example payload: emit a counter and a timestamp once per second.
def main():
    """Write start-up banners, then log "<counter>: <ctime>" every second, forever."""
    import time
    for banner in ('Daemon started with pid %d\n' % os.getpid(),
                   'Daemon stdout output\n'):
        sys.stdout.write(banner)
    sys.stderr.write('Daemon stderr output\n')
    tick = 0
    while True:
        line = '%d: %s\n' % (tick, time.ctime())
        sys.stdout.write(line)
        sys.stdout.flush()   # push each line out immediately
        tick += 1
        time.sleep(1)
if __name__ == "__main__":
    # Detach first, then run the example loop inside the daemon process.
    daemonize(stdin='/dev/null',
              stdout='/tmp/daemon_stdout.log',
              stderr='/tmp/daemon_error.log')
    main()
可以通過命令ps -ef | grep daemon.py查看後台運行的進程,在/tmp/daemon_error.log會記錄錯誤運行日志,在/tmp/daemon_stdout.log會記錄標準輸出日志。
2、類實現
- #!/usr/bin/env python
- #coding: utf-8
- #python模擬linux的守護進程
- import sys, os, time, atexit, string
- from signal import SIGTERM
class Daemon:
    """Double-fork UNIX daemon controlled through a pid file.

    Subclasses supply the workload by overriding run() (or the older
    _run() hook, which run() calls by default).
    """

    def __init__(self, pidfile, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
        """Remember the pid file path and the redirection targets.

        For debugging output pass stdin='/dev/stdin', stdout='/dev/stdout',
        stderr='/dev/stderr' and run as root.
        """
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.pidfile = pidfile

    def _read_pid(self):
        """Return the pid stored in the pid file, or None if it is missing or unparsable."""
        try:
            with open(self.pidfile, 'r') as pf:
                return int(pf.read().strip())
        except (IOError, ValueError):
            return None

    def _daemonize(self):
        """Detach via double fork, redirect stdio and write the pid file."""
        try:
            pid = os.fork()   # first fork: detach from the parent
            if pid > 0:
                sys.exit(0)   # the original process exits
        except OSError as e:
            sys.stderr.write('fork #1 failed: %d (%s)\n' % (e.errno, e.strerror))
            sys.exit(1)

        os.chdir("/")   # change the working directory
        os.setsid()     # start a new session
        os.umask(0)     # reset the file creation mask

        try:
            pid = os.fork()   # second fork: the daemon can no longer open a terminal
            if pid > 0:
                sys.exit(0)
        except OSError as e:
            sys.stderr.write('fork #2 failed: %d (%s)\n' % (e.errno, e.strerror))
            sys.exit(1)

        # Redirect the standard file descriptors.
        sys.stdout.flush()
        sys.stderr.flush()
        si = open(self.stdin, 'r')
        so = open(self.stdout, 'a+')
        # Unbuffered I/O requires binary mode on Python 3; only the
        # descriptor is used, so the object's mode does not matter.
        se = open(self.stderr, 'ab+', 0)
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())

        # Register the exit hook, then record our pid for start()/stop().
        atexit.register(self.delpid)
        with open(self.pidfile, 'w+') as pf:
            pf.write('%s\n' % os.getpid())

    def delpid(self):
        """Remove the pid file (registered with atexit by _daemonize)."""
        os.remove(self.pidfile)

    def start(self):
        """Daemonize and run the workload; exit with status 1 if a pid is already recorded."""
        if self._read_pid():
            message = 'pidfile %s already exist. Daemon already running!\n'
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)
        self._daemonize()
        self.run()

    def stop(self):
        """Send SIGTERM to the recorded pid until it is gone, then remove the pid file.

        Returns quietly (so that restart() keeps working) when no pid is
        recorded. Any kill() failure other than "no such process" is printed
        and ends the caller with status 1.
        """
        import errno

        pid = self._read_pid()
        if not pid:   # not an error, so restart() can proceed
            message = 'pidfile %s does not exist. Daemon not running!\n'
            sys.stderr.write(message % self.pidfile)
            return

        try:
            while 1:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            # Compare the error code rather than the message text, which
            # varies with platform and locale.
            if err.errno == errno.ESRCH:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print(str(err))
                sys.exit(1)

    def restart(self):
        """Stop the running daemon, if any, and start a new one."""
        self.stop()
        self.start()

    def run(self):
        """Workload entry point called by start(); override in subclasses.

        Delegates to _run() by default so subclasses overriding either
        hook are honoured.
        """
        self._run()

    def _run(self):
        """Default workload: log a timestamped greeting every two seconds."""
        while True:
            sys.stdout.write('%s:hello world\n' % (time.ctime(),))
            sys.stdout.flush()
            time.sleep(2)
if __name__ == '__main__':
    # Command-line front end: exactly one argument selects the action.
    daemon = Daemon('/tmp/watch_process.pid', stdout='/tmp/watch_stdout.log')
    if len(sys.argv) != 2:
        print('usage: %s start|stop|restart' % sys.argv[0])
        sys.exit(2)
    actions = {
        'start': daemon.start,
        'stop': daemon.stop,
        'restart': daemon.restart,
    }
    action = actions.get(sys.argv[1])
    if action is None:
        print('unknown command')
        sys.exit(2)
    action()
    sys.exit(0)
運行結果:
可以參考:http://www.jejik.com/articles/2007/02/a_simple_unix_linux_daemon_in_python/,它是將Daemon設計成一個模板,在其他文件中from daemon import Daemon,然後定義子類,重寫run()方法實現自己的功能。
class MyDaemon(Daemon):
    """Example daemon that appends one line to /tmp/run.log every second."""

    def run(self):
        """Append 'Hello World' to the log once per second, forever."""
        while True:
            # Close the handle on every pass; reopening without closing
            # would leak one descriptor per second.
            with open('/tmp/run.log', 'a+') as fp:
                fp.write('Hello World\n')
            time.sleep(1)

    # Daemon implementations differ in which hook start() invokes (run() or
    # _run()); expose the loop under both names so it is always reached.
    _run = run
不足:尚未通過signal.signal(signal.SIGTERM, cleanup_handler)安裝信號處理函數;進程被SIGTERM按默認動作終止時不會執行atexit注冊的回調,因此程序退出時的回調函數delpid()沒有被調用。
然后,再寫個shell命令,加入開機啟動服務,每隔2秒檢測守護進程是否啟動,若沒有啟動則啟動,自動監控恢復程序。
#!/bin/sh
# Watchdog: every 2 seconds check whether daemonclass.py is running and
# start it when no matching process is found.
while true
do
    # The trailing "grep -v grep" drops the grep process itself from the
    # match; the pipeline status is non-zero when nothing is left.
    if ! ps -ef | grep "daemonclass.py" | grep -v "grep" > /dev/null; then
        daemonclass.py start
    fi
    sleep 2
done
三、python保證只能運行一個腳本實例
1、打開文件本身加鎖
- #!/usr/bin/env python
- #coding: utf-8
- import fcntl, sys, time, os
# Module-level reference that keeps the locked file object (and with it the
# lock) alive for the lifetime of the process.
pidfile = 0


def ApplicationInstance():
    """Take an exclusive, non-blocking flock on this script's own file.

    Prints a notice and exits with status 1 when another process already
    holds the lock.
    """
    global pidfile
    # Open read-only: mode "w" would truncate the script itself.
    pidfile = open(os.path.realpath(__file__), "r")
    try:
        # LOCK_NB makes the call fail immediately instead of blocking.
        fcntl.flock(pidfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        # Only lock failures mean "already running"; anything else propagates.
        print("another instance is running...")
        sys.exit(1)


if __name__ == "__main__":
    ApplicationInstance()
    while True:
        print('running...')
        time.sleep(1)
注意:open()參數不能使用w,否則會覆蓋本身文件;pidfile必須聲明為全局變量,否則局部變量生命周期結束,文件描述符會因引用計數為0被系統回收(若整個函數寫在主函數中,則不需要定義成global)。
2、打開自定義文件並加鎖
- #!/usr/bin/env python
- #coding: utf-8
- import fcntl, sys, time
# Module-level reference that keeps the locked file object (and with it the
# lock) alive for the lifetime of the process.
pidfile = 0


def ApplicationInstance(lock_path="instance.pid"):
    """Take an exclusive, non-blocking lockf lock on a dedicated lock file.

    Args:
        lock_path: file to create and lock. The default is relative to the
            current working directory, so pass an absolute path when
            instances may be launched from different directories.

    Prints a notice and exits with status 0 when another process already
    holds the lock.
    """
    global pidfile
    pidfile = open(lock_path, "w")
    try:
        # LOCK_NB makes the call fail immediately instead of blocking.
        fcntl.lockf(pidfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        print("another instance is running...")
        sys.exit(0)


if __name__ == "__main__":
    ApplicationInstance()
    while True:
        print('running...')
        time.sleep(1)
3、檢測文件中PID
- #!/usr/bin/env python
- #coding: utf-8
- import time, os, sys
- import signal
pidfile = '/tmp/process.pid'


def sig_handler(sig, frame):
    """Signal callback: delete the pid file when it exists, then exit with status 0."""
    pid_file_present = os.path.exists(pidfile)
    if pid_file_present:
        os.remove(pidfile)
    raise SystemExit(0)
def ApplicationInstance():
    """Allow a single instance by recording this process's pid in `pidfile`.

    Installs sig_handler for SIGTERM, SIGINT and SIGQUIT, then exits with
    status 0 when the pid file already holds a pid; otherwise writes the
    current pid to it. A missing or unparsable pid file counts as "no
    instance running".
    """
    for signum in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
        signal.signal(signum, sig_handler)

    try:
        with open(pidfile, 'r') as pf:
            pid = int(pf.read().strip())
    except (IOError, ValueError):
        pid = None

    if pid:
        sys.stdout.write('instance is running...\n')
        sys.exit(0)

    with open(pidfile, 'w+') as pf:
        pf.write('%s\n' % os.getpid())


if __name__ == "__main__":
    ApplicationInstance()
    while True:
        print('running...')
        time.sleep(1)
4、檢測特定文件夾或文件
- #!/usr/bin/env python
- #coding: utf-8
- import time, commands, signal, sys
def sig_handler(sig, frame):
    """Signal callback: remove the /tmp/test marker directory, then exit with status 0."""
    # The script's import line does not bring in os, so the handler used to
    # fail with NameError; import it here.
    import os
    try:
        os.rmdir("/tmp/test")
    except OSError:
        # Best-effort cleanup: a missing or non-removable marker must not
        # keep the process from exiting.
        pass
    sys.exit(0)
def ApplicationInstance():
    """Allow a single instance by using the directory /tmp/test as a marker.

    Installs sig_handler for SIGTERM, SIGINT and SIGQUIT, then tries to
    create the directory; when creation fails, prints a notice and exits
    with status 0.
    """
    # Local import: the script's import line does not bring in os.
    import os

    for signum in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
        signal.signal(signum, sig_handler)

    try:
        # Create the directory directly instead of spawning a shell through
        # the `commands` module, which no longer exists on Python 3.
        os.mkdir("/tmp/test")
    except OSError:
        print("instance is running...")
        sys.exit(0)


if __name__ == "__main__":
    ApplicationInstance()
    while True:
        print('running...')
        time.sleep(1)
也可以檢測某一個特定的文件,判斷文件是否存在:
- import os
- import os.path
- import time
# Class implementing a single-application-instance mechanism via a pid file.
class ApplicationInstance:
    """Single-instance guard: construction exits the process if a live owner exists."""

    def __init__(self, pid_file):
        """Check `pid_file` for a live owner, then record this process's pid in it."""
        self.pid_file = pid_file
        self.check()
        self.startApplication()

    def check(self):
        """Exit with status 0 when the pid file names a process that still exists.

        Returns normally when the pid file is missing, unreadable, holds no
        positive integer, or names a process that no longer exists.
        """
        import errno

        if not os.path.isfile(self.pid_file):
            return

        # Read the pid; unreadable or malformed content counts as stale.
        try:
            with open(self.pid_file, 'rt') as handle:
                pid = int(handle.read())
        except (IOError, OSError, ValueError):
            return
        if pid <= 0:
            return

        # Signal 0 delivers nothing; it only performs the existence and
        # permission checks.
        try:
            os.kill(pid, 0)
        except OSError as err:
            # EPERM means the process exists but belongs to another user;
            # every other failure is treated as "not running".
            if err.errno != errno.EPERM:
                return

        print("The application is already running...")
        # Raised outside any try block so nothing swallows it.
        raise SystemExit(0)

    def startApplication(self):
        """Record the current pid in the pid file."""
        with open(self.pid_file, 'wt') as handle:
            handle.write(str(os.getpid()))

    def exitApplication(self):
        """Remove the pid file; a file that is already gone is ignored."""
        try:
            os.remove(self.pid_file)
        except OSError:
            pass
if __name__ == '__main__':
    # Building the guard performs the single-instance check and records our pid.
    appInstance = ApplicationInstance('/tmp/myapp.pid')

    # Stand-in for the real workload.
    print("Start MyApp")
    time.sleep(5)
    print("End MyApp")

    # Release the pid file.
    appInstance.exitApplication()
上述os.kill( pid, 0 )用於檢測pid對應的進程是否還活着:信號0不會真正發送任何信號,只做存在性與權限檢查;若該pid的進程已經停止則拋出異常,若正在運行則正常返回。
5、socket監聽一個特定端口
- #!/usr/bin/env python
- #coding: utf-8
- import socket, time, sys
def ApplicationInstance(port=60123, host=None):
    """Allow a single instance by binding a TCP socket that only one process can hold.

    Args:
        port: TCP port used as the lock (default 60123).
        host: address to bind; None binds the name from socket.gethostname().

    Prints a notice and exits with status 0 when the address is already in
    use. Any other socket error (for example a hostname that cannot be
    resolved) propagates instead of being reported as a running instance.
    The socket is stored in the module-level name `s` so it stays open for
    the lifetime of the process.
    """
    import errno

    global s
    s = socket.socket()
    bind_host = socket.gethostname() if host is None else host
    try:
        s.bind((bind_host, port))
    except socket.error as err:
        if err.errno != errno.EADDRINUSE:
            raise
        print("instance is running...")
        sys.exit(0)


if __name__ == "__main__":
    ApplicationInstance()
    while True:
        print('running...')
        time.sleep(1)
可以將該函數使用裝飾器實現,便於重用(效果與上述相同):
- #!/usr/bin/env python
- #coding: utf-8
- import socket, time, sys
- import functools
# Decorator form of the socket-based single-instance guard.
def ApplicationInstance(func):
    """Decorate `func` so it only runs when TCP port 60123 can be bound.

    The wrapper binds (socket.gethostname(), 60123). When the address is
    already in use it prints a notice and returns None without calling
    `func`; any other socket error propagates. The socket is stored in the
    module-level name `s` so it stays open while `func` runs.
    """
    @functools.wraps(func)
    def fun(*args, **kwargs):
        import errno

        global s
        s = socket.socket()
        try:
            s.bind((socket.gethostname(), 60123))
        except socket.error as err:
            # Only "address already in use" means another instance holds the port.
            if err.errno != errno.EADDRINUSE:
                raise
            print('already has an instance...')
            return None
        return func(*args, **kwargs)
    return fun
@ApplicationInstance
def main():
    """Example workload: report liveness once per second, forever."""
    while 1:
        print('running...')
        time.sleep(1)


if __name__ == "__main__":
    main()
四、總結
(1)守護進程和單腳本運行在實際應用中比較重要,方法也比較多,可選擇合適的來進行修改,可以將它們做成一個單獨的類或模板,然后子類化實現自定義。
(2)daemon監控進程自動恢復避免了nohup和&的使用,並配合shell腳本可以省去很多不定時手動重啟掛掉服務的麻煩。
(3)若有更好的設計和想法,可隨時留言,在此先感謝!