One python multiprocess errors
I have one multprocess demo here, and I met some problems with it. Researched for a night, I cannot resolve the reason. Any one can help me?
I want to have one parent process acts as producer, when there are tasks come, the parent can fork some children to consume these tasks. The parent monitors the child, if any one exits with exception, it can be restarted by parent.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from multiprocessing import Process, Queue from Queue import Empty import sys, signal, os, random, time import traceback
child_process = []
child_process_num = 4
queue = Queue(0)
def work(queue):
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
time.sleep(10) #demo sleep
def kill_child_processes(signum, frame):
#terminate all children
pass
def restart_child_process(signum, frame):
global child_process
for i in xrange(len(child_process)):
child = child_process[i]
try:
if child.is_alive():
continue
except OSError, e:
pass
child.join() #join this process to make sure there is no zombie process
new_child = Process(target=work, args=(queue,))
new_child.start()
child_process[i] = new_child #restart one new process
child = None
return
if __name__ == '__main__':
reload(sys)
sys.setdefaultencoding("utf-8")
for i in xrange(child_process_num):
child = Process(target=work, args=(queue,))
child.start()
child_process.append(child)
signal.signal(signal.SIGINT, kill开发者_JAVA百科_child_processes)
signal.signal(signal.SIGTERM, kill_child_processes) #hook the SIGTERM
signal.signal(signal.SIGCHLD, restart_child_process)
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
When this program runs, there will be errors as below:
Error in atexit._run_exitfuncs: Error in sys.exitfunc: Traceback (most recent call last): File "/usr/local/python/lib/python2.6/atexit.py", line 30, in _run_exitfuncs traceback.print_exc() File "/usr/local/python/lib/python2.6/traceback.py", line 227, in print_exc print_exception(etype, value, tb, limit, file) File "/usr/local/python/lib/python2.6/traceback.py", line 124, in print_exception _print(file, 'Traceback (most recent call last):') File "/usr/local/python/lib/python2.6/traceback.py", line 12, in _print def _print(file, str='', terminator='\n'): File "test.py", line 42, in restart_child_process new_child.start() File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 99, in start _cleanup() File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 53, in _cleanup if p._popen.poll() is not None: File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 106, in poll pid, sts = os.waitpid(self.pid, flag) OSError: [Errno 10] No child processes
If I send signal to one child:kill –SIGINT {child_pid} I will get:
[root@mail1 mail]# kill -SIGINT 32545 [root@mail1 mail]# Error in atexit._run_exitfuncs: Traceback (most recent call last): File "/usr/local/python/lib/python2.6/atexit.py", line 24, in _run_exitfuncs func(*targs, **kargs) File "/usr/local/python/lib/python2.6/multiprocessing/util.py", line 269, in _exit_function p.join() File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 119, in join res = self._popen.wait(timeout) File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 117, in wait return self.poll(0) File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 106, in poll pid, sts = os.waitpid(self.pid, flag) OSError: [Errno 4] Interrupted system call Error in sys.exitfunc: Traceback (most recent call last): File "/usr/local/python/lib/python2.6/atexit.py", line 24, in _run_exitfuncs func(*targs, **kargs) File "/usr/local/python/lib/python2.6/multiprocessing/util.py", line 269, in _exit_function p.join() File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 119, in join res = self._popen.wait(timeout) File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 117, in wait return self.poll(0) File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 106, in poll pid, sts = os.waitpid(self.pid, flag) OSError: [Errno 4] Interrupted system call
Main proc is waiting for all child procs to be terminated before exits itself so there's a blocking call (i.e. wait4) registered as at_exit handles. The signal you sent interrupts that blocking call thus the stack trace.
The thing I'm not clear about is that if the signal sent to child would be redirected to the parent process, which then interrupted that wait4 call. This is something related to the Unix process group behaviors.
精彩评论