feat: 通过匿名管道实现父进程死亡检测,替代轮询方案
- worker.go: 为每个子进程创建 death pipe,Go 持有写端 - __init__.py: run() 中监听管道读端 EOF,Go 退出时立即终止子进程 - 解决 Ctrl+C / panic / SIGKILL 等场景下 Python worker 变成孤儿进程的问题 - Python 包版本升至 0.1.2
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "python"))
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "python"))
|
||||||
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
@@ -13,14 +14,17 @@ from gobridge import expose, call_go, run, worker_id, worker_count
|
|||||||
# 其余 worker 跳过,避免端口冲突 / 重复连接。
|
# 其余 worker 跳过,避免端口冲突 / 重复连接。
|
||||||
print(f"[worker {worker_id}/{worker_count}] started", flush=True)
|
print(f"[worker {worker_id}/{worker_count}] started", flush=True)
|
||||||
if worker_id == 0:
|
if worker_id == 0:
|
||||||
|
|
||||||
def _init_shared_resource():
|
def _init_shared_resource():
|
||||||
# 示例:此处可启动 WebSocket 客户端、监听 TCP 端口等
|
# 示例:此处可启动 WebSocket 客户端、监听 TCP 端口等
|
||||||
print(f"[worker {worker_id}] shared resource initialized", flush=True)
|
print(f"[worker {worker_id}] shared resource initialized", flush=True)
|
||||||
|
|
||||||
threading.Thread(target=_init_shared_resource, daemon=True).start()
|
threading.Thread(target=_init_shared_resource, daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
# ── 基础类型 ─────────────────────────────────────────────────────────────────
|
# ── 基础类型 ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@expose
|
@expose
|
||||||
def add(a: int, b: int) -> int:
|
def add(a: int, b: int) -> int:
|
||||||
return a + b
|
return a + b
|
||||||
@@ -48,6 +52,7 @@ def double_stream(numbers: Iterator[int]) -> Iterator[int]:
|
|||||||
|
|
||||||
# ── struct(dataclass / dict)类型 ───────────────────────────────────────────
|
# ── struct(dataclass / dict)类型 ───────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class User:
|
class User:
|
||||||
id: int
|
id: int
|
||||||
@@ -95,6 +100,7 @@ def process_users(users: Iterator[dict]) -> Iterator[dict]:
|
|||||||
|
|
||||||
# ── Server 全双工示例 ────────────────────────────────────────────────────────
|
# ── Server 全双工示例 ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@expose
|
@expose
|
||||||
def compute_with_go_mul(a: int, b: int) -> int:
|
def compute_with_go_mul(a: int, b: int) -> int:
|
||||||
"""示例1:call_go[int] 指定返回类型"""
|
"""示例1:call_go[int] 指定返回类型"""
|
||||||
@@ -137,4 +143,7 @@ def get_user_via_go(uid: int) -> dict:
|
|||||||
return dataclasses.asdict(user)
|
return dataclasses.asdict(user)
|
||||||
|
|
||||||
|
|
||||||
run()
|
if __name__ == "__main__":
|
||||||
|
run()
|
||||||
|
print("worker_id", worker_id)
|
||||||
|
print("worker_count", worker_count)
|
||||||
|
|||||||
@@ -385,6 +385,17 @@ def run():
|
|||||||
server.bind(sock_path)
|
server.bind(sock_path)
|
||||||
server.listen(64)
|
server.listen(64)
|
||||||
|
|
||||||
|
# Death pipe:Go 持有写端,本进程阻塞读端。
|
||||||
|
# Go 进程消亡(任何原因:Ctrl+C、panic、SIGKILL)时写端自动关闭,read() 立即返回 EOF。
|
||||||
|
death_fd = int(os.environ.get("GOBRIDGE_DEATH_FD", "0"))
|
||||||
|
if death_fd:
|
||||||
|
def _watch_death_pipe():
|
||||||
|
with os.fdopen(death_fd, "rb") as f:
|
||||||
|
f.read(1) # 阻塞直到 Go 关闭写端(EOF)
|
||||||
|
server.close()
|
||||||
|
os._exit(0)
|
||||||
|
threading.Thread(target=_watch_death_pipe, daemon=True).start()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "gobridge"
|
name = "gobridge"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
description = "Python 端库,配合 Go 侧 gobridge 使用"
|
description = "Python 端库,配合 Go 侧 gobridge 使用"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
|
|
||||||
|
|||||||
30
worker.go
30
worker.go
@@ -20,8 +20,9 @@ type worker struct {
|
|||||||
stopCh chan struct{}
|
stopCh chan struct{}
|
||||||
stopOnce sync.Once
|
stopOnce sync.Once
|
||||||
|
|
||||||
mu sync.Mutex // 保护 cmd
|
mu sync.Mutex // 保护 cmd
|
||||||
cmd *exec.Cmd
|
cmd *exec.Cmd
|
||||||
|
deathPipe *os.File // 管道写端,关闭时通知 Python 子进程退出
|
||||||
}
|
}
|
||||||
|
|
||||||
func newWorker(cfg *poolConfig, id int) (*worker, error) {
|
func newWorker(cfg *poolConfig, id int) (*worker, error) {
|
||||||
@@ -44,6 +45,13 @@ func (w *worker) start() error {
|
|||||||
os.Remove(sockPath)
|
os.Remove(sockPath)
|
||||||
w.sockPath = sockPath
|
w.sockPath = sockPath
|
||||||
|
|
||||||
|
// 创建 death pipe:Go 持有写端,Python 阻塞读端。
|
||||||
|
// Go 进程消亡(任何原因)时写端自动关闭,Python 立即收到 EOF 并退出。
|
||||||
|
pr, pw, err := os.Pipe()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("create death pipe: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
cmd := exec.Command(w.cfg.pythonExe, w.cfg.scriptArgs...)
|
cmd := exec.Command(w.cfg.pythonExe, w.cfg.scriptArgs...)
|
||||||
cmd.Dir = w.cfg.workDir
|
cmd.Dir = w.cfg.workDir
|
||||||
cmd.Env = append(os.Environ(), w.cfg.env...)
|
cmd.Env = append(os.Environ(), w.cfg.env...)
|
||||||
@@ -51,7 +59,9 @@ func (w *worker) start() error {
|
|||||||
"GOBRIDGE_SOCKET_PATH="+sockPath,
|
"GOBRIDGE_SOCKET_PATH="+sockPath,
|
||||||
fmt.Sprintf("GOBRIDGE_WORKER_ID=%d", w.id),
|
fmt.Sprintf("GOBRIDGE_WORKER_ID=%d", w.id),
|
||||||
fmt.Sprintf("GOBRIDGE_WORKER_COUNT=%d", w.cfg.workers),
|
fmt.Sprintf("GOBRIDGE_WORKER_COUNT=%d", w.cfg.workers),
|
||||||
|
"GOBRIDGE_DEATH_FD=3", // ExtraFiles[0] → fd 3
|
||||||
)
|
)
|
||||||
|
cmd.ExtraFiles = []*os.File{pr} // fd 3 in child
|
||||||
if w.cfg.stdout != nil {
|
if w.cfg.stdout != nil {
|
||||||
cmd.Stdout = w.cfg.stdout
|
cmd.Stdout = w.cfg.stdout
|
||||||
} else {
|
} else {
|
||||||
@@ -64,11 +74,18 @@ func (w *worker) start() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
|
pr.Close()
|
||||||
|
pw.Close()
|
||||||
return fmt.Errorf("start python worker: %w", err)
|
return fmt.Errorf("start python worker: %w", err)
|
||||||
}
|
}
|
||||||
|
pr.Close() // 父进程不需要读端,关闭后子进程独占
|
||||||
|
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
w.cmd = cmd
|
w.cmd = cmd
|
||||||
|
if w.deathPipe != nil {
|
||||||
|
w.deathPipe.Close() // 关闭上一个周期的写端(重启场景)
|
||||||
|
}
|
||||||
|
w.deathPipe = pw
|
||||||
w.mu.Unlock()
|
w.mu.Unlock()
|
||||||
|
|
||||||
// 等待 socket 文件出现(最多 10 秒)
|
// 等待 socket 文件出现(最多 10 秒)
|
||||||
@@ -174,9 +191,14 @@ func (w *worker) stop() {
|
|||||||
cmd := w.cmd
|
cmd := w.cmd
|
||||||
w.mu.Unlock()
|
w.mu.Unlock()
|
||||||
|
|
||||||
|
w.mu.Lock()
|
||||||
|
pipe := w.deathPipe
|
||||||
|
w.deathPipe = nil
|
||||||
|
w.mu.Unlock()
|
||||||
|
if pipe != nil {
|
||||||
|
pipe.Close() // 关闭写端,Python 收到 EOF 自动退出
|
||||||
|
}
|
||||||
if cmd != nil && cmd.Process != nil {
|
if cmd != nil && cmd.Process != nil {
|
||||||
cmd.Process.Signal(os.Interrupt) // 先发 SIGINT,Python 已忽略,等效于 Kill
|
|
||||||
time.Sleep(50 * time.Millisecond)
|
|
||||||
cmd.Process.Kill()
|
cmd.Process.Kill()
|
||||||
}
|
}
|
||||||
for len(w.conns) > 0 {
|
for len(w.conns) > 0 {
|
||||||
|
|||||||
Reference in New Issue
Block a user