fix(auto-deploy): prevent self-kill when pm2 restart runs inside webhook handler #44

Merged
pangtong-fujunshi merged 2 commits from fix/auto-deploy-self-kill into main 2026-06-12 10:54:26 +00:00
+42 -10
View File
@@ -557,19 +557,51 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None:
if needs_restart:
post_deploy_cmds = target.get("post_deploy", [])
pm2_name = target.get("pm2_name", "")
for cmd in post_deploy_cmds:
logger.info("Auto-deploy: executing post_deploy: %s", cmd)
deploy_proc = await asyncio.create_subprocess_exec(
"sh", "-c", cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
_, deploy_err = await asyncio.wait_for(deploy_proc.communicate(), timeout=30)
if deploy_proc.returncode != 0:
logger.error("Auto-deploy: post_deploy failed: %s", deploy_err.decode())
_send_deploy_failure_mail(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}")
break
# M2: 检测当前进程是否会被此命令杀掉(而非脆弱的字符串匹配)
# 通过 PM2 环境变量判断:pm2 启动的进程有 PM2_HOME
self_restart = False
if pm2_name and os.environ.get("PM2_HOME") and "pm2 restart" in cmd:
# 检查命令是否包含当前进程名
import re
if re.search(rf'pm2\s+restart\s+{re.escape(pm2_name)}', cmd):
self_restart = True
if self_restart:
# M1: 用 asyncio.sleep 延迟而非 nohup,保留子进程输出和错误检测
# 先 sleep 让 handler 正常返回,再启动 restart 命令
# restart 的子进程会在父进程死后被 pm2 新进程接管
logger.info("Auto-deploy: self-restart detected, deferring 2s: %s", cmd)
await asyncio.sleep(2)
deploy_proc = await asyncio.create_subprocess_exec(
"sh", "-c", cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
# restart 会杀掉当前进程,communicate 可能不会完成
# 但我们至少尝试读取输出
try:
_, deploy_err = await asyncio.wait_for(
deploy_proc.communicate(), timeout=10)
except (asyncio.TimeoutError, ProcessLookupError):
# 预期行为:进程被 pm2 restart 杀掉
logger.info("Auto-deploy: process killed by self-restart (expected)")
break
else:
deploy_proc = await asyncio.create_subprocess_exec(
"sh", "-c", cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
_, deploy_err = await asyncio.wait_for(deploy_proc.communicate(), timeout=30)
if deploy_proc.returncode != 0:
logger.error("Auto-deploy: post_deploy failed: %s", deploy_err.decode())
_send_deploy_failure_mail(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}")
break
else:
logger.info("Auto-deploy: all post_deploy commands succeeded (files: %s)", ", ".join(file_list[:5]))
else: