From e6765340627dd885b5d406cc3302391a7a4867d7 Mon Sep 17 00:00:00 2001 From: Scott Date: Sat, 24 Jan 2026 18:21:49 -0700 Subject: [PATCH 1/4] fix(server): prevent Windows memory leak from orphaned Node.js processes On Windows, child processes spawned via cmd.exe or sh.exe don't terminate when the parent process is killed (unlike Unix where SIGTERM propagates). This caused dev servers and MCP processes to accumulate, consuming 30GB+ memory over time. Changes: - Add tree-kill integration for cross-platform process tree termination - Add cleanupOrphanedProcesses() to kill lingering processes when features complete - Add startup cleanup to catch orphans from previous runs - Fix PowerShell pattern matching (no backslash escaping needed for -like) - Fix PowerShell script syntax (semicolons required for single-line execution) - Apply tree-kill to subprocess.ts, dev-server-service, codex-app-server-service - Add completion tracking for graceful feature cleanup on stop - Add pid to mock process in tests for tree-kill condition Co-Authored-By: Claude Opus 4.5 --- apps/server/src/index.ts | 4 + .../routes/auto-mode/routes/stop-feature.ts | 9 +- apps/server/src/services/auto-mode-service.ts | 255 +++++++++++++++++- .../src/services/codex-app-server-service.ts | 18 +- .../server/src/services/dev-server-service.ts | 15 +- .../unit/services/dev-server-service.test.ts | 26 +- libs/platform/package.json | 3 +- libs/platform/src/index.ts | 3 + libs/platform/src/process-utils.ts | 119 ++++++++ libs/platform/src/subprocess.ts | 34 ++- 10 files changed, 466 insertions(+), 20 deletions(-) create mode 100644 libs/platform/src/process-utils.ts diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts index 4bd496bc8..dbb8704fb 100644 --- a/apps/server/src/index.ts +++ b/apps/server/src/index.ts @@ -259,6 +259,10 @@ const settingsService = new SettingsService(DATA_DIR); const agentService = new AgentService(DATA_DIR, events, settingsService); const featureLoader = new FeatureLoader(); const autoModeService = new AutoModeService(events, settingsService); +// Initialize auto mode service (performs startup cleanup of orphaned processes on Windows) +autoModeService.initialize().catch((err) => { + console.error('[AutoModeService] Initialization error:', err); +}); const claudeUsageService = new ClaudeUsageService(); const codexAppServerService = new CodexAppServerService(); const codexModelCacheService = new CodexModelCacheService(DATA_DIR, codexAppServerService); diff --git a/apps/server/src/routes/auto-mode/routes/stop-feature.ts b/apps/server/src/routes/auto-mode/routes/stop-feature.ts index bec9a4aa0..462ad9c92 100644 --- a/apps/server/src/routes/auto-mode/routes/stop-feature.ts +++ b/apps/server/src/routes/auto-mode/routes/stop-feature.ts @@ -9,14 +9,19 @@ import { getErrorMessage, logError } from '../common.js'; export function createStopFeatureHandler(autoModeService: AutoModeService) { return async (req: Request, res: Response): Promise => { try { - const { featureId } = req.body as { featureId: string }; + const { featureId, waitForCleanup } = req.body as { + featureId: string; + waitForCleanup?: boolean; + }; if (!featureId) { res.status(400).json({ success: false, error: 'featureId is required' }); return; } - const stopped = await autoModeService.stopFeature(featureId); + // Default to waiting for cleanup unless explicitly set to false + const shouldWait = waitForCleanup !== false; + const stopped = await autoModeService.stopFeature(featureId, shouldWait); res.json({ success: true, stopped }); } catch (error) { logError(error, 'Stop feature failed'); diff --git a/apps/server/src/services/auto-mode-service.ts b/apps/server/src/services/auto-mode-service.ts index 1fae8907e..3f5798133 100644 --- a/apps/server/src/services/auto-mode-service.ts +++ b/apps/server/src/services/auto-mode-service.ts @@ -51,6 +51,7 @@ import { getFeaturesDir, getExecutionStatePath, ensureAutomakerDir, + killProcessTree, } from '@automaker/platform'; import { exec } from 'child_process'; import { promisify } from 'util'; @@ -77,6 +78,110 @@ import { getNotificationService } from './notification-service.js'; const execAsync = promisify(exec); +const IS_WINDOWS = process.platform === 'win32'; + +/** + * Clean up orphaned processes related to a specific working directory. + * + * On Windows, processes spawned by agents (like dev servers) don't automatically + * terminate when the parent process is killed. This function finds and kills + * processes whose command line contains the working directory path. + * + * We search for multiple process types because the Claude Agent SDK spawns + * a chain like: sh.exe -> node.exe (npm) -> cmd.exe -> node.exe (next dev) + * If we only kill node.exe, the sh.exe/cmd.exe parents may keep things alive. + * + * @param workDir - The working directory to match against process command lines + * @param featureId - For logging purposes + */ +async function cleanupOrphanedProcesses(workDir: string, featureId: string): Promise { + if (!IS_WINDOWS) { + // On Unix, SIGTERM propagates to child processes properly + return; + } + + try { + // For PowerShell's -like operator, we need to escape only special wildcard chars (*, ?, [) + // Backslashes are NOT special in -like, so don't escape them + // Just normalize forward slashes to backslashes for Windows paths + const normalizedDir = workDir.replace(/\//g, '\\'); + + // Also create a Unix-style path variant (Git Bash uses /c/Users/... format) + const unixStyleDir = workDir + .replace(/^([A-Z]):/i, (_, drive) => `/${drive.toLowerCase()}`) + .replace(/\\/g, '/'); + + // Search for multiple process types that might be in the chain + // sh.exe (Git Bash), cmd.exe (Windows shell), node.exe (Node.js) + const processTypes = ['node.exe', 'sh.exe', 'cmd.exe']; + const processFilter = processTypes.map((p) => `$_.Name -eq '${p}'`).join(' -or '); + + // Get processes via PowerShell - match either Windows-style or Unix-style paths + const psCommand = `Get-CimInstance Win32_Process | Where-Object { (${processFilter}) -and ($_.CommandLine -like '*${normalizedDir}*' -or $_.CommandLine -like '*${unixStyleDir}*') } | Select-Object ProcessId, Name, CommandLine | ConvertTo-Json`; + + const { stdout: result } = await execAsync(`powershell -Command "${psCommand}"`, { + encoding: 'utf8', + timeout: 15000, + windowsHide: true, + }); + + if (!result.trim()) { + return; // No matching processes + } + + // Parse the JSON output + let processes: { ProcessId: number; Name: string; CommandLine: string }[] = []; + try { + const parsed = JSON.parse(result); + // PowerShell returns a single object if one result, array if multiple + processes = Array.isArray(parsed) ? parsed : [parsed]; + } catch { + logger.debug(`[cleanupOrphanedProcesses] Failed to parse PowerShell output`); + return; + } + + // Filter out: + // - The current process (Automaker server) + // - The parent of the current process + // - Any process that's part of the Automaker server chain + const serverPid = process.pid; + const automakerPaths = ['automaker', 'tsx', 'vite']; + + const pidsToKill = processes + .filter((p) => { + // Skip current process and parent + if (p.ProcessId === serverPid || p.ProcessId === process.ppid) { + return false; + } + // Skip Automaker's own processes (server, vite, tsx) + const cmdLower = (p.CommandLine || '').toLowerCase(); + if ( + automakerPaths.some((ap) => cmdLower.includes(`\\automaker\\`) && cmdLower.includes(ap)) + ) { + return false; + } + return true; + }) + .map((p) => p.ProcessId); + + if (pidsToKill.length === 0) { + return; + } + + logger.info( + `[cleanupOrphanedProcesses] Killing ${pidsToKill.length} orphaned processes for feature ${featureId}: ${pidsToKill.join(', ')}` + ); + + // Kill each process tree + await Promise.allSettled(pidsToKill.map((pid) => killProcessTree(pid))); + + logger.info(`[cleanupOrphanedProcesses] Cleanup complete for feature ${featureId}`); + } catch (error) { + // Don't fail the feature if cleanup fails - just log it + logger.warn(`[cleanupOrphanedProcesses] Error during cleanup for feature ${featureId}:`, error); + } +} + /** * Get the current branch name for a git repository * @param projectPath - Path to the git repository @@ -352,6 +457,10 @@ interface RunningFeature { leaseCount: number; model?: string; provider?: ModelProvider; + /** Promise that resolves when execution ends (success, failure, or abort) */ + completionPromise?: Promise; + /** Call to signal execution has completed (for cleanup tracking) */ + signalCompletion?: () => void; } interface AutoLoopState { @@ -597,6 +706,93 @@ export class AutoModeService { } } + /** + * Initialize the service - should be called after construction. + * Performs startup cleanup of any orphaned processes from previous runs. + */ + async initialize(): Promise { + if (IS_WINDOWS) { + await this.cleanupOrphanedProcessesOnStartup(); + } + } + + /** + * Clean up orphaned processes from previous Automaker runs. + * This catches processes that were orphaned when Automaker was force-closed, + * crashed, or the browser tab was closed without proper cleanup. + * + * We look for processes that: + * 1. Are node.exe, sh.exe, or cmd.exe + * 2. Have command lines suggesting they were spawned by npm/next/dev servers + * 3. Have orphaned parent processes (parent PID doesn't exist) + */ + private async cleanupOrphanedProcessesOnStartup(): Promise { + try { + logger.info('[Startup] Checking for orphaned processes from previous runs...'); + + // PowerShell script to find potentially orphaned dev server processes + // We look for processes whose parent no longer exists + // Note: Script must be single-line with semicolons for proper parsing + // We capture $parentPid before the nested Where-Object to avoid $_ shadowing + const psScript = `$orphans = @(); Get-CimInstance Win32_Process | Where-Object { ($_.Name -eq 'node.exe' -or $_.Name -eq 'sh.exe' -or $_.Name -eq 'cmd.exe') -and $_.CommandLine -match 'next|npm run dev|start-server' } | ForEach-Object { $parentPid = $_.ParentProcessId; $parent = Get-CimInstance Win32_Process | Where-Object { $_.ProcessId -eq $parentPid }; if (-not $parent) { $orphans += [PSCustomObject]@{ ProcessId = $_.ProcessId; Name = $_.Name; CommandLine = $_.CommandLine } } }; $orphans | ConvertTo-Json`; + + const { stdout: result } = await execAsync(`powershell -Command "${psScript}"`, { + encoding: 'utf8', + timeout: 20000, + windowsHide: true, + }); + + if (!result.trim() || result.trim() === 'null') { + logger.info('[Startup] No orphaned processes found'); + return; + } + + let orphans: { ProcessId: number; Name: string; CommandLine: string }[] = []; + try { + const parsed = JSON.parse(result); + orphans = Array.isArray(parsed) ? parsed : [parsed]; + } catch { + logger.debug('[Startup] Failed to parse orphan detection output'); + return; + } + + // Filter out Automaker's own processes + const serverPid = process.pid; + const pidsToKill = orphans + .filter((p) => { + if (p.ProcessId === serverPid || p.ProcessId === process.ppid) { + return false; + } + // Skip if it's part of Automaker itself + const cmdLower = (p.CommandLine || '').toLowerCase(); + if ( + cmdLower.includes('\\automaker\\') && + (cmdLower.includes('tsx') || cmdLower.includes('vite')) + ) { + return false; + } + return true; + }) + .map((p) => p.ProcessId); + + if (pidsToKill.length === 0) { + logger.info('[Startup] No orphaned processes to clean up'); + return; + } + + logger.info( + `[Startup] Cleaning up ${pidsToKill.length} orphaned processes: ${pidsToKill.join(', ')}` + ); + + await Promise.allSettled(pidsToKill.map((pid) => killProcessTree(pid))); + + logger.info('[Startup] Orphaned process cleanup complete'); + } catch (error) { + // Don't fail startup if cleanup fails + logger.warn('[Startup] Failed to check for orphaned processes:', error); + } + } + /** * Track a failure and check if we should pause due to consecutive failures. * This handles cases where the SDK doesn't return useful error messages. @@ -1050,7 +1246,34 @@ export class AutoModeService { } /** - * Stop the auto mode loop for a specific project/worktree + * Get running features for a specific project/worktree + * @param projectPath - The project path + * @param branchName - The branch name, or null for main worktree + */ + private getRunningFeaturesForWorktree( + projectPath: string, + branchName: string | null + ): RunningFeature[] { + const features: RunningFeature[] = []; + const normalizedBranch = branchName === 'main' ? null : branchName; + + for (const [, feature] of this.runningFeatures) { + if (feature.projectPath === projectPath) { + // Match by branch: null matches both null and 'main' + const featureBranch = feature.branchName === 'main' ? null : feature.branchName; + if (featureBranch === normalizedBranch) { + features.push(feature); + } + } + } + return features; + } + + /** + * Stop the auto mode loop for a specific project/worktree. + * This only stops new features from being picked up - running features continue to completion. + * Use stopFeature() to explicitly abort a running feature. + * * @param projectPath - The project to stop auto mode for * @param branchName - The branch name, or null for main worktree */ @@ -1839,8 +2062,11 @@ Complete the pipeline step instructions above. Review the previous work and appl /** * Stop a specific feature + * @param featureId - The feature ID to stop + * @param waitForCleanup - Whether to wait for the feature to fully clean up (default: true) + * @param timeoutMs - Maximum time to wait for cleanup in milliseconds (default: 10000) */ - async stopFeature(featureId: string): Promise { + async stopFeature(featureId: string, waitForCleanup = true, timeoutMs = 10000): Promise { const running = this.runningFeatures.get(featureId); if (!running) { return false; @@ -1849,10 +2075,26 @@ Complete the pipeline step instructions above. Review the previous work and appl // Cancel any pending plan approval for this feature this.cancelPlanApproval(featureId); + // Abort the feature execution running.abortController.abort(); - // Remove from running features immediately to allow resume - // The abort signal will still propagate to stop any ongoing execution + // If we have a completion promise and should wait, wait for cleanup with timeout + if (waitForCleanup && running.completionPromise) { + try { + await Promise.race([ + running.completionPromise, + new Promise((_, reject) => + setTimeout(() => reject(new Error('Cleanup timeout')), timeoutMs) + ), + ]); + logger.info(`Feature ${featureId} cleanup completed gracefully`); + } catch (error) { + // Timeout or other error - force release + logger.warn(`Feature ${featureId} cleanup timed out or failed, forcing removal:`, error); + } + } + + // Release the feature (force: true to immediately remove regardless of lease count) this.releaseRunningFeature(featureId, { force: true }); return true; @@ -5057,6 +5299,11 @@ After generating the revised spec, output: clearTimeout(rawWriteTimeout); rawWriteTimeout = null; } + + // Clean up orphaned processes on Windows + // This handles dev servers, MCP processes, etc. that don't terminate + // when the agent is aborted + await cleanupOrphanedProcesses(workDir, featureId); } } diff --git a/apps/server/src/services/codex-app-server-service.ts b/apps/server/src/services/codex-app-server-service.ts index ecfb99da1..01a3af83c 100644 --- a/apps/server/src/services/codex-app-server-service.ts +++ b/apps/server/src/services/codex-app-server-service.ts @@ -1,7 +1,9 @@ import { spawn, type ChildProcess } from 'child_process'; import readline from 'readline'; -import { findCodexCliPath } from '@automaker/platform'; +import { findCodexCliPath, killProcessTree } from '@automaker/platform'; import { createLogger } from '@automaker/utils'; + +const IS_WINDOWS = process.platform === 'win32'; import type { AppServerModelResponse, AppServerAccountResponse, @@ -196,7 +198,11 @@ export class CodexAppServerService { // Clean up rl.close(); - childProcess.kill('SIGTERM'); + if (IS_WINDOWS && childProcess.pid) { + await killProcessTree(childProcess.pid); + } else { + childProcess.kill('SIGTERM'); + } return result; } catch (error) { @@ -204,8 +210,12 @@ export class CodexAppServerService { return null; } finally { // Ensure process is killed - if (childProcess && !childProcess.killed) { - childProcess.kill('SIGTERM'); + if (childProcess && !childProcess.killed && childProcess.pid) { + if (IS_WINDOWS) { + killProcessTree(childProcess.pid).catch(() => {}); + } else { + childProcess.kill('SIGTERM'); + } } } } diff --git a/apps/server/src/services/dev-server-service.ts b/apps/server/src/services/dev-server-service.ts index 74ed82208..dc62716c9 100644 --- a/apps/server/src/services/dev-server-service.ts +++ b/apps/server/src/services/dev-server-service.ts @@ -12,8 +12,11 @@ import * as secureFs from '../lib/secure-fs.js'; import path from 'path'; import net from 'net'; import { createLogger } from '@automaker/utils'; +import { killProcessTree } from '@automaker/platform'; import type { EventEmitter } from '../lib/events.js'; +const IS_WINDOWS = process.platform === 'win32'; + const logger = createLogger('DevServerService'); // Maximum scrollback buffer size (characters) - matches TerminalService pattern @@ -591,9 +594,15 @@ class DevServerService { }); } - // Kill the process - if (server.process && !server.process.killed) { - server.process.kill('SIGTERM'); + // Kill the process tree (important on Windows where child processes aren't auto-terminated) + if (server.process && !server.process.killed && server.process.pid) { + if (IS_WINDOWS) { + // Use tree-kill to terminate the entire process tree + // This prevents orphaned child processes (e.g., Next.js start-server.js) + await killProcessTree(server.process.pid); + } else { + server.process.kill('SIGTERM'); + } } // Free the port diff --git a/apps/server/tests/unit/services/dev-server-service.test.ts b/apps/server/tests/unit/services/dev-server-service.test.ts index 03d3d01c6..9a2129827 100644 --- a/apps/server/tests/unit/services/dev-server-service.test.ts +++ b/apps/server/tests/unit/services/dev-server-service.test.ts @@ -24,17 +24,28 @@ vi.mock('net', () => ({ createServer: vi.fn(), })); +// Mock @automaker/platform for killProcessTree +vi.mock('@automaker/platform', () => ({ + killProcessTree: vi.fn().mockResolvedValue(undefined), +})); + import { spawn, execSync } from 'child_process'; import * as secureFs from '@/lib/secure-fs.js'; import net from 'net'; +import { killProcessTree } from '@automaker/platform'; describe('dev-server-service.ts', () => { let testDir: string; + let originalHostname: string | undefined; beforeEach(async () => { vi.clearAllMocks(); vi.resetModules(); + // Save and set HOSTNAME for consistent test URLs + originalHostname = process.env.HOSTNAME; + process.env.HOSTNAME = 'localhost'; + testDir = path.join(os.tmpdir(), `dev-server-test-${Date.now()}`); await fs.mkdir(testDir, { recursive: true }); @@ -56,6 +67,13 @@ describe('dev-server-service.ts', () => { }); afterEach(async () => { + // Restore HOSTNAME + if (originalHostname !== undefined) { + process.env.HOSTNAME = originalHostname; + } else { + delete process.env.HOSTNAME; + } + try { await fs.rm(testDir, { recursive: true, force: true }); } catch { @@ -250,7 +268,12 @@ describe('dev-server-service.ts', () => { const result = await service.stopDevServer(testDir); expect(result.success).toBe(true); - expect(mockProcess.kill).toHaveBeenCalledWith('SIGTERM'); + // On Windows, killProcessTree is used; on Unix, process.kill('SIGTERM') + if (process.platform === 'win32') { + expect(killProcessTree).toHaveBeenCalledWith(mockProcess.pid); + } else { + expect(mockProcess.kill).toHaveBeenCalledWith('SIGTERM'); + } }); }); @@ -377,6 +400,7 @@ function createMockProcess() { mockProcess.stderr = new EventEmitter(); mockProcess.kill = vi.fn(); mockProcess.killed = false; + mockProcess.pid = 12345; // Add pid for process tree kill condition // Don't exit immediately - let the test control the lifecycle return mockProcess; diff --git a/libs/platform/package.json b/libs/platform/package.json index 21729ef93..350ade3ce 100644 --- a/libs/platform/package.json +++ b/libs/platform/package.json @@ -22,7 +22,8 @@ }, "dependencies": { "@automaker/types": "1.0.0", - "p-limit": "6.2.0" + "p-limit": "6.2.0", + "tree-kill": "1.2.2" }, "devDependencies": { "@types/node": "22.19.3", diff --git a/libs/platform/src/index.ts b/libs/platform/src/index.ts index 5952ba2d8..fc952ab2b 100644 --- a/libs/platform/src/index.ts +++ b/libs/platform/src/index.ts @@ -186,3 +186,6 @@ export { findTerminalById, openInExternalTerminal, } from './terminal.js'; + +// Process utilities (cross-platform process tree management) +export { killProcessTree, waitForProcessExit, forceKillProcessTree } from './process-utils.js'; diff --git a/libs/platform/src/process-utils.ts b/libs/platform/src/process-utils.ts new file mode 100644 index 000000000..5b381c5e0 --- /dev/null +++ b/libs/platform/src/process-utils.ts @@ -0,0 +1,119 @@ +/** + * Cross-platform process utilities + * + * Provides utilities for managing process trees, particularly important on Windows + * where child processes spawned via 'cmd /c' don't automatically terminate when + * the parent process is killed. + */ + +import treeKill from 'tree-kill'; + +const IS_WINDOWS = process.platform === 'win32'; + +/** + * Kill an entire process tree (parent + all children) + * + * On Windows, processes spawned via 'cmd /c' create process trees where killing + * the parent doesn't kill children. This uses tree-kill to terminate the entire tree. + * + * @param pid - Process ID to kill + * @param signal - Signal to send (default: 'SIGTERM', or 'SIGKILL' on Windows) + * @returns Promise that resolves when kill is attempted + * + * @example + * ```typescript + * // Kill MCP server process tree + * const transport = new StdioClientTransport(...); + * await killProcessTree(transport.pid); + * ``` + */ +export async function killProcessTree( + pid: number, + signal: string = IS_WINDOWS ? 'SIGKILL' : 'SIGTERM' +): Promise { + return new Promise((resolve) => { + treeKill(pid, signal, (err) => { + if (err) { + // Process may have already exited, which is fine + // Log at debug level only + if (process.env.DEBUG) { + console.debug(`[process-utils] tree-kill error (may be expected): ${err.message}`); + } + } + resolve(); + }); + }); +} + +/** + * Wait for a process to exit with timeout + * + * Useful after calling killProcessTree to verify the process actually terminated. + * + * @param pid - Process ID to wait for + * @param timeoutMs - Maximum time to wait in milliseconds + * @returns Promise that resolves to true if process exited, false if timeout + * + * @example + * ```typescript + * await killProcessTree(pid); + * const exited = await waitForProcessExit(pid, 5000); + * if (!exited) { + * console.warn('Process did not exit within timeout'); + * } + * ``` + */ +export async function waitForProcessExit(pid: number, timeoutMs: number): Promise { + const startTime = Date.now(); + const checkInterval = 100; // Check every 100ms + + while (Date.now() - startTime < timeoutMs) { + try { + // process.kill(pid, 0) returns true if process exists, throws if it doesn't + process.kill(pid, 0); + // Process still exists, wait and check again + await new Promise((resolve) => setTimeout(resolve, checkInterval)); + } catch { + // Process doesn't exist - it has exited + return true; + } + } + + // Timeout reached, process still running + return false; +} + +/** + * Force kill a process tree with escalation + * + * First tries SIGTERM, then escalates to SIGKILL if process doesn't exit. + * On Windows, always uses SIGKILL (force) since SIGTERM isn't reliably supported. + * + * @param pid - Process ID to kill + * @param gracePeriodMs - Time to wait for graceful exit before force kill (default: 3000) + * @returns Promise that resolves when process is terminated or timeout reached + * + * @example + * ```typescript + * // Gracefully terminate with escalation + * await forceKillProcessTree(pid, 5000); + * ``` + */ +export async function forceKillProcessTree(pid: number, gracePeriodMs = 3000): Promise { + // On Windows, just force kill immediately + if (IS_WINDOWS) { + await killProcessTree(pid, 'SIGKILL'); + return; + } + + // On Unix, try graceful termination first + await killProcessTree(pid, 'SIGTERM'); + + // Wait for graceful exit + const exited = await waitForProcessExit(pid, gracePeriodMs); + + if (!exited) { + // Escalate to force kill + await killProcessTree(pid, 'SIGKILL'); + } +} diff --git a/libs/platform/src/subprocess.ts b/libs/platform/src/subprocess.ts index b17a9538e..974ad4ffd 100644 --- a/libs/platform/src/subprocess.ts +++ b/libs/platform/src/subprocess.ts @@ -4,6 +4,9 @@ import { spawn, type ChildProcess } from 'child_process'; import readline from 'readline'; +import { killProcessTree } from './process-utils.js'; + +const IS_WINDOWS = process.platform === 'win32'; export interface SubprocessOptions { command: string; @@ -76,17 +79,28 @@ export async function* spawnJSONLProcess(options: SubprocessOptions): AsyncGener }); } + // Helper to kill process tree (important on Windows where SIGTERM doesn't propagate) + const killProcess = async () => { + if (IS_WINDOWS && childProcess.pid) { + // On Windows, use tree-kill to terminate entire process tree + // This prevents orphaned child processes (MCP servers, dev servers, etc.) + await killProcessTree(childProcess.pid); + } else { + childProcess.kill('SIGTERM'); + } + }; + // Setup timeout detection const resetTimeout = () => { lastOutputTime = Date.now(); if (timeoutHandle) { clearTimeout(timeoutHandle); } - timeoutHandle = setTimeout(() => { + timeoutHandle = setTimeout(async () => { const elapsed = Date.now() - lastOutputTime; if (elapsed >= timeout) { console.error(`[SubprocessManager] Process timeout: no output for ${timeout}ms`); - childProcess.kill('SIGTERM'); + await killProcess(); } }, timeout); }; @@ -97,11 +111,14 @@ export async function* spawnJSONLProcess(options: SubprocessOptions): AsyncGener let abortHandler: (() => void) | null = null; if (abortController) { abortHandler = () => { - console.log('[SubprocessManager] Abort signal received, killing process'); + console.log('[SubprocessManager] Abort signal received, killing process tree'); if (timeoutHandle) { clearTimeout(timeoutHandle); } - childProcess.kill('SIGTERM'); + // Use async kill but don't await (abort handler is sync) + killProcess().catch((err) => { + console.error('[SubprocessManager] Error killing process tree:', err); + }); }; // Check if already aborted, if so call handler immediately if (abortController.signal.aborted) { @@ -244,7 +261,14 @@ export async function spawnProcess(options: SubprocessOptions): Promise { cleanupAbortListener(); - childProcess.kill('SIGTERM'); + // Use tree-kill on Windows to terminate entire process tree + if (IS_WINDOWS && childProcess.pid) { + killProcessTree(childProcess.pid).catch((err) => { + console.error('[SubprocessManager] Error killing process tree:', err); + }); + } else { + childProcess.kill('SIGTERM'); + } reject(new Error('Process aborted')); }; abortController.signal.addEventListener('abort', abortHandler); From 8b716f76305f1fcc263e859bc7db89ac50ddc568 Mon Sep 17 00:00:00 2001 From: Scott Date: Sat, 24 Jan 2026 18:35:43 -0700 Subject: [PATCH 2/4] fix: wire up completionPromise for graceful feature cleanup - Create completionPromise and signalCompletion in acquireRunningFeature - Call signalCompletion in finally blocks of all execute methods - Fixes CodeRabbit review: completionPromise was defined but never wired Co-Authored-By: Claude Opus 4.5 --- apps/server/src/services/auto-mode-service.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/apps/server/src/services/auto-mode-service.ts b/apps/server/src/services/auto-mode-service.ts index 3f5798133..44b0a4c7a 100644 --- a/apps/server/src/services/auto-mode-service.ts +++ b/apps/server/src/services/auto-mode-service.ts @@ -590,6 +590,13 @@ export class AutoModeService { } const abortController = params.abortController ?? new AbortController(); + + // Create completion tracking for graceful cleanup + let signalCompletion: () => void; + const completionPromise = new Promise((resolve) => { + signalCompletion = resolve; + }); + const entry: RunningFeature = { featureId: params.featureId, projectPath: params.projectPath, @@ -599,6 +606,8 @@ export class AutoModeService { isAutoMode: params.isAutoMode, startTime: Date.now(), leaseCount: 1, + completionPromise, + signalCompletion: signalCompletion!, }; this.runningFeatures.set(params.featureId, entry); return entry; @@ -1896,6 +1905,8 @@ export class AutoModeService { logger.info( `Pending approvals at cleanup: ${Array.from(this.pendingApprovals.keys()).join(', ') || 'none'}` ); + // Signal completion for graceful cleanup + tempRunningFeature.signalCompletion?.(); this.releaseRunningFeature(featureId); // Update execution state after feature completes @@ -2129,7 +2140,7 @@ Complete the pipeline step instructions above. Review the previous work and appl return; } - this.acquireRunningFeature({ + const runningFeature = this.acquireRunningFeature({ featureId, projectPath, isAutoMode: false, @@ -2215,6 +2226,8 @@ Complete the pipeline step instructions above. Review the previous work and appl _calledInternally: true, }); } finally { + // Signal completion for graceful cleanup + runningFeature.signalCompletion?.(); this.releaseRunningFeature(featureId); } } @@ -2523,6 +2536,8 @@ Complete the pipeline step instructions above. Review the previous work and appl }); } } finally { + // Signal completion for graceful cleanup + runningEntry.signalCompletion?.(); this.releaseRunningFeature(featureId); } } @@ -2778,6 +2793,8 @@ Address the follow-up instructions above. Review the previous work and make the } } } finally { + // Signal completion for graceful cleanup + runningEntry.signalCompletion?.(); this.releaseRunningFeature(featureId); } } From 6f3eecef30cf62584ad52474185a10cfc1df71f2 Mon Sep 17 00:00:00 2001 From: Scott Date: Sat, 24 Jan 2026 18:51:39 -0700 Subject: [PATCH 3/4] fix: handle PowerShell "null" output in cleanupOrphanedProcesses PowerShell returns the literal string "null" when no processes match, which JSON.parse accepts as valid JSON (returning null). Added check for this case to prevent null reference errors. Co-Authored-By: Claude Opus 4.5 --- apps/server/src/services/auto-mode-service.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/server/src/services/auto-mode-service.ts b/apps/server/src/services/auto-mode-service.ts index 44b0a4c7a..3e0495861 100644 --- a/apps/server/src/services/auto-mode-service.ts +++ b/apps/server/src/services/auto-mode-service.ts @@ -125,7 +125,7 @@ async function cleanupOrphanedProcesses(workDir: string, featureId: string): Pro windowsHide: true, }); - if (!result.trim()) { + if (!result.trim() || result.trim() === 'null') { return; // No matching processes } @@ -133,7 +133,10 @@ async function cleanupOrphanedProcesses(workDir: string, featureId: string): Pro let processes: { ProcessId: number; Name: string; CommandLine: string }[] = []; try { const parsed = JSON.parse(result); - // PowerShell returns a single object if one result, array if multiple + // PowerShell returns null if no matches, single object if one, array if multiple + if (!parsed) { + return; + } processes = Array.isArray(parsed) ? parsed : [parsed]; } catch { logger.debug(`[cleanupOrphanedProcesses] Failed to parse PowerShell output`); From 4d99e89403891f60b27dbcf0d7aeecb0b547bd01 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 26 Jan 2026 09:22:36 -0700 Subject: [PATCH 4/4] fix: clear timeout after Promise.race and use logger.error Address CodeRabbit review feedback: - Clear the timeout timer after Promise.race resolves to avoid stray timers - Use logger.error instead of console.error for consistency Co-Authored-By: Claude Opus 4.5 --- apps/server/src/index.ts | 2 +- apps/server/src/services/auto-mode-service.ts | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts index dbb8704fb..a435971b1 100644 --- a/apps/server/src/index.ts +++ b/apps/server/src/index.ts @@ -261,7 +261,7 @@ const featureLoader = new FeatureLoader(); const autoModeService = new AutoModeService(events, settingsService); // Initialize auto mode service (performs startup cleanup of orphaned processes on Windows) autoModeService.initialize().catch((err) => { - console.error('[AutoModeService] Initialization error:', err); + logger.error('[AutoModeService] Initialization error:', err); }); const claudeUsageService = new ClaudeUsageService(); const codexAppServerService = new CodexAppServerService(); diff --git a/apps/server/src/services/auto-mode-service.ts b/apps/server/src/services/auto-mode-service.ts index 3e0495861..16ba6fbf7 100644 --- a/apps/server/src/services/auto-mode-service.ts +++ b/apps/server/src/services/auto-mode-service.ts @@ -2094,17 +2094,18 @@ Complete the pipeline step instructions above. Review the previous work and appl // If we have a completion promise and should wait, wait for cleanup with timeout if (waitForCleanup && running.completionPromise) { + let timeoutId: NodeJS.Timeout | undefined; + const timeoutPromise = new Promise((_, reject) => { + timeoutId = setTimeout(() => reject(new Error('Cleanup timeout')), timeoutMs); + }); try { - await Promise.race([ - running.completionPromise, - new Promise((_, reject) => - setTimeout(() => reject(new Error('Cleanup timeout')), timeoutMs) - ), - ]); + await Promise.race([running.completionPromise, timeoutPromise]); logger.info(`Feature ${featureId} cleanup completed gracefully`); } catch (error) { // Timeout or other error - force release logger.warn(`Feature ${featureId} cleanup timed out or failed, forcing removal:`, error); + } finally { + if (timeoutId) clearTimeout(timeoutId); } }