Skip to content

Commit ac607f9

Browse files
brianlballclaude authored and committed
fix: permanent fd redirect for stdout suppression (issue #42)
Redirect C-level stdout (fd 1) to stderr once at startup, give Python sys.stdout a private fd to the real MCP client pipe. Catches ALL C-level pollution (SWIG GC, Polyhedron geometry, future unknowns) with zero races and no per-callsite wrappers. Fixes concurrent tool timeout (issue #42) and Polyhedron stdout leak on complex models (test_complex_model_stdout_purity). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f776bb0 commit ac607f9

4 files changed

Lines changed: 154 additions & 43 deletions

File tree

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ jobs:
8181
EXTRA_ENV=""
8282
;;
8383
5)
84-
# HVAC supply wiring simulation smoke tests (DOAS, radiant, district, beams)
85-
FILES="tests/test_hvac_supply_sim.py"
84+
# HVAC supply wiring simulation smoke tests (DOAS, radiant, district, beams) + concurrent regression
85+
FILES="tests/test_hvac_supply_sim.py tests/test_concurrent_tools.py"
8686
EXTRA_ENV=""
8787
;;
8888
esac

mcp_server/server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
from fastmcp import FastMCP
44

55
from mcp_server.skills import register_all_skills
6+
from mcp_server.stdout_suppression import redirect_c_stdout_to_stderr
67

78
mcp = FastMCP("openstudio-mcp")
89

910
register_all_skills(mcp)
1011

1112

1213
def main():
14+
redirect_c_stdout_to_stderr()
1315
mcp.run()
1416

1517

mcp_server/stdout_suppression.py

Lines changed: 37 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,63 @@
1-
"""Utilities for suppressing unwanted stdout from OpenStudio Python bindings.
1+
"""Redirect C-level stdout to stderr to protect MCP JSON-RPC protocol.
22
3-
The OpenStudio SWIG bindings print memory leak warnings to stdout:
4-
"swig/python detected a memory leak of type 'openstudio::model::Model *', no destructor found."
3+
OpenStudio's SWIG bindings and C++ geometry engine write directly to
4+
C stdout (fd 1): memory leak warnings, Polyhedron diagnostics, etc.
5+
These corrupt the JSON-RPC stream that MCP clients read from stdout.
56
6-
This pollutes the MCP JSON-RPC protocol which requires clean stdout.
7-
We redirect these warnings to stderr instead.
7+
Strategy: at process startup, permanently redirect fd 1 to stderr so
8+
ALL C-level writes go there harmlessly. Then replace Python's
9+
sys.stdout with a wrapper around the saved original fd so FastMCP's
10+
stdio transport still writes JSON-RPC to the real client pipe.
11+
12+
This is done once — no per-call suppression, no races, no missed callsites.
813
"""
914
from __future__ import annotations
1015

1116
import atexit
1217
import contextlib
18+
import io
1319
import os
1420
import sys
1521

1622

17-
@contextlib.contextmanager
18-
def suppress_openstudio_warnings():
19-
"""Temporarily redirect stdout to stderr to suppress OpenStudio SWIG warnings.
23+
def redirect_c_stdout_to_stderr():
24+
"""Permanently redirect C-level stdout (fd 1) to stderr.
2025
21-
This ensures the MCP JSON-RPC protocol on stdout remains clean.
22-
Works at both Python and C level by redirecting file descriptors.
26+
Must be called before FastMCP's stdio_server() captures sys.stdout.
27+
After this call:
28+
- C code (printf, SWIG, OpenStudio internals) -> fd 1 -> stderr
29+
- Python sys.stdout -> saved fd -> real MCP client pipe
2330
"""
24-
# Save original file descriptors
25-
stdout_fd = sys.stdout.fileno()
26-
stderr_fd = sys.stderr.fileno()
31+
stdout_fd = sys.stdout.fileno() # 1
32+
stderr_fd = sys.stderr.fileno() # 2
2733

28-
# Duplicate the current stdout FD to restore later
29-
saved_stdout_fd = os.dup(stdout_fd)
34+
# Save the real stdout pipe (to MCP client) as a new fd
35+
saved_fd = os.dup(stdout_fd)
3036

31-
# Flush Python-level buffers before redirecting
32-
sys.stdout.flush()
33-
sys.stderr.flush()
34-
35-
try:
36-
# Redirect stdout (fd 1) to stderr (fd 2) at OS level
37-
# This catches C-level fprintf(stdout, ...) from SWIG
38-
os.dup2(stderr_fd, stdout_fd)
37+
# Point fd 1 at stderr — all future C-level printf goes here
38+
os.dup2(stderr_fd, stdout_fd)
3939

40-
yield
40+
# Build a new Python stdout that writes to the saved fd.
41+
# Line buffering so each JSON-RPC message flushes immediately.
42+
binary = io.open(saved_fd, "wb", closefd=True)
43+
text = io.TextIOWrapper(binary, encoding="utf-8", line_buffering=True)
44+
sys.stdout = text
4145

42-
finally:
43-
# Flush again before restoring
44-
sys.stdout.flush()
45-
sys.stderr.flush()
4646

47-
# Restore original stdout
48-
os.dup2(saved_stdout_fd, stdout_fd)
49-
os.close(saved_stdout_fd)
47+
# Retain context-manager API so model_manager.py imports don't break.
48+
# Now a no-op since fd 1 is permanently redirected.
49+
@contextlib.contextmanager
50+
def suppress_openstudio_warnings():
51+
"""No-op — fd 1 is permanently redirected at startup."""
52+
yield
5053

5154

5255
def _redirect_stdout_to_stderr_at_exit():
53-
"""Redirect stdout to stderr during Python cleanup to catch SWIG warnings.
54-
55-
OpenStudio prints memory leak warnings when models are garbage-collected
56-
during Python interpreter shutdown. This redirects those to stderr.
57-
"""
56+
"""Safety net: ensure fd 1 points to stderr during interpreter shutdown."""
5857
try:
59-
stdout_fd = 1 # sys.stdout might be None at exit
60-
stderr_fd = 2
61-
os.dup2(stderr_fd, stdout_fd)
58+
os.dup2(2, 1)
6259
except Exception:
63-
pass # Silently ignore errors during shutdown
60+
pass
6461

6562

66-
# Register the cleanup handler to run before Python exits
6763
atexit.register(_redirect_stdout_to_stderr_at_exit)

tests/test_concurrent_tools.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""Regression test for issue #42: stdout suppression race condition.
2+
3+
The global FastMCP middleware held os.dup2() on fd 1 (stdout→stderr) for
4+
the entire tool call. FastMCP dispatches sync tools via
5+
anyio.to_thread.run_sync, so two tools CAN run concurrently. When Thread A
6+
held the redirect, Thread B's JSON-RPC response goes to stderr and the
7+
client receives nothing → MCP error -32001 timeout.
8+
9+
This test fires a slow tool (create_baseline_osm, several seconds) and a
10+
fast tool (get_server_status, near-instant) concurrently. On buggy code,
11+
get_server_status's response is lost → timeout. After the fix, both return.
12+
"""
13+
import asyncio
14+
import pytest
15+
16+
from conftest import integration_enabled, server_params, unwrap
17+
from mcp import ClientSession
18+
from mcp.client.stdio import stdio_client
19+
20+
21+
@pytest.mark.integration
22+
def test_concurrent_tool_calls_both_respond():
23+
# Regression: issue #42 — concurrent tool calls lost responses due to
24+
# global stdout suppression middleware redirecting fd 1 for entire tool duration.
25+
if not integration_enabled():
26+
pytest.skip("Set RUN_OPENSTUDIO_INTEGRATION=1 to enable MCP integration tests.")
27+
28+
async def _run():
29+
async with stdio_client(server_params()) as (read, write):
30+
async with ClientSession(read, write) as session:
31+
await session.initialize()
32+
33+
# --- Arrange ---
34+
# Fire slow tool first
35+
baseline_task = asyncio.create_task(
36+
session.call_tool("create_baseline_osm", {
37+
"name": "concurrent_race_test", "num_floors": 1,
38+
})
39+
)
40+
# Small delay so baseline_osm enters its execution window
41+
await asyncio.sleep(0.5)
42+
43+
# --- Act ---
44+
# Fire fast tool while slow tool holds middleware fd redirect
45+
status_task = asyncio.create_task(
46+
session.call_tool("get_server_status", {})
47+
)
48+
49+
# --- Assert ---
50+
# 30s timeout: get_server_status should return in <1s.
51+
# If it times out, the race condition is present — the response
52+
# went to stderr and the client never received it.
53+
try:
54+
baseline_res, status_res = await asyncio.wait_for(
55+
asyncio.gather(baseline_task, status_task),
56+
timeout=30,
57+
)
58+
except asyncio.TimeoutError:
59+
pytest.fail(
60+
"Concurrent tool call timed out — stdout suppression race "
61+
"condition is present (issue #42). get_server_status response "
62+
"was likely written to stderr while create_baseline_osm held "
63+
"the fd 1 redirect."
64+
)
65+
66+
baseline = unwrap(baseline_res)
67+
status = unwrap(status_res)
68+
69+
assert baseline.get("ok") is True, f"create_baseline_osm failed: {baseline}"
70+
assert status.get("ok") is True, f"get_server_status failed: {status}"
71+
assert "run_root" in status, f"status missing expected keys: {status}"
72+
73+
asyncio.run(_run())
74+
75+
76+
@pytest.mark.integration
77+
def test_concurrent_fast_tools_both_respond():
78+
# Regression: issue #42 — even two fast tools can race if both enter
79+
# the middleware's fd redirect window simultaneously.
80+
if not integration_enabled():
81+
pytest.skip("Set RUN_OPENSTUDIO_INTEGRATION=1 to enable MCP integration tests.")
82+
83+
async def _run():
84+
async with stdio_client(server_params()) as (read, write):
85+
async with ClientSession(read, write) as session:
86+
await session.initialize()
87+
88+
# Fire two fast tools concurrently
89+
task_a = asyncio.create_task(
90+
session.call_tool("get_server_status", {})
91+
)
92+
task_b = asyncio.create_task(
93+
session.call_tool("get_server_status", {})
94+
)
95+
96+
try:
97+
res_a, res_b = await asyncio.wait_for(
98+
asyncio.gather(task_a, task_b),
99+
timeout=15,
100+
)
101+
except asyncio.TimeoutError:
102+
pytest.fail(
103+
"Concurrent fast tool calls timed out — stdout suppression "
104+
"race condition (issue #42)."
105+
)
106+
107+
a = unwrap(res_a)
108+
b = unwrap(res_b)
109+
110+
assert a.get("ok") is True, f"First get_server_status failed: {a}"
111+
assert b.get("ok") is True, f"Second get_server_status failed: {b}"
112+
113+
asyncio.run(_run())

0 commit comments

Comments
 (0)