From a23de11122c3c5dbbf491efd18e7a5d223a33fd4 Mon Sep 17 00:00:00 2001 From: akrherz Date: Tue, 21 Apr 2026 18:40:26 -0500 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=9A=9A=20Move=20ip=5Fthrottle=20to=20?= =?UTF-8?q?prevent=20telemetry=20write?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pyiem/webutil.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/pyiem/webutil.py b/src/pyiem/webutil.py index 0670fc486..841a7856c 100644 --- a/src/pyiem/webutil.py +++ b/src/pyiem/webutil.py @@ -464,18 +464,11 @@ def _mcall( func: Callable, environ: dict, start_response: Callable, - ip_throttle_secs: float | Callable, memcachekey: str | Callable | None, expire: int | Callable, content_type: str | Callable, ): """Call the function with memcachekey handling.""" - if ip_is_throttled(environ, ip_throttle_secs): - start_response( - "429 Too Many Requests", - [("Content-type", "text/plain")], - ) - return b"Too many requests from your IP address, slow down." if memcachekey is None: return func(environ, start_response) key = memcachekey if isinstance(memcachekey, str) else memcachekey(environ) @@ -610,11 +603,17 @@ def _handle_exp(errormsg, routine=False, code=500): yield _handle_help(start_response, **kwargs) return add_to_environ(environ, form, **kwargs) + if ip_is_throttled(environ, kwargs.get("ip_throttle_secs", 0)): + start_response( + "429 Too Many Requests", + [("Content-type", "text/plain")], + ) + yield b"Too many requests from your IP address, slow down." + return res = _mcall( func, environ, start_response, - kwargs.get("ip_throttle_secs", 0), kwargs.get("memcachekey"), kwargs.get("memcacheexpire", 3600), kwargs.get("content_type", "application/json"), From 7a26984c7e5961df10a3c26c0abbe93081d703da Mon Sep 17 00:00:00 2001 From: akrherz Date: Tue, 21 Apr 2026 18:58:35 -0500 Subject: [PATCH 2/3] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor=20iemapp,=20n?= =?UTF-8?q?o=20functional=20changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pyiem/webutil.py | 253 +++++++++++++++++++++++++++--------------- tests/test_webutil.py | 34 ++++++ 2 files changed, 198 insertions(+), 89 deletions(-) diff --git a/src/pyiem/webutil.py b/src/pyiem/webutil.py index 841a7856c..f6dc2db44 100644 --- a/src/pyiem/webutil.py +++ b/src/pyiem/webutil.py @@ -14,6 +14,7 @@ from collections.abc import Callable from datetime import datetime, timezone from http import HTTPStatus +from typing import Any, Iterator from zoneinfo import ZoneInfo, ZoneInfoNotFoundError from docutils.core import publish_string @@ -527,6 +528,135 @@ def ip_is_throttled(environ: dict, throttle_secs: float | Callable) -> bool: return False +def _iemapp_error_response( + environ: dict, + start_response: Callable, + errormsg: str, + routine: bool = False, + code: int = 500, +) -> bytes: + """Build an iemapp text/plain error response payload.""" + # generate a random string so we can track this request + uid = "".join( + random.choice(string.ascii_uppercase + string.digits) + for _ in range(12) + ) + msg = ( + "Oopsy, something failed on our end, but fear not.\n" + "Please contact akrherz@iastate.edu and reference " + f"this unique identifier: {uid}\n" + "Or wait a day for daryl to review the web logs and fix " + "the bugs he wrote. What a life." + ) + if not routine: + # Nicely log things about this actual request + sys.stderr.write(f"={uid} URL: {environ.get('REQUEST_URI')}\n") + sys.stderr.write(errormsg) + else: + msg = errormsg + start_response( + f"{code} {HTTPStatus(code).phrase}", + [("Content-type", "text/plain")], + ) + return msg.encode("ascii", errors="replace") + + +def _iemapp_preflight( + environ: dict, + start_response: Callable, + kwargs: dict[str, Any], + ip_throttle_secs: float | Callable, +) -> tuple[bool, bytes | None]: + """Run request preflight checks and return early payload when needed.""" + # mixed converts this to a regular dict + form = parse_formvars(environ).mixed() + form = clean_form(form) + if "help" in form: + return True, _handle_help(start_response, **kwargs) + add_to_environ(environ, form, **kwargs) + if ip_is_throttled(environ, ip_throttle_secs): + start_response( + "429 Too Many Requests", + [("Content-type", "text/plain")], + ) + return True, b"Too many requests from your IP address, slow down." + return False, None + + +def _normalize_iemapp_response(res: Any) -> Iterator[bytes]: + """Yield response chunks in a uniform iterable form.""" + # Need to be careful here and ensure we are returning a list of bytes. + if isinstance(res, str): + yield res.encode("utf-8") + return + if isinstance(res, bytes): + yield res + return + if isinstance(res, (tuple, list)): + for chunk in res: + yield chunk + return + yield from res + + +def _iemapp_emit_telemetry( + environ: dict, + start_time: datetime, + status_code: int, +) -> None: + """Emit telemetry for an iemapp request.""" + end_time = datetime.now(timezone.utc) + write_telemetry( + TELEMETRY( + (end_time - start_time).total_seconds(), + status_code, + environ.get("REMOTE_ADDR"), + environ.get("SCRIPT_NAME"), + environ.get("REQUEST_URI"), + environ.get("HTTP_HOST"), + ) + ) + + +def _iemapp_handle_exception( + environ: dict, + start_response: Callable, + exp: Exception, +) -> tuple[int, bytes]: + """Map exceptions to status code and user-facing payload.""" + if isinstance(exp, (IncompleteWebRequest, NoDataFound)): + status_code = 422 + return status_code, _iemapp_error_response( + environ, + start_response, + str(exp), + routine=True, + code=status_code, + ) + if isinstance(exp, BadWebRequest): + status_code = 422 + log_request(environ, multiplier=2) + return status_code, _iemapp_error_response( + environ, + start_response, + str(exp), + code=status_code, + ) + if isinstance(exp, NewDatabaseConnectionFailure): + status_code = 503 + return status_code, _iemapp_error_response( + environ, + start_response, + f"get_dbconn() failed with `{exp}`", + code=status_code, + ) + return 500, _iemapp_error_response( + environ, + start_response, + traceback.format_exc(), + ) + + def iemapp(**kwargs): """Attempt to do all kinds of nice things for the user and the developer. @@ -555,10 +685,22 @@ def iemapp(**kwargs): 3) If the wrapped function returns a str or bytes, it will be encoded and made into a list for the WSGI response. - Notes - ----- - - raise `NoDataFound` to have a nice error message generated + Exception Raising + ----------------- + + The following Exception types raised within the mod_wsgi wrapped code will + trigger the following HTTP status codes sent to the client. + + - `NoDataFound` or `IncompleteWebRequest` -> 422 Unprocessable Entity + - `BadWebRequest` -> 422 Unprocessable Entity (also db logged...) + - `NewDatabaseConnectionFailure` -> 503 Service Unavailable + - Any other Exception -> 500 Internal Server Error """ + enable_telemetry = kwargs.get("enable_telemetry", True) + ip_throttle_secs = kwargs.get("ip_throttle_secs", 0) + memcachekey = kwargs.get("memcachekey") + memcacheexpire = kwargs.get("memcacheexpire", 3600) + content_type = kwargs.get("content_type", "application/json") def _decorator(func): """Decorate a function to catch exceptions and do nice things.""" @@ -566,107 +708,40 @@ def _decorator(func): def _wrapped(environ, start_response): """Decorate function.""" - def _handle_exp(errormsg, routine=False, code=500): - # generate a random string so we can track this request - uid = "".join( - random.choice(string.ascii_uppercase + string.digits) - for _ in range(12) - ) - msg = ( - "Oopsy, something failed on our end, but fear not.\n" - "Please contact akrherz@iastate.edu and reference " - f"this unique identifier: {uid}\n" - "Or wait a day for daryl to review the web logs and fix " - "the bugs he wrote. What a life." - ) - if not routine: - # Nicely log things about this actual request - sys.stderr.write( - f"={uid} URL: {environ.get('REQUEST_URI')}\n" - ) - sys.stderr.write(errormsg) - else: - msg = errormsg - start_response( - f"{code} {HTTPStatus(code).phrase}", - [("Content-type", "text/plain")], - ) - return msg.encode("ascii", errors="replace") - start_time = datetime.now(timezone.utc) status_code = 500 try: - # mixed convers this to a regular dict - form = parse_formvars(environ).mixed() - form = clean_form(form) - if "help" in form: - yield _handle_help(start_response, **kwargs) - return - add_to_environ(environ, form, **kwargs) - if ip_is_throttled(environ, kwargs.get("ip_throttle_secs", 0)): - start_response( - "429 Too Many Requests", - [("Content-type", "text/plain")], - ) - yield b"Too many requests from your IP address, slow down." + short_circuit, payload = _iemapp_preflight( + environ, + start_response, + kwargs, + ip_throttle_secs, + ) + if short_circuit: + yield payload return res = _mcall( func, environ, start_response, - kwargs.get("memcachekey"), - kwargs.get("memcacheexpire", 3600), - kwargs.get("content_type", "application/json"), + memcachekey, + memcacheexpire, + content_type, ) # If res is a generator, we should yield from it here if inspect.isgenerator(res): yield from res # you know what assumptions do status_code = 200 - except (IncompleteWebRequest, NoDataFound) as exp: - # Intention is to tell the client that the server understood - # the request, but something is missing - status_code = 422 - res = _handle_exp(str(exp), routine=True, code=status_code) - except BadWebRequest as exp: - status_code = 422 - log_request(environ, multiplier=2) - res = _handle_exp(str(exp), code=status_code) - except NewDatabaseConnectionFailure as exp: - status_code = 503 - res = _handle_exp( - f"get_dbconn() failed with `{exp}`", - code=status_code, - ) - except Exception: - res = _handle_exp(traceback.format_exc()) - end_time = datetime.now(timezone.utc) - if kwargs.get("enable_telemetry", True) and not environ.get( - MEMCACHED_HIT, False - ): - write_telemetry( - TELEMETRY( - (end_time - start_time).total_seconds(), - status_code, - environ.get("REMOTE_ADDR"), - environ.get("SCRIPT_NAME"), - environ.get("REQUEST_URI"), - environ.get("HTTP_HOST"), - ) + except Exception as exp: + status_code, res = _iemapp_handle_exception( + environ, + start_response, + exp, ) - # Need to be careful here and ensure we are returning a list - # of bytes - if isinstance(res, str): - yield res.encode("utf-8") - return - if isinstance(res, bytes): - yield res - return - if isinstance(res, (tuple, list)): - for r in res: - yield r - return - yield from res + if enable_telemetry and not environ.get(MEMCACHED_HIT, False): + _iemapp_emit_telemetry(environ, start_time, status_code) + yield from _normalize_iemapp_response(res) return _wrapped diff --git a/tests/test_webutil.py b/tests/test_webutil.py index ead573894..35712d174 100644 --- a/tests/test_webutil.py +++ b/tests/test_webutil.py @@ -264,6 +264,40 @@ def application(environ, _start_response): assert res1 == res2 +def test_iemapp_telemetry_skipped_on_memcache_hit(): + """Test that telemetry is not written when response is memcache-backed.""" + + cache = {} + + class DummyMemcacheClient: + """Simple in-memory memcache stand-in for deterministic testing.""" + + def __init__(self, _server): + pass + + def get(self, key): + return cache.get(key) + + def set(self, key, value, expire=None): + cache[key] = value + + def close(self): + pass + + @iemapp(memcachekey="iem") + def application(_environ, start_response): + """Test.""" + start_response("200 OK", [("Content-type", "text/plain")]) + return b"Hello!" + + with mock.patch("pyiem.webutil.Client", DummyMemcacheClient): + with mock.patch("pyiem.webutil.write_telemetry") as write_mock: + c = Client(application) + assert c.get("/").status_code == 200 + assert c.get("/").status_code == 200 + assert write_mock.call_count == 1 + + def test_sts_ets_are_set(): """Test that we cross set things properly.""" From db3c9482b9eab23fae89f94b8700aa81751a1551 Mon Sep 17 00:00:00 2001 From: akrherz Date: Tue, 21 Apr 2026 19:26:36 -0500 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=8E=A8=20QoL=20for=20iemapp=20by=20im?= =?UTF-8?q?proving=20docs,=20exp=20handling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 ++ src/pyiem/webutil.py | 67 ++++++++++++++++++++++++++++++++++++------- tests/test_webutil.py | 47 ++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99500c25e..5dda6b901 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ All notable changes to this library are documented in this file. - Account for SPC PTS one-off with reversed and closed polygon. - Add `ip_throttle_secs` to `webutil.iemapp` to deal with IEM pain. +- Improve `iemapp` to better capture actual HTTP status_code and document + what happens during Exception to status_code mapping. - Prevent a GIGO on certain autoplot date fields. ### Bug Fixes diff --git a/src/pyiem/webutil.py b/src/pyiem/webutil.py index f6dc2db44..dced23215 100644 --- a/src/pyiem/webutil.py +++ b/src/pyiem/webutil.py @@ -618,6 +618,29 @@ def _iemapp_emit_telemetry( ) +def _parse_status_code(status: str) -> int | None: + """Parse integer HTTP status code from a WSGI status line.""" + # This could raise, but this should not be accounted for. + return int(status.split()[0]) + + +def _capture_start_response(start_response: Callable) -> tuple[Callable, dict]: + """Wrap start_response and capture if it was called and status code.""" + state = { + "started": False, + "status_code": None, + } + + def _wrapped_start_response(status, headers, exc_info=None): + state["started"] = True + state["status_code"] = _parse_status_code(status) + if exc_info is None: + return start_response(status, headers) + return start_response(status, headers, exc_info) + + return _wrapped_start_response, state + + def _iemapp_handle_exception( environ: dict, start_response: Callable, @@ -710,10 +733,13 @@ def _wrapped(environ, start_response): start_time = datetime.now(timezone.utc) status_code = 500 + wrapped_start_response, response_state = _capture_start_response( + start_response + ) try: short_circuit, payload = _iemapp_preflight( environ, - start_response, + wrapped_start_response, kwargs, ip_throttle_secs, ) @@ -723,24 +749,45 @@ def _wrapped(environ, start_response): res = _mcall( func, environ, - start_response, + wrapped_start_response, memcachekey, memcacheexpire, content_type, ) - # If res is a generator, we should yield from it here - if inspect.isgenerator(res): - yield from res # you know what assumptions do status_code = 200 + # Keep generator iteration in the try block so downstream + # iteration exceptions are mapped by our exception handler. + if inspect.isgenerator(res): + yield from _normalize_iemapp_response(res) + if enable_telemetry and not environ.get( + MEMCACHED_HIT, False + ): + _iemapp_emit_telemetry( + environ, start_time, status_code + ) + return except Exception as exp: - status_code, res = _iemapp_handle_exception( + if response_state["started"]: + # Once streaming has started, we cannot safely restart + # the response with a new status/body. + LOG.exception( + "iemapp: exception raised after start_response." + ) + res = [] + status_code = response_state["status_code"] or status_code + else: + status_code, res = _iemapp_handle_exception( + environ, + wrapped_start_response, + exp, + ) + if enable_telemetry and not environ.get(MEMCACHED_HIT, False): + _iemapp_emit_telemetry( environ, - start_response, - exp, + start_time, + response_state["status_code"] or status_code, ) - if enable_telemetry and not environ.get(MEMCACHED_HIT, False): - _iemapp_emit_telemetry(environ, start_time, status_code) yield from _normalize_iemapp_response(res) return _wrapped diff --git a/tests/test_webutil.py b/tests/test_webutil.py index 35712d174..e9e930f02 100644 --- a/tests/test_webutil.py +++ b/tests/test_webutil.py @@ -298,6 +298,53 @@ def application(_environ, start_response): assert write_mock.call_count == 1 +def test_iemapp_telemetry_uses_captured_start_response_status(): + """Test telemetry logs status emitted by downstream start_response.""" + + @iemapp() + def application(_environ, start_response): + """Test.""" + start_response("404 Not Found", [("Content-type", "text/plain")]) + return b"missing" + + with mock.patch("pyiem.webutil.write_telemetry") as write_mock: + c = Client(application) + resp = c.get("/") + assert resp.status_code == 404 + assert write_mock.call_count == 1 + assert write_mock.call_args[0][0].status_code == 404 + + +def test_iemapp_generator_exception_after_start_response(): + """Test post-start generator exceptions are logged without restart.""" + + start_response_calls = [] + + @iemapp() + def application(_environ, start_response): + """Test.""" + start_response("200 OK", [("Content-type", "text/plain")]) + yield b"first" + raise RuntimeError("boom") + + environ = { + "wsgi.input": mock.MagicMock(), + "QUERY_STRING": "", + "REQUEST_URI": "/", + "REMOTE_ADDR": "127.0.0.1", + } + + def sr(status, headers): + start_response_calls.append((status, headers)) + + with mock.patch("pyiem.webutil.LOG.exception") as mock_exception: + res = list(application(environ, sr)) + assert res == [b"first"] + assert len(start_response_calls) == 1 + assert start_response_calls[0][0] == "200 OK" + assert mock_exception.call_count == 1 + + def test_sts_ets_are_set(): """Test that we cross set things properly."""