Skip to content

Commit 58da628

Browse files
authored
Merge pull request #1201 from akrherz/http429
Improvements to iemapp
2 parents 6acdc6e + db3c948 commit 58da628

3 files changed

Lines changed: 297 additions & 93 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ All notable changes to this library are documented in this file.
1414

1515
- Account for SPC PTS one-off with reversed and closed polygon.
1616
- Add `ip_throttle_secs` to `webutil.iemapp` to deal with IEM pain.
17+
- Improve `iemapp` to better capture actual HTTP status_code and document
18+
how raised exceptions are mapped to HTTP status codes.
1719
- Prevent a GIGO on certain autoplot date fields.
1820

1921
### Bug Fixes

src/pyiem/webutil.py

Lines changed: 214 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from collections.abc import Callable
1515
from datetime import datetime, timezone
1616
from http import HTTPStatus
17+
from typing import Any, Iterator
1718
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
1819

1920
from docutils.core import publish_string
@@ -464,18 +465,11 @@ def _mcall(
464465
func: Callable,
465466
environ: dict,
466467
start_response: Callable,
467-
ip_throttle_secs: float | Callable,
468468
memcachekey: str | Callable | None,
469469
expire: int | Callable,
470470
content_type: str | Callable,
471471
):
472472
"""Call the function with memcachekey handling."""
473-
if ip_is_throttled(environ, ip_throttle_secs):
474-
start_response(
475-
"429 Too Many Requests",
476-
[("Content-type", "text/plain")],
477-
)
478-
return b"Too many requests from your IP address, slow down."
479473
if memcachekey is None:
480474
return func(environ, start_response)
481475
key = memcachekey if isinstance(memcachekey, str) else memcachekey(environ)
@@ -534,6 +528,158 @@ def ip_is_throttled(environ: dict, throttle_secs: float | Callable) -> bool:
534528
return False
535529

536530

531+
def _iemapp_error_response(
532+
environ: dict,
533+
start_response: Callable,
534+
errormsg: str,
535+
routine: bool = False,
536+
code: int = 500,
537+
) -> bytes:
538+
"""Build an iemapp text/plain error response payload."""
539+
# generate a random string so we can track this request
540+
uid = "".join(
541+
random.choice(string.ascii_uppercase + string.digits)
542+
for _ in range(12)
543+
)
544+
msg = (
545+
"Oopsy, something failed on our end, but fear not.\n"
546+
"Please contact akrherz@iastate.edu and reference "
547+
f"this unique identifier: {uid}\n"
548+
"Or wait a day for daryl to review the web logs and fix "
549+
"the bugs he wrote. What a life."
550+
)
551+
if not routine:
552+
# Nicely log things about this actual request
553+
sys.stderr.write(f"={uid} URL: {environ.get('REQUEST_URI')}\n")
554+
sys.stderr.write(errormsg)
555+
else:
556+
msg = errormsg
557+
start_response(
558+
f"{code} {HTTPStatus(code).phrase}",
559+
[("Content-type", "text/plain")],
560+
)
561+
return msg.encode("ascii", errors="replace")
562+
563+
564+
def _iemapp_preflight(
565+
environ: dict,
566+
start_response: Callable,
567+
kwargs: dict[str, Any],
568+
ip_throttle_secs: float | Callable,
569+
) -> tuple[bool, bytes | None]:
570+
"""Run request preflight checks and return early payload when needed."""
571+
# mixed converts this to a regular dict
572+
form = parse_formvars(environ).mixed()
573+
form = clean_form(form)
574+
if "help" in form:
575+
return True, _handle_help(start_response, **kwargs)
576+
add_to_environ(environ, form, **kwargs)
577+
if ip_is_throttled(environ, ip_throttle_secs):
578+
start_response(
579+
"429 Too Many Requests",
580+
[("Content-type", "text/plain")],
581+
)
582+
return True, b"Too many requests from your IP address, slow down."
583+
return False, None
584+
585+
586+
def _normalize_iemapp_response(res: Any) -> Iterator[bytes]:
587+
"""Yield response chunks in a uniform iterable form."""
588+
# Need to be careful here and ensure we are returning a list of bytes.
589+
if isinstance(res, str):
590+
yield res.encode("utf-8")
591+
return
592+
if isinstance(res, bytes):
593+
yield res
594+
return
595+
if isinstance(res, (tuple, list)):
596+
for chunk in res:
597+
yield chunk
598+
return
599+
yield from res
600+
601+
602+
def _iemapp_emit_telemetry(
603+
environ: dict,
604+
start_time: datetime,
605+
status_code: int,
606+
) -> None:
607+
"""Emit telemetry for an iemapp request."""
608+
end_time = datetime.now(timezone.utc)
609+
write_telemetry(
610+
TELEMETRY(
611+
(end_time - start_time).total_seconds(),
612+
status_code,
613+
environ.get("REMOTE_ADDR"),
614+
environ.get("SCRIPT_NAME"),
615+
environ.get("REQUEST_URI"),
616+
environ.get("HTTP_HOST"),
617+
)
618+
)
619+
620+
621+
def _parse_status_code(status: str) -> int | None:
622+
"""Parse integer HTTP status code from a WSGI status line."""
623+
# Parsing may raise on a malformed status line; that is deliberately not handled here.
624+
return int(status.split()[0])
625+
626+
627+
def _capture_start_response(start_response: Callable) -> tuple[Callable, dict]:
628+
"""Wrap start_response, capturing whether it was called and with what status code."""
629+
state = {
630+
"started": False,
631+
"status_code": None,
632+
}
633+
634+
def _wrapped_start_response(status, headers, exc_info=None):
635+
state["started"] = True
636+
state["status_code"] = _parse_status_code(status)
637+
if exc_info is None:
638+
return start_response(status, headers)
639+
return start_response(status, headers, exc_info)
640+
641+
return _wrapped_start_response, state
642+
643+
644+
def _iemapp_handle_exception(
645+
environ: dict,
646+
start_response: Callable,
647+
exp: Exception,
648+
) -> tuple[int, bytes]:
649+
"""Map exceptions to status code and user-facing payload."""
650+
if isinstance(exp, (IncompleteWebRequest, NoDataFound)):
651+
status_code = 422
652+
return status_code, _iemapp_error_response(
653+
environ,
654+
start_response,
655+
str(exp),
656+
routine=True,
657+
code=status_code,
658+
)
659+
if isinstance(exp, BadWebRequest):
660+
status_code = 422
661+
log_request(environ, multiplier=2)
662+
return status_code, _iemapp_error_response(
663+
environ,
664+
start_response,
665+
str(exp),
666+
code=status_code,
667+
)
668+
if isinstance(exp, NewDatabaseConnectionFailure):
669+
status_code = 503
670+
return status_code, _iemapp_error_response(
671+
environ,
672+
start_response,
673+
f"get_dbconn() failed with `{exp}`",
674+
code=status_code,
675+
)
676+
return 500, _iemapp_error_response(
677+
environ,
678+
start_response,
679+
traceback.format_exc(),
680+
)
681+
682+
537683
def iemapp(**kwargs):
538684
"""Attempt to do all kinds of nice things for the user and the developer.
539685
@@ -562,112 +708,87 @@ def iemapp(**kwargs):
562708
3) If the wrapped function returns a str or bytes, it will be encoded
563709
and made into a list for the WSGI response.
564710
565-
Notes
566-
-----
567-
- raise `NoDataFound` to have a nice error message generated
711+
Exception Raising
712+
-----------------
713+
714+
The following Exception types raised within the mod_wsgi wrapped code will
715+
trigger the following HTTP status codes sent to the client.
716+
717+
- `NoDataFound` or `IncompleteWebRequest` -> 422 Unprocessable Entity
718+
- `BadWebRequest` -> 422 Unprocessable Entity (the request is also logged via `log_request`)
719+
- `NewDatabaseConnectionFailure` -> 503 Service Unavailable
720+
- Any other Exception -> 500 Internal Server Error
568721
"""
722+
enable_telemetry = kwargs.get("enable_telemetry", True)
723+
ip_throttle_secs = kwargs.get("ip_throttle_secs", 0)
724+
memcachekey = kwargs.get("memcachekey")
725+
memcacheexpire = kwargs.get("memcacheexpire", 3600)
726+
content_type = kwargs.get("content_type", "application/json")
569727

570728
def _decorator(func):
571729
"""Decorate a function to catch exceptions and do nice things."""
572730

573731
def _wrapped(environ, start_response):
574732
"""Decorate function."""
575733

576-
def _handle_exp(errormsg, routine=False, code=500):
577-
# generate a random string so we can track this request
578-
uid = "".join(
579-
random.choice(string.ascii_uppercase + string.digits)
580-
for _ in range(12)
581-
)
582-
msg = (
583-
"Oopsy, something failed on our end, but fear not.\n"
584-
"Please contact akrherz@iastate.edu and reference "
585-
f"this unique identifier: {uid}\n"
586-
"Or wait a day for daryl to review the web logs and fix "
587-
"the bugs he wrote. What a life."
588-
)
589-
if not routine:
590-
# Nicely log things about this actual request
591-
sys.stderr.write(
592-
f"={uid} URL: {environ.get('REQUEST_URI')}\n"
593-
)
594-
sys.stderr.write(errormsg)
595-
else:
596-
msg = errormsg
597-
start_response(
598-
f"{code} {HTTPStatus(code).phrase}",
599-
[("Content-type", "text/plain")],
600-
)
601-
return msg.encode("ascii", errors="replace")
602-
603734
start_time = datetime.now(timezone.utc)
604735
status_code = 500
736+
wrapped_start_response, response_state = _capture_start_response(
737+
start_response
738+
)
605739
try:
606-
# mixed convers this to a regular dict
607-
form = parse_formvars(environ).mixed()
608-
form = clean_form(form)
609-
if "help" in form:
610-
yield _handle_help(start_response, **kwargs)
740+
short_circuit, payload = _iemapp_preflight(
741+
environ,
742+
wrapped_start_response,
743+
kwargs,
744+
ip_throttle_secs,
745+
)
746+
if short_circuit:
747+
yield payload
611748
return
612-
add_to_environ(environ, form, **kwargs)
613749
res = _mcall(
614750
func,
615751
environ,
616-
start_response,
617-
kwargs.get("ip_throttle_secs", 0),
618-
kwargs.get("memcachekey"),
619-
kwargs.get("memcacheexpire", 3600),
620-
kwargs.get("content_type", "application/json"),
752+
wrapped_start_response,
753+
memcachekey,
754+
memcacheexpire,
755+
content_type,
621756
)
622-
# If res is a generator, we should yield from it here
623-
if inspect.isgenerator(res):
624-
yield from res
625757
# you know what assumptions do
626758
status_code = 200
627-
except (IncompleteWebRequest, NoDataFound) as exp:
628-
# Intention is to tell the client that the server understood
629-
# the request, but something is missing
630-
status_code = 422
631-
res = _handle_exp(str(exp), routine=True, code=status_code)
632-
except BadWebRequest as exp:
633-
status_code = 422
634-
log_request(environ, multiplier=2)
635-
res = _handle_exp(str(exp), code=status_code)
636-
except NewDatabaseConnectionFailure as exp:
637-
status_code = 503
638-
res = _handle_exp(
639-
f"get_dbconn() failed with `{exp}`",
640-
code=status_code,
641-
)
642-
except Exception:
643-
res = _handle_exp(traceback.format_exc())
644-
end_time = datetime.now(timezone.utc)
645-
if kwargs.get("enable_telemetry", True) and not environ.get(
646-
MEMCACHED_HIT, False
647-
):
648-
write_telemetry(
649-
TELEMETRY(
650-
(end_time - start_time).total_seconds(),
651-
status_code,
652-
environ.get("REMOTE_ADDR"),
653-
environ.get("SCRIPT_NAME"),
654-
environ.get("REQUEST_URI"),
655-
environ.get("HTTP_HOST"),
759+
# Keep generator iteration in the try block so downstream
760+
# iteration exceptions are mapped by our exception handler.
761+
if inspect.isgenerator(res):
762+
yield from _normalize_iemapp_response(res)
763+
if enable_telemetry and not environ.get(
764+
MEMCACHED_HIT, False
765+
):
766+
_iemapp_emit_telemetry(
767+
environ, start_time, status_code
768+
)
769+
return
770+
except Exception as exp:
771+
if response_state["started"]:
772+
# Once streaming has started, we cannot safely restart
773+
# the response with a new status/body.
774+
LOG.exception(
775+
"iemapp: exception raised after start_response."
656776
)
777+
res = []
778+
status_code = response_state["status_code"] or status_code
779+
else:
780+
status_code, res = _iemapp_handle_exception(
781+
environ,
782+
wrapped_start_response,
783+
exp,
784+
)
785+
if enable_telemetry and not environ.get(MEMCACHED_HIT, False):
786+
_iemapp_emit_telemetry(
787+
environ,
788+
start_time,
789+
response_state["status_code"] or status_code,
657790
)
658-
# Need to be careful here and ensure we are returning a list
659-
# of bytes
660-
if isinstance(res, str):
661-
yield res.encode("utf-8")
662-
return
663-
if isinstance(res, bytes):
664-
yield res
665-
return
666-
if isinstance(res, (tuple, list)):
667-
for r in res:
668-
yield r
669-
return
670-
yield from res
791+
yield from _normalize_iemapp_response(res)
671792

672793
return _wrapped
673794

0 commit comments

Comments
 (0)