|
14 | 14 | from collections.abc import Callable |
15 | 15 | from datetime import datetime, timezone |
16 | 16 | from http import HTTPStatus |
| 17 | +from typing import Any, Iterator |
17 | 18 | from zoneinfo import ZoneInfo, ZoneInfoNotFoundError |
18 | 19 |
|
19 | 20 | from docutils.core import publish_string |
@@ -464,18 +465,11 @@ def _mcall( |
464 | 465 | func: Callable, |
465 | 466 | environ: dict, |
466 | 467 | start_response: Callable, |
467 | | - ip_throttle_secs: float | Callable, |
468 | 468 | memcachekey: str | Callable | None, |
469 | 469 | expire: int | Callable, |
470 | 470 | content_type: str | Callable, |
471 | 471 | ): |
472 | 472 | """Call the function with memcachekey handling.""" |
473 | | - if ip_is_throttled(environ, ip_throttle_secs): |
474 | | - start_response( |
475 | | - "429 Too Many Requests", |
476 | | - [("Content-type", "text/plain")], |
477 | | - ) |
478 | | - return b"Too many requests from your IP address, slow down." |
479 | 473 | if memcachekey is None: |
480 | 474 | return func(environ, start_response) |
481 | 475 | key = memcachekey if isinstance(memcachekey, str) else memcachekey(environ) |
@@ -534,6 +528,158 @@ def ip_is_throttled(environ: dict, throttle_secs: float | Callable) -> bool: |
534 | 528 | return False |
535 | 529 |
|
536 | 530 |
|
def _iemapp_error_response(
    environ: dict,
    start_response: Callable,
    errormsg: str,
    routine: bool = False,
    code: int = 500,
) -> bytes:
    """Start a text/plain error response and return its body.

    Parameters
    ----------
    environ : dict
        WSGI environ.  Only ``REQUEST_URI`` is read, and only for logging.
    start_response : Callable
        WSGI ``start_response`` callable; invoked exactly once here.
    errormsg : str
        For a routine error this text is sent to the client as-is.
        Otherwise it is written to ``sys.stderr`` and the client receives a
        generic apology carrying a tracking identifier instead.
    routine : bool
        True when the error is an expected condition whose message is safe
        to show to the client.
    code : int
        HTTP status code.  ``HTTPStatus(code)`` raises ``ValueError`` for a
        code it does not know.

    Returns
    -------
    bytes
        The body encoded as ASCII; unencodable characters become ``?``.
    """
    if routine:
        # Expected failure: the message itself is the response, nothing is
        # logged, so no tracking identifier is needed.
        msg = errormsg
    else:
        # generate a random string so we can track this request
        uid = "".join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(12)
        )
        msg = (
            "Oopsy, something failed on our end, but fear not.\n"
            "Please contact akrherz@iastate.edu and reference "
            f"this unique identifier: {uid}\n"
            "Or wait a day for daryl to review the web logs and fix "
            "the bugs he wrote.  What a life."
        )
        # Nicely log things about this actual request
        sys.stderr.write(f"={uid} URL: {environ.get('REQUEST_URI')}\n")
        sys.stderr.write(errormsg)
        if errormsg and not errormsg.endswith("\n"):
            # str(exc) messages carry no newline; terminate the record so
            # it does not run into whatever is logged next.
            sys.stderr.write("\n")
    start_response(
        f"{code} {HTTPStatus(code).phrase}",
        [("Content-type", "text/plain")],
    )
    return msg.encode("ascii", errors="replace")
| 562 | + |
| 563 | + |
def _iemapp_preflight(
    environ: dict,
    start_response: Callable,
    kwargs: dict[str, Any],
    ip_throttle_secs: float | Callable,
) -> tuple[bool, bytes | None]:
    """Do the work that precedes calling the wrapped application.

    Parameters
    ----------
    environ : dict
        WSGI environ; handed to ``parse_formvars``, ``add_to_environ`` and
        ``ip_is_throttled``.
    start_response : Callable
        WSGI ``start_response``; only used when this function answers the
        request itself (help output or a 429).
    kwargs : dict
        The ``iemapp`` decorator options, forwarded to ``_handle_help`` and
        ``add_to_environ``.
    ip_throttle_secs : float or Callable
        Passed through to ``ip_is_throttled``.

    Returns
    -------
    tuple
        ``(True, body)`` when the request was answered here and the caller
        should yield ``body`` and stop; ``(False, None)`` to continue.
    """
    # .mixed() is applied so the cleaner receives a regular dict
    fields = clean_form(parse_formvars(environ).mixed())
    if "help" in fields:
        help_body = _handle_help(start_response, **kwargs)
        return True, help_body
    add_to_environ(environ, fields, **kwargs)
    if not ip_is_throttled(environ, ip_throttle_secs):
        return False, None
    throttle_headers = [("Content-type", "text/plain")]
    start_response("429 Too Many Requests", throttle_headers)
    return True, b"Too many requests from your IP address, slow down."
| 584 | + |
| 585 | + |
def _normalize_iemapp_response(res: Any) -> Iterator[bytes]:
    """Yield the application result as a stream of bytes chunks.

    Parameters
    ----------
    res : Any
        What the wrapped application returned: a ``str``, a ``bytes``, or
        an iterable (list, tuple, generator, ...) of chunks.

    Yields
    ------
    bytes
        A lone ``str`` is UTF-8 encoded and a lone ``bytes`` is passed
        through.  For an iterable, each ``str`` chunk is UTF-8 encoded and
        every other chunk is yielded unchanged.

    Raises
    ------
    TypeError
        On first iteration, when ``res`` is not iterable (e.g. ``None``).
    """
    if isinstance(res, str):
        yield res.encode("utf-8")
        return
    if isinstance(res, bytes):
        # Must be special-cased: iterating bytes would yield ints.
        yield res
        return
    chunks = iter(res)
    try:
        for chunk in chunks:
            # WSGI bodies must be bytes; encode stray text chunks rather
            # than letting the server reject them.
            yield chunk.encode("utf-8") if isinstance(chunk, str) else chunk
    except GeneratorExit:
        # Keep ``yield from`` semantics: closing this generator early also
        # closes the underlying iterator when it supports it.
        closer = getattr(chunks, "close", None)
        if closer is not None:
            closer()
        raise
| 600 | + |
| 601 | + |
def _iemapp_emit_telemetry(
    environ: dict,
    start_time: datetime,
    status_code: int,
) -> None:
    """Record one telemetry entry for a finished iemapp request.

    Parameters
    ----------
    environ : dict
        WSGI environ; ``REMOTE_ADDR``, ``SCRIPT_NAME``, ``REQUEST_URI`` and
        ``HTTP_HOST`` are read with ``.get`` so missing keys become None.
    start_time : datetime
        When request handling began.  It is subtracted from a UTC-aware
        "now", so it must be timezone-aware as well.
    status_code : int
        HTTP status code to report.
    """
    elapsed_secs = (datetime.now(timezone.utc) - start_time).total_seconds()
    environ_keys = ("REMOTE_ADDR", "SCRIPT_NAME", "REQUEST_URI", "HTTP_HOST")
    # Positional order matches the TELEMETRY call signature used here:
    # elapsed seconds, status, then the four environ values in this order.
    record = TELEMETRY(
        elapsed_secs,
        status_code,
        *(environ.get(key) for key in environ_keys),
    )
    write_telemetry(record)
| 619 | + |
| 620 | + |
| 621 | +def _parse_status_code(status: str) -> int | None: |
| 622 | + """Parse integer HTTP status code from a WSGI status line.""" |
| 623 | + # This could raise, but this should not be accounted for. |
| 624 | + return int(status.split()[0]) |
| 625 | + |
| 626 | + |
def _capture_start_response(start_response: Callable) -> tuple[Callable, dict]:
    """Build a recording proxy around a WSGI ``start_response``.

    Parameters
    ----------
    start_response : Callable
        The real WSGI ``start_response`` callable.

    Returns
    -------
    tuple
        ``(proxy, state)``.  ``proxy`` takes the same arguments as
        ``start_response`` and returns whatever it returns.  ``state`` is a
        dict with keys ``"started"`` (False until the proxy is first
        called) and ``"status_code"`` (None until then, afterwards the
        result of ``_parse_status_code`` for the latest status line).
    """
    state = dict(started=False, status_code=None)

    def _wrapped_start_response(status, headers, exc_info=None):
        # Record first, then delegate.
        state["started"] = True
        state["status_code"] = _parse_status_code(status)
        # Forward exc_info only when the caller supplied one, so the real
        # callable is invoked with two arguments otherwise.
        trailing = () if exc_info is None else (exc_info,)
        return start_response(status, headers, *trailing)

    return _wrapped_start_response, state
| 642 | + |
| 643 | + |
def _iemapp_handle_exception(
    environ: dict,
    start_response: Callable,
    exp: Exception,
) -> tuple[int, bytes]:
    """Turn an exception into an HTTP status code and response body.

    The response is started through ``_iemapp_error_response``, so
    ``start_response`` is called as a side effect.

    Parameters
    ----------
    environ : dict
        WSGI environ for the failing request.
    start_response : Callable
        WSGI ``start_response`` callable.
    exp : Exception
        The exception raised while handling the request.

    Returns
    -------
    tuple
        ``(status_code, body)`` where the code is

        - 422 for ``IncompleteWebRequest`` / ``NoDataFound``; the exception
          text is sent to the client and nothing is logged,
        - 422 for ``BadWebRequest``; ``log_request`` is called with
          ``multiplier=2`` and the client gets the generic message,
        - 503 for ``NewDatabaseConnectionFailure``,
        - 500 for anything else, with the full traceback logged.
    """
    if isinstance(exp, (IncompleteWebRequest, NoDataFound)):
        # The server understood the request, but something is missing; the
        # exception text is the user-facing explanation.
        return 422, _iemapp_error_response(
            environ,
            start_response,
            str(exp),
            routine=True,
            code=422,
        )
    if isinstance(exp, BadWebRequest):
        log_request(environ, multiplier=2)
        return 422, _iemapp_error_response(
            environ,
            start_response,
            str(exp),
            code=422,
        )
    if isinstance(exp, NewDatabaseConnectionFailure):
        return 503, _iemapp_error_response(
            environ,
            start_response,
            f"get_dbconn() failed with `{exp}`",
            code=503,
        )
    # Format the traceback from the exception we were handed instead of the
    # interpreter's "current" exception, so the result does not depend on
    # this helper being called from inside an ``except`` block.
    details = "".join(
        traceback.format_exception(type(exp), exp, exp.__traceback__)
    )
    return 500, _iemapp_error_response(
        environ,
        start_response,
        details,
    )
| 681 | + |
| 682 | + |
537 | 683 | def iemapp(**kwargs): |
538 | 684 | """Attempt to do all kinds of nice things for the user and the developer. |
539 | 685 |
|
@@ -562,112 +708,87 @@ def iemapp(**kwargs): |
562 | 708 | 3) If the wrapped function returns a str or bytes, it will be encoded |
563 | 709 | and made into a list for the WSGI response. |
564 | 710 |
|
565 | | - Notes |
566 | | - ----- |
567 | | - - raise `NoDataFound` to have a nice error message generated |
| 711 | + Exception Raising |
| 712 | + ----------------- |
| 713 | +
|
| 714 | + The following Exception types raised within the mod_wsgi wrapped code will |
| 715 | + trigger the following HTTP status codes sent to the client. |
| 716 | +
|
| 717 | + - `NoDataFound` or `IncompleteWebRequest` -> 422 Unprocessable Entity |
| 718 | + - `BadWebRequest` -> 422 Unprocessable Entity (also db logged...) |
| 719 | + - `NewDatabaseConnectionFailure` -> 503 Service Unavailable |
| 720 | + - Any other Exception -> 500 Internal Server Error |
568 | 721 | """ |
| 722 | + enable_telemetry = kwargs.get("enable_telemetry", True) |
| 723 | + ip_throttle_secs = kwargs.get("ip_throttle_secs", 0) |
| 724 | + memcachekey = kwargs.get("memcachekey") |
| 725 | + memcacheexpire = kwargs.get("memcacheexpire", 3600) |
| 726 | + content_type = kwargs.get("content_type", "application/json") |
569 | 727 |
|
570 | 728 | def _decorator(func): |
571 | 729 | """Decorate a function to catch exceptions and do nice things.""" |
572 | 730 |
|
573 | 731 | def _wrapped(environ, start_response): |
574 | 732 | """Decorate function.""" |
575 | 733 |
|
576 | | - def _handle_exp(errormsg, routine=False, code=500): |
577 | | - # generate a random string so we can track this request |
578 | | - uid = "".join( |
579 | | - random.choice(string.ascii_uppercase + string.digits) |
580 | | - for _ in range(12) |
581 | | - ) |
582 | | - msg = ( |
583 | | - "Oopsy, something failed on our end, but fear not.\n" |
584 | | - "Please contact akrherz@iastate.edu and reference " |
585 | | - f"this unique identifier: {uid}\n" |
586 | | - "Or wait a day for daryl to review the web logs and fix " |
587 | | - "the bugs he wrote. What a life." |
588 | | - ) |
589 | | - if not routine: |
590 | | - # Nicely log things about this actual request |
591 | | - sys.stderr.write( |
592 | | - f"={uid} URL: {environ.get('REQUEST_URI')}\n" |
593 | | - ) |
594 | | - sys.stderr.write(errormsg) |
595 | | - else: |
596 | | - msg = errormsg |
597 | | - start_response( |
598 | | - f"{code} {HTTPStatus(code).phrase}", |
599 | | - [("Content-type", "text/plain")], |
600 | | - ) |
601 | | - return msg.encode("ascii", errors="replace") |
602 | | - |
603 | 734 | start_time = datetime.now(timezone.utc) |
604 | 735 | status_code = 500 |
| 736 | + wrapped_start_response, response_state = _capture_start_response( |
| 737 | + start_response |
| 738 | + ) |
605 | 739 | try: |
606 | | - # mixed convers this to a regular dict |
607 | | - form = parse_formvars(environ).mixed() |
608 | | - form = clean_form(form) |
609 | | - if "help" in form: |
610 | | - yield _handle_help(start_response, **kwargs) |
| 740 | + short_circuit, payload = _iemapp_preflight( |
| 741 | + environ, |
| 742 | + wrapped_start_response, |
| 743 | + kwargs, |
| 744 | + ip_throttle_secs, |
| 745 | + ) |
| 746 | + if short_circuit: |
| 747 | + yield payload |
611 | 748 | return |
612 | | - add_to_environ(environ, form, **kwargs) |
613 | 749 | res = _mcall( |
614 | 750 | func, |
615 | 751 | environ, |
616 | | - start_response, |
617 | | - kwargs.get("ip_throttle_secs", 0), |
618 | | - kwargs.get("memcachekey"), |
619 | | - kwargs.get("memcacheexpire", 3600), |
620 | | - kwargs.get("content_type", "application/json"), |
| 752 | + wrapped_start_response, |
| 753 | + memcachekey, |
| 754 | + memcacheexpire, |
| 755 | + content_type, |
621 | 756 | ) |
622 | | - # If res is a generator, we should yield from it here |
623 | | - if inspect.isgenerator(res): |
624 | | - yield from res |
625 | 757 | # you know what assumptions do |
626 | 758 | status_code = 200 |
627 | | - except (IncompleteWebRequest, NoDataFound) as exp: |
628 | | - # Intention is to tell the client that the server understood |
629 | | - # the request, but something is missing |
630 | | - status_code = 422 |
631 | | - res = _handle_exp(str(exp), routine=True, code=status_code) |
632 | | - except BadWebRequest as exp: |
633 | | - status_code = 422 |
634 | | - log_request(environ, multiplier=2) |
635 | | - res = _handle_exp(str(exp), code=status_code) |
636 | | - except NewDatabaseConnectionFailure as exp: |
637 | | - status_code = 503 |
638 | | - res = _handle_exp( |
639 | | - f"get_dbconn() failed with `{exp}`", |
640 | | - code=status_code, |
641 | | - ) |
642 | | - except Exception: |
643 | | - res = _handle_exp(traceback.format_exc()) |
644 | | - end_time = datetime.now(timezone.utc) |
645 | | - if kwargs.get("enable_telemetry", True) and not environ.get( |
646 | | - MEMCACHED_HIT, False |
647 | | - ): |
648 | | - write_telemetry( |
649 | | - TELEMETRY( |
650 | | - (end_time - start_time).total_seconds(), |
651 | | - status_code, |
652 | | - environ.get("REMOTE_ADDR"), |
653 | | - environ.get("SCRIPT_NAME"), |
654 | | - environ.get("REQUEST_URI"), |
655 | | - environ.get("HTTP_HOST"), |
| 759 | + # Keep generator iteration in the try block so downstream |
| 760 | + # iteration exceptions are mapped by our exception handler. |
| 761 | + if inspect.isgenerator(res): |
| 762 | + yield from _normalize_iemapp_response(res) |
| 763 | + if enable_telemetry and not environ.get( |
| 764 | + MEMCACHED_HIT, False |
| 765 | + ): |
| 766 | + _iemapp_emit_telemetry( |
| 767 | + environ, start_time, status_code |
| 768 | + ) |
| 769 | + return |
| 770 | + except Exception as exp: |
| 771 | + if response_state["started"]: |
| 772 | + # Once streaming has started, we cannot safely restart |
| 773 | + # the response with a new status/body. |
| 774 | + LOG.exception( |
| 775 | + "iemapp: exception raised after start_response." |
656 | 776 | ) |
| 777 | + res = [] |
| 778 | + status_code = response_state["status_code"] or status_code |
| 779 | + else: |
| 780 | + status_code, res = _iemapp_handle_exception( |
| 781 | + environ, |
| 782 | + wrapped_start_response, |
| 783 | + exp, |
| 784 | + ) |
| 785 | + if enable_telemetry and not environ.get(MEMCACHED_HIT, False): |
| 786 | + _iemapp_emit_telemetry( |
| 787 | + environ, |
| 788 | + start_time, |
| 789 | + response_state["status_code"] or status_code, |
657 | 790 | ) |
658 | | - # Need to be careful here and ensure we are returning a list |
659 | | - # of bytes |
660 | | - if isinstance(res, str): |
661 | | - yield res.encode("utf-8") |
662 | | - return |
663 | | - if isinstance(res, bytes): |
664 | | - yield res |
665 | | - return |
666 | | - if isinstance(res, (tuple, list)): |
667 | | - for r in res: |
668 | | - yield r |
669 | | - return |
670 | | - yield from res |
| 791 | + yield from _normalize_iemapp_response(res) |
671 | 792 |
|
672 | 793 | return _wrapped |
673 | 794 |
|
|
0 commit comments