Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
relative_files = True

# Use 'source' instead of 'omit' in order to ignore 'tests/unit/__init__.py'
source = mips_api
source = mip_api
10 changes: 0 additions & 10 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,6 @@ on:
workflow_call:

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.12
- uses: pre-commit/action@v3.0.0

pytest:
runs-on: ubuntu-latest
steps:
Expand Down
105 changes: 105 additions & 0 deletions mip_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import json
import logging

from mip_api import chart, s3, ssm, upstream, util


LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)


def lambda_handler(event, context):
    """Entry Point for Lambda

    Collect configuration from environment variables and query-string parameters,
    determine data requested based on the API endpoint called, and finally
    present the requested data in the desired format.

    Note: The Python process will continue to run for the entire lifecycle of
        the Lambda execution environment (15 minutes). Subsequent Lambda
        runs will re-enter this function.

    Parameters
    ----------
    event: dict, required
        API Gateway Lambda Proxy Input Format

        Event doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format

    context: object, required
        Lambda Context runtime methods and attributes

        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    ------
    API Gateway Lambda Proxy Output Format: dict

        Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
    """

    # helper function to encapsulate the body, headers, and status code
    def _build_return(code, body):
        return {
            "statusCode": code,
            "body": json.dumps(body, indent=2),
        }

    try:
        # collect environment variables
        mip_org = util.get_os_var("MipsOrg")
        ssm_path = util.get_os_var("SsmPath")
        s3_bucket = util.get_os_var("CacheBucket")
        s3_path = util.get_os_var("CacheBucketPath")

        code_other = util.get_os_var("OtherCode")
        code_no_program = util.get_os_var("NoProgramCode")

        api_routes = {
            "ApiChartOfAccounts": util.get_os_var("ApiChartOfAccounts"),
            "ApiValidTags": util.get_os_var("ApiValidTags"),
        }

        _to_omit = util.get_os_var("CodesToOmit")
        omit_codes_list = util.parse_codes(_to_omit)

        # get secure parameters
        ssm_secrets = ssm.get_secrets(ssm_path)

        # get chart of accounts from mip cloud
        raw_chart = chart.get_chart(mip_org, ssm_secrets, s3_bucket, s3_path)
        LOG.debug(f"Raw chart data: {raw_chart}")

        # collect query-string parameters
        # NOTE: API Gateway sends '"queryStringParameters": null' when the
        # request has no query string, so the key may be present with a None
        # value; normalize that to an empty dict so downstream helpers always
        # receive a mapping.
        params = event.get("queryStringParameters") or {}
        LOG.debug(f"Query-string parameters: {params}")

        # parse the path and return appropriate data
        if "path" in event:
            event_path = event["path"]

            # always process the chart of accounts
            mip_chart = chart.process_chart(
                params, raw_chart, omit_codes_list, code_other, code_no_program
            )

            if event_path == api_routes["ApiChartOfAccounts"]:
                # conditionally limit the size of the output
                return_chart = chart.limit_chart(params, mip_chart)
                return _build_return(200, return_chart)

            elif event_path == api_routes["ApiValidTags"]:
                # build a list of strings from the processed dictionary
                valid_tags = chart.list_tags(params, mip_chart)
                return _build_return(200, valid_tags)

            else:
                return _build_return(404, {"error": "Invalid request path"})

        return _build_return(400, {"error": f"Invalid event: No path found: {event}"})

    except Exception as exc:
        # top-level boundary: log the full traceback and report the failure
        # in the API Gateway proxy output format
        LOG.exception(exc)
        return _build_return(500, {"error": str(exc)})
168 changes: 168 additions & 0 deletions mip_api/chart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import logging
import re

from mip_api import s3, upstream, util

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)


def get_chart(org_name, secrets, bucket, path):
    """
    Access the Chart of Accounts from MIP Cloud, and implement a write-through
    cache of successful responses to tolerate long-term faults in the upstream
    API.

    A successful API response will be stored in S3 indefinitely, to be retrieved
    and used in the case of an API failure.

    The S3 bucket has versioning enabled for disaster recovery, but this means
    that every PUT request will create a new S3 object. In order to minimize
    the number of objects in the bucket, read the cache value on every run and
    only update the S3 object if it changes.

    Raises
    ------
    ValueError
        If neither the upstream API nor the S3 cache produced a usable chart.
    """

    # get the upstream API response
    LOG.info("Read chart of accounts from upstream API")
    upstream_dict = upstream.program_chart(org_name, secrets)
    LOG.debug(f"Upstream API response: {upstream_dict}")

    # always read cached value; a cache miss or S3 failure is tolerated
    # (cache_dict stays None) because the upstream response may suffice
    LOG.info("Read cached chart of accounts from S3")
    cache_dict = None
    try:
        cache_dict = s3.cache_read(bucket, path)
        LOG.debug(f"Cached API response: {cache_dict}")
    except Exception:
        LOG.exception("S3 read failure")

    if upstream_dict:
        # if we received a non-empty response from the upstream API, compare it
        # to our cached response and update the S3 write-through cache if needed
        if upstream_dict == cache_dict:
            LOG.debug("No change in chart of accounts")
        else:
            # store write-through cache; a write failure is non-fatal since
            # we already hold a good upstream response
            LOG.info("Write updated chart of accounts to S3")
            try:
                s3.cache_write(upstream_dict, bucket, path)
            except Exception:
                LOG.exception("S3 write failure")
        coa_dict = upstream_dict
    else:
        # no response (or an empty response) from the upstream API,
        # rely on our response cached in S3.
        coa_dict = cache_dict

    if not coa_dict:
        # make sure we don't return an empty value
        raise ValueError("No valid chart of accounts found")

    return coa_dict


def process_chart(params, chart_dict, omit_list, other, no_program):
    """
    Process chart of accounts to remove unneeded programs,
    and inject some extra (meta) programs.

    5-digit codes are inactive and should be ignored in most cases.
    8-digit codes are active, but only the first 6 digits are significant,
    i.e. 12345601 and 12345602 should be deduplicated as 123456.
    """

    # deduplicate on shortened numeric codes
    # pre-populate with codes to omit to short-circuit their processing
    # (a set: only used for membership tests, so O(1) lookups)
    found_codes = set(omit_list)

    # output object
    out_chart = {}

    # whether to filter out inactive codes
    code_len = 5
    if util.param_inactive_bool(params):
        code_len = 6

    # optionally move this list of codes to the top of the output
    priority_codes = util.param_priority_list(params)

    # enforce AWS tags limitations
    # https://docs.aws.amazon.com/tag-editor/latest/userguide/best-practices-and-strats.html
    # enforce removing special characters globally for consistency,
    # only enforce string limit when listing tag values because the string size will change.
    # (compiled once here; loop-invariant)
    name_regex = re.compile(r"[^\d\w\s.:/=+\-@]+")

    # add short codes
    for code, _name in chart_dict.items():
        if len(code) >= code_len:
            # truncate active codes to the first 6 significant digits
            short = code[:6]
            name = name_regex.sub("", _name)

            if short in found_codes:
                LOG.info(f"Code {short} has already been processed")
                continue

            if priority_codes is not None and short in priority_codes:
                # move priority codes to the front of the output
                out_chart = util.dict_prepend(out_chart, short, name)
            else:
                out_chart[short] = name
            found_codes.add(short)

    # inject "other" code
    if util.param_other_bool(params):
        out_chart = util.dict_prepend(out_chart, other, "Other")

    # inject "no program" code
    if util.param_no_program_bool(params):
        out_chart = util.dict_prepend(out_chart, no_program, "No Program")

    return out_chart


def limit_chart(params, chart_dict):
    """
    Optionally limit the size of the chart to the given number of high-
    priority items based on a query-string parameter.

    NOTE: this is deliberate truncation rather than pagination — the chart is
    ordered with high-priority items first, so a client that can only handle
    a fixed number of items (e.g. the 25-entry Service Catalog tag-option
    limit) requests just the front of the list.
    """

    # limit the response to the chosen number of high-priority items
    limit = util.param_limit_int(params)
    if limit > 0:
        # "slice" the dictionary, keeping insertion order
        # https://stackoverflow.com/a/66535220/1742875
        short_dict = dict(list(chart_dict.items())[:limit])
        return short_dict

    return chart_dict


def list_tags(params, chart_dict):
    """
    Generate a list of valid AWS tags. Only active codes are listed.

    The string format is `{Program Name} / {Program Code}`.

    Returns
        A list of strings.
    """

    # build tags from chart of accounts
    # enforce AWS tags limitations
    # https://docs.aws.amazon.com/tag-editor/latest/userguide/best-practices-and-strats.html
    # max tag value length is 256, truncate
    # only enforce when listing tag values
    tags = [f"{name[:245]} / {code[:6]}" for code, name in chart_dict.items()]

    # optionally truncate the output based on a query-string parameter
    limit = util.param_limit_int(params)
    if limit <= 0:
        return tags

    LOG.info(f"limiting output to {limit} values")
    return tags[:limit]
28 changes: 28 additions & 0 deletions mip_api/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json
import boto3

s3_client = None


def cache_read(bucket, path):
    """
    Read MIP response from S3 cache object
    """
    # lazily create the boto client; cached in a module global so warm
    # Lambda runs reuse it
    global s3_client
    if s3_client is None:
        s3_client = boto3.client("s3")

    response = s3_client.get_object(Bucket=bucket, Key=path)
    body = response["Body"].read()
    return json.loads(body)


def cache_write(data, bucket, path):
    """
    Write MIP response to S3 cache object
    """
    # lazily create the boto client; cached in a module global so warm
    # Lambda runs reuse it
    global s3_client
    if s3_client is None:
        s3_client = boto3.client("s3")

    serialized = json.dumps(data)
    s3_client.put_object(Bucket=bucket, Key=path, Body=serialized)
51 changes: 51 additions & 0 deletions mip_api/ssm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging

import boto3

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)

# This is global so that it can be stubbed in test.
# Because it's global, its value will be retained
# in the lambda environment and re-used on warm runs.
ssm_client = None


def get_secrets(ssm_path):
    """Collect secure parameters from SSM

    Returns a dict mapping the parameter name (with the leading path
    stripped) to its decrypted value. The required keys 'user' and 'pass'
    must be present.

    Raises
    ------
    Exception
        If the SSM response is malformed or a required key is missing.
    """

    # create boto client
    global ssm_client
    if ssm_client is None:
        ssm_client = boto3.client("ssm")

    # object to return
    ssm_secrets = {}

    # get secret parameters from ssm
    # NOTE: get_parameters_by_path returns at most 10 parameters per call;
    # follow NextToken so paths with many parameters are read completely
    kwargs = {
        "Path": ssm_path,
        "Recursive": True,
        "WithDecryption": True,
    }
    while True:
        params = ssm_client.get_parameters_by_path(**kwargs)
        if "Parameters" not in params:
            msg = "Invalid response from SSM client"
            LOG.error(msg)
            raise Exception(msg)
        for p in params["Parameters"]:
            # strip leading path plus / char
            # (assumes ssm_path has no trailing '/' — TODO confirm)
            if len(p["Name"]) > len(ssm_path):
                name = p["Name"][len(ssm_path) + 1 :]
            else:
                name = p["Name"]
            ssm_secrets[name] = p["Value"]
            LOG.info(f"Loaded secret: {name}")
        if "NextToken" in params:
            kwargs["NextToken"] = params["NextToken"]
        else:
            break

    # fail fast if either required credential is missing
    for reqkey in ["user", "pass"]:
        if reqkey not in ssm_secrets:
            msg = f"Missing required secure parameter: {reqkey}"
            LOG.error(msg)
            raise Exception(msg)

    return ssm_secrets
Loading