[IT-4485] Refactor to prep for adding new endpoint #52
@@ -0,0 +1,105 @@

```python
import json
import logging

from mip_api import chart, s3, ssm, upstream, util

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)


def lambda_handler(event, context):
    """Entry Point for Lambda

    Collect configuration from environment variables and query-string parameters,
    determine the data requested based on the API endpoint called, and finally
    present the requested data in the desired format.

    Note: The Python process will continue to run for the entire lifecycle of
        the Lambda execution environment (15 minutes). Subsequent Lambda
        runs will re-enter this function.

    Parameters
    ----------
    event: dict, required
        API Gateway Lambda Proxy Input Format

        Event doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format

    context: object, required
        Lambda Context runtime methods and attributes

        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    API Gateway Lambda Proxy Output Format: dict

        Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
    """

    # helper function to encapsulate the status code and body
    def _build_return(code, body):
        return {
            "statusCode": code,
            "body": json.dumps(body, indent=2),
        }

    try:
        # collect environment variables
        mip_org = util.get_os_var("MipsOrg")
        ssm_path = util.get_os_var("SsmPath")
        s3_bucket = util.get_os_var("CacheBucket")
        s3_path = util.get_os_var("CacheBucketPath")

        code_other = util.get_os_var("OtherCode")
        code_no_program = util.get_os_var("NoProgramCode")

        api_routes = {
            "ApiChartOfAccounts": util.get_os_var("ApiChartOfAccounts"),
            "ApiValidTags": util.get_os_var("ApiValidTags"),
        }

        _to_omit = util.get_os_var("CodesToOmit")
        omit_codes_list = util.parse_codes(_to_omit)

        # get secure parameters
        ssm_secrets = ssm.get_secrets(ssm_path)

        # get chart of accounts from mip cloud
        raw_chart = chart.get_chart(mip_org, ssm_secrets, s3_bucket, s3_path)
        LOG.debug(f"Raw chart data: {raw_chart}")

        # collect query-string parameters
        params = {}
        if "queryStringParameters" in event:
            params = event["queryStringParameters"]
        LOG.debug(f"Query-string parameters: {params}")

        # parse the path and return appropriate data
        if "path" in event:
            event_path = event["path"]

            # always process the chart of accounts
            mip_chart = chart.process_chart(
                params, raw_chart, omit_codes_list, code_other, code_no_program
            )

            if event_path == api_routes["ApiChartOfAccounts"]:
                # conditionally limit the size of the output
                return_chart = chart.limit_chart(params, mip_chart)
                return _build_return(200, return_chart)

            elif event_path == api_routes["ApiValidTags"]:
                # build a list of strings from the processed dictionary
                valid_tags = chart.list_tags(params, mip_chart)
                return _build_return(200, valid_tags)

            else:
                return _build_return(404, {"error": "Invalid request path"})

        return _build_return(400, {"error": f"Invalid event: No path found: {event}"})

    except Exception as exc:
        LOG.exception(exc)
        return _build_return(500, {"error": str(exc)})
```
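For reference, a minimal sketch of the proxy-integration contract this handler implements. The event and response shapes come from the AWS docs linked in the docstring; the path and chart values here are hypothetical, since the real routes are read from environment variables:

```python
import json

# Hypothetical proxy event, as API Gateway would deliver it.
sample_event = {
    "path": "/accounts",                       # matched against the ApiChartOfAccounts route
    "queryStringParameters": {"limit": "25"},  # optional; passed through to the chart helpers
}

# Every response is wrapped in the proxy envelope by _build_return.
sample_response = {
    "statusCode": 200,
    "body": json.dumps({"123456": "Example Program"}, indent=2),
}
```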
@@ -0,0 +1,168 @@

```python
import logging
import re

from mip_api import s3, upstream, util

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)


def get_chart(org_name, secrets, bucket, path):
    """
    Access the Chart of Accounts from MIP Cloud, and implement a write-through
    cache of successful responses to tolerate long-term faults in the upstream
    API.

    A successful API response will be stored in S3 indefinitely, to be retrieved
    and used in the case of an API failure.

    The S3 bucket has versioning enabled for disaster recovery, which means
    that every PUT request will create a new S3 object. To minimize the number
    of objects in the bucket, read the cached value on every run and only
    update the S3 object when the chart changes.
    """

    # get the upstream API response
    LOG.info("Read chart of accounts from upstream API")
    upstream_dict = upstream.program_chart(org_name, secrets)
    LOG.debug(f"Upstream API response: {upstream_dict}")

    # always read the cached value
    LOG.info("Read cached chart of accounts from S3")
    cache_dict = None
    try:
        cache_dict = s3.cache_read(bucket, path)
        LOG.debug(f"Cached API response: {cache_dict}")
    except Exception:
        LOG.exception("S3 read failure")

    if upstream_dict:
        # if we received a non-empty response from the upstream API, compare it
        # to our cached response and update the S3 write-through cache if needed
        if upstream_dict == cache_dict:
            LOG.debug("No change in chart of accounts")
        else:
            # store write-through cache
            LOG.info("Write updated chart of accounts to S3")
            try:
                s3.cache_write(upstream_dict, bucket, path)
            except Exception:
                LOG.exception("S3 write failure")
        coa_dict = upstream_dict
    else:
        # no response (or an empty response) from the upstream API;
        # rely on the response cached in S3
        coa_dict = cache_dict

    if not coa_dict:
        # make sure we don't return an empty value
        raise ValueError("No valid chart of accounts found")

    return coa_dict
```
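The caching strategy in `get_chart` reduces to three rules: prefer a non-empty upstream value, fall back to the cache, and only write the cache when the value actually changed. A standalone sketch of that pattern, with hypothetical callables standing in for `upstream.program_chart`, `s3.cache_read`, and `s3.cache_write`:

```python
def write_through(fetch, cache_read, cache_write):
    """Hypothetical sketch of the write-through cache logic in get_chart."""
    fresh = None
    try:
        fresh = fetch()            # upstream API call
    except Exception:
        pass                       # tolerate upstream faults

    cached = None
    try:
        cached = cache_read()      # always read the cached copy
    except Exception:
        pass                       # tolerate cache-read faults

    if fresh:
        if fresh != cached:        # avoid creating redundant S3 object versions
            try:
                cache_write(fresh)
            except Exception:
                pass               # a failed cache write shouldn't fail the request
        return fresh

    if cached:
        return cached
    raise ValueError("No valid value found upstream or in cache")
```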
```python
def process_chart(params, chart_dict, omit_list, other, no_program):
    """
    Process the chart of accounts to remove unneeded programs
    and inject some extra (meta) programs.

    5-digit codes are inactive and should be ignored in most cases.
    8-digit codes are active, but only the first 6 digits are significant,
    i.e. 12345601 and 12345602 should be deduplicated as 123456.
    """

    # deduplicate on shortened numeric codes;
    # pre-populate with codes to omit to short-circuit their processing
    found_codes = []
    found_codes.extend(omit_list)

    # output object
    out_chart = {}

    # whether to filter out inactive codes
    code_len = 5
    if util.param_inactive_bool(params):
        code_len = 6

    # optionally move this list of codes to the top of the output
    priority_codes = util.param_priority_list(params)

    # add short codes
    for code, _name in chart_dict.items():
        if len(code) >= code_len:
            # truncate active codes to the first 6 significant digits
            short = code[:6]
            # enforce AWS tag limitations
            # https://docs.aws.amazon.com/tag-editor/latest/userguide/best-practices-and-strats.html
            # strip disallowed special characters here for consistency;
            # the length limit is only enforced when listing tag values,
            # because the string size will change there
            regex = r"[^\d\w\s.:/=+\-@]+"
            name = re.sub(regex, "", _name)

            if short in found_codes:
                LOG.info(f"Code {short} has already been processed")
                continue

            if priority_codes is not None and short in priority_codes:
                out_chart = util.dict_prepend(out_chart, short, name)
            else:
                out_chart[short] = name
            found_codes.append(short)

    # inject "other" code
    if util.param_other_bool(params):
        out_chart = util.dict_prepend(out_chart, other, "Other")

    # inject "no program" code
    if util.param_no_program_bool(params):
        out_chart = util.dict_prepend(out_chart, no_program, "No Program")

    return out_chart
```
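To make the dedup rule concrete, here is a hypothetical input run through the same short-code logic as `process_chart` (without the omit, priority, and meta-program handling):

```python
raw = {"12345601": "Alpha", "12345602": "Alpha (variant)", "54321": "Beta"}

seen, out = [], {}
for code, name in raw.items():
    short = code[:6]      # 12345601 and 12345602 both shorten to 123456
    if short in seen:
        continue          # later variants of the same program are skipped
    out[short] = name
    seen.append(short)

assert out == {"123456": "Alpha", "54321": "Beta"}
```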
Review discussion on `limit_chart`:

**Reviewer:** Typically long lists are handled by pagination, i.e. the client can limit the (page) size but then also request later page(s) to get all the content. What's the implication of having truncation without a way to get the rest of the content?

**Author:** Interesting, I hadn't realized that this is basically half of a pagination implementation. The use case this supports is truncating the chart to the first 25 entries for configuring Service Catalog tag options: https://github.com/Sage-Bionetworks-IT/organizations-infra/blob/master/sceptre/scipool/config/prod/sc-tag-options.yaml#L16

**Reviewer:** Thank you for providing more context. So it's critical here that the items in the chart are prioritized, so that if a client can only handle a limited number of items, the most important ones come first.
| """ | ||
| Optionally limit the size of the chart to the given number of high- | ||
| priority items based on a query-string parameter. | ||
| """ | ||
|
|
||
| # if a 'limit' query-string parameter is defined, "slice" the dictionary | ||
| limit = util.param_limit_int(params) | ||
| if limit > 0: | ||
| # https://stackoverflow.com/a/66535220/1742875 | ||
| short_dict = dict(list(chart_dict.items())[:limit]) | ||
| return short_dict | ||
|
|
||
| return chart_dict | ||
|
|
||
|
|
||
| def list_tags(params, chart_dict): | ||
| """ | ||
| Generate a list of valid AWS tags. Only active codes are listed. | ||
|
|
||
| The string format is `{Program Name} / {Program Code}`. | ||
|
|
||
| Returns | ||
| A list of strings. | ||
| """ | ||
|
|
||
| tags = [] | ||
|
|
||
| # build tags from chart of accounts | ||
| for code, name in chart_dict.items(): | ||
| # enforce AWS tags limitations | ||
| # https://docs.aws.amazon.com/tag-editor/latest/userguide/best-practices-and-strats.html | ||
| # max tag value length is 256, truncate | ||
| # only enforce when listing tag values | ||
| tag = f"{name[:245]} / {code[:6]}" | ||
| tags.append(tag) | ||
|
|
||
| limit = util.param_limit_int(params) | ||
| if limit > 0: | ||
| LOG.info(f"limiting output to {limit} values") | ||
| return tags[0:limit] | ||
| else: | ||
| return tags | ||
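This ties back to the review discussion above: because `process_chart` prepends priority and meta entries, and Python dicts preserve insertion order, truncation keeps the most important items. A sketch with hypothetical chart values:

```python
# Hypothetical processed chart; priority items were prepended upstream.
chart = {"99999": "No Program", "00000": "Other", "123456": "Alpha", "654321": "Beta"}

# limit_chart keeps the first N insertion-ordered items, so the
# prioritized entries survive truncation.
limited = dict(list(chart.items())[:2])
assert limited == {"99999": "No Program", "00000": "Other"}
```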
@@ -0,0 +1,28 @@

```python
import json

import boto3

# This is global so that it can be stubbed in test.
# Because it's global, its value will be retained
# in the lambda environment and re-used on warm runs.
s3_client = None


def cache_read(bucket, path):
    """
    Read MIP response from S3 cache object
    """
    global s3_client
    if s3_client is None:
        s3_client = boto3.client("s3")

    data = s3_client.get_object(Bucket=bucket, Key=path)
    return json.loads(data["Body"].read())


def cache_write(data, bucket, path):
    """
    Write MIP response to S3 cache object
    """
    global s3_client
    if s3_client is None:
        s3_client = boto3.client("s3")

    body = json.dumps(data)
    s3_client.put_object(Bucket=bucket, Key=path, Body=body)
```
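A hypothetical round-trip through these helpers, assuming AWS credentials in the environment and a bucket and key you control (both names here are placeholders):

```python
from mip_api import s3

payload = {"123456": "Example Program"}
s3.cache_write(payload, "my-cache-bucket", "mip/cache.json")  # json.dumps + put_object
assert s3.cache_read("my-cache-bucket", "mip/cache.json") == payload
```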
@@ -0,0 +1,51 @@

```python
import logging

import boto3

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)

# This is global so that it can be stubbed in test.
# Because it's global, its value will be retained
# in the lambda environment and re-used on warm runs.
ssm_client = None


def get_secrets(ssm_path):
    """Collect secure parameters from SSM"""

    # create boto client
    global ssm_client
    if ssm_client is None:
        ssm_client = boto3.client("ssm")

    # object to return
    ssm_secrets = {}

    # get secret parameters from ssm
    params = ssm_client.get_parameters_by_path(
        Path=ssm_path,
        Recursive=True,
        WithDecryption=True,
    )
    if "Parameters" in params:
        for p in params["Parameters"]:
            # strip the leading path plus the "/" separator
            if len(p["Name"]) > len(ssm_path):
                name = p["Name"][len(ssm_path) + 1 :]
            else:
                name = p["Name"]
            ssm_secrets[name] = p["Value"]
            LOG.info(f"Loaded secret: {name}")
    else:
        msg = "Invalid response from SSM client"
        LOG.error(msg)
        raise Exception(msg)

    for reqkey in ["user", "pass"]:
        if reqkey not in ssm_secrets:
            msg = f"Missing required secure parameter: {reqkey}"
            LOG.error(msg)
            raise Exception(msg)

    return ssm_secrets
```
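One caveat worth noting: `GetParametersByPath` returns results in pages (at most 10 parameters per call), so if more parameters than that ever live under `SsmPath`, the single call above would silently miss some. A sketch of the same collection loop using boto3's built-in paginator; the path value is hypothetical, and the name-stripping mirrors `get_secrets` above:

```python
import boto3

ssm_client = boto3.client("ssm")
ssm_path = "/mip/example"  # hypothetical SsmPath value

ssm_secrets = {}
paginator = ssm_client.get_paginator("get_parameters_by_path")
for page in paginator.paginate(Path=ssm_path, Recursive=True, WithDecryption=True):
    for p in page["Parameters"]:
        # strip the leading path plus the "/" separator, as above
        name = p["Name"][len(ssm_path) + 1 :]
        ssm_secrets[name] = p["Value"]
```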