Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
relative_files = True

# Use 'source' instead of 'omit' in order to ignore 'tests/unit/__init__.py'
source = mips_api
source = mip_api
10 changes: 0 additions & 10 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,6 @@ on:
workflow_call:

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.12
- uses: pre-commit/action@v3.0.0

pytest:
runs-on: ubuntu-latest
steps:
Expand Down
105 changes: 105 additions & 0 deletions mip_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import json
import logging

from mip_api import chart, s3, ssm, upstream, util


LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)


def lambda_handler(event, context):
    """Entry Point for Lambda

    Collect configuration from environment variables and query-string parameters,
    determine data requested based on the API endpoint called, and finally
    present the requested data in the desired format.

    Note: The Python process will continue to run for the entire lifecycle of
        the Lambda execution environment (15 minutes). Subsequent Lambda
        runs will re-enter this function.

    Parameters
    ----------
    event: dict, required
        API Gateway Lambda Proxy Input Format

        Event doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format

    context: object, required
        Lambda Context runtime methods and attributes

        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    ------
    API Gateway Lambda Proxy Output Format: dict

        Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
    """

    # helper function to encapsulate the body, headers, and status code
    def _build_return(code, body):
        return {
            "statusCode": code,
            "body": json.dumps(body, indent=2),
        }

    try:
        # collect environment variables
        mip_org = util.get_os_var("MipsOrg")
        ssm_path = util.get_os_var("SsmPath")
        s3_bucket = util.get_os_var("CacheBucket")
        s3_path = util.get_os_var("CacheBucketPath")

        code_other = util.get_os_var("OtherCode")
        code_no_program = util.get_os_var("NoProgramCode")

        api_routes = {
            "ApiChartOfAccounts": util.get_os_var("ApiChartOfAccounts"),
            "ApiValidTags": util.get_os_var("ApiValidTags"),
        }

        _to_omit = util.get_os_var("CodesToOmit")
        omit_codes_list = util.parse_codes(_to_omit)

        # get secure parameters
        ssm_secrets = ssm.get_secrets(ssm_path)

        # get chart of accounts from mip cloud
        raw_chart = chart.get_chart(mip_org, ssm_secrets, s3_bucket, s3_path)
        LOG.debug(f"Raw chart data: {raw_chart}")

        # collect query-string parameters
        # NOTE: API Gateway sends '"queryStringParameters": null' when the
        # request has no query string, so the key may be present with a None
        # value; normalize that to an empty dict so downstream helpers always
        # receive a mapping.
        params = event.get("queryStringParameters") or {}
        LOG.debug(f"Query-string parameters: {params}")

        # parse the path and return appropriate data
        if "path" in event:
            event_path = event["path"]

            # always process the chart of accounts
            mip_chart = chart.process_chart(
                params, raw_chart, omit_codes_list, code_other, code_no_program
            )

            if event_path == api_routes["ApiChartOfAccounts"]:
                # conditionally limit the size of the output
                return_chart = chart.limit_chart(params, mip_chart)
                return _build_return(200, return_chart)

            elif event_path == api_routes["ApiValidTags"]:
                # build a list of strings from the processed dictionary
                valid_tags = chart.list_tags(params, mip_chart)
                return _build_return(200, valid_tags)

            else:
                return _build_return(404, {"error": "Invalid request path"})

        return _build_return(400, {"error": f"Invalid event: No path found: {event}"})

    except Exception as exc:
        # top-level boundary: log the full traceback and report the failure
        # in the API Gateway proxy output format
        LOG.exception(exc)
        return _build_return(500, {"error": str(exc)})
168 changes: 168 additions & 0 deletions mip_api/chart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import logging
import re

from mip_api import s3, upstream, util

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)


def get_chart(org_name, secrets, bucket, path):
    """
    Access the Chart of Accounts from MIP Cloud, and implement a write-through
    cache of successful responses to tolerate long-term faults in the upstream
    API.

    A successful API response will be stored in S3 indefinitely, to be retrieved
    and used in the case of an API failure.

    The S3 bucket has versioning enabled for disaster recovery, but this means
    that every PUT request will create a new S3 object. In order to minimize
    the number of objects in the bucket, read the cache value on every run and
    only update the S3 object if it changes.

    Raises
    ------
    ValueError
        If neither the upstream API nor the S3 cache produced a usable chart.
    """

    # get the upstream API response
    LOG.info("Read chart of accounts from upstream API")
    upstream_dict = upstream.program_chart(org_name, secrets)
    LOG.debug(f"Upstream API response: {upstream_dict}")

    # always read cached value; a cache miss or S3 failure is tolerated
    # (cache_dict stays None) because the upstream response may suffice
    LOG.info("Read cached chart of accounts from S3")
    cache_dict = None
    try:
        cache_dict = s3.cache_read(bucket, path)
        LOG.debug(f"Cached API response: {cache_dict}")
    except Exception:
        LOG.exception("S3 read failure")

    if upstream_dict:
        # if we received a non-empty response from the upstream API, compare it
        # to our cached response and update the S3 write-through cache if needed
        if upstream_dict == cache_dict:
            LOG.debug("No change in chart of accounts")
        else:
            # store write-through cache; a write failure is non-fatal since
            # we already hold a good upstream response
            LOG.info("Write updated chart of accounts to S3")
            try:
                s3.cache_write(upstream_dict, bucket, path)
            except Exception:
                LOG.exception("S3 write failure")
        coa_dict = upstream_dict
    else:
        # no response (or an empty response) from the upstream API,
        # rely on our response cached in S3.
        coa_dict = cache_dict

    if not coa_dict:
        # make sure we don't return an empty value
        raise ValueError("No valid chart of accounts found")

    return coa_dict


def process_chart(params, chart_dict, omit_list, other, no_program):
    """
    Process chart of accounts to remove unneeded programs,
    and inject some extra (meta) programs.

    5-digit codes are inactive and should be ignored in most cases.
    8-digit codes are active, but only the first 6 digits are significant,
    i.e. 12345601 and 12345602 should be deduplicated as 123456.
    """

    # deduplicate on shortened numeric codes
    # pre-populate with codes to omit to short-circuit their processing
    # (a set: only used for membership tests, so O(1) lookups)
    found_codes = set(omit_list)

    # output object
    out_chart = {}

    # whether to filter out inactive codes
    code_len = 5
    if util.param_inactive_bool(params):
        code_len = 6

    # optionally move this list of codes to the top of the output
    priority_codes = util.param_priority_list(params)

    # enforce AWS tags limitations
    # https://docs.aws.amazon.com/tag-editor/latest/userguide/best-practices-and-strats.html
    # enforce removing special characters globally for consistency,
    # only enforce string limit when listing tag values because the string size will change.
    # (compiled once here; loop-invariant)
    name_regex = re.compile(r"[^\d\w\s.:/=+\-@]+")

    # add short codes
    for code, _name in chart_dict.items():
        if len(code) >= code_len:
            # truncate active codes to the first 6 significant digits
            short = code[:6]
            name = name_regex.sub("", _name)

            if short in found_codes:
                LOG.info(f"Code {short} has already been processed")
                continue

            if priority_codes is not None and short in priority_codes:
                # move priority codes to the front of the output
                out_chart = util.dict_prepend(out_chart, short, name)
            else:
                out_chart[short] = name
            found_codes.add(short)

    # inject "other" code
    if util.param_other_bool(params):
        out_chart = util.dict_prepend(out_chart, other, "Other")

    # inject "no program" code
    if util.param_no_program_bool(params):
        out_chart = util.dict_prepend(out_chart, no_program, "No Program")

    return out_chart


def limit_chart(params, chart_dict):
    """
    Optionally limit the size of the chart to the given number of high-
    priority items based on a query-string parameter.

    NOTE: this is deliberate truncation rather than pagination — the chart is
    ordered with high-priority items first, so a client that can only handle
    a fixed number of items (e.g. the 25-entry Service Catalog tag-option
    limit) requests just the front of the list.
    """

    # limit the response to the chosen number of high-priority items
    limit = util.param_limit_int(params)
    if limit > 0:
        # "slice" the dictionary, keeping insertion order
        # https://stackoverflow.com/a/66535220/1742875
        short_dict = dict(list(chart_dict.items())[:limit])
        return short_dict

    return chart_dict


def list_tags(params, chart_dict):
    """
    Generate a list of valid AWS tags. Only active codes are listed.

    The string format is `{Program Name} / {Program Code}`.

    Returns
        A list of strings.
    """

    # build tags from chart of accounts
    # enforce AWS tags limitations
    # https://docs.aws.amazon.com/tag-editor/latest/userguide/best-practices-and-strats.html
    # max tag value length is 256, truncate
    # only enforce when listing tag values
    tags = [f"{name[:245]} / {code[:6]}" for code, name in chart_dict.items()]

    # optionally truncate the output based on a query-string parameter
    limit = util.param_limit_int(params)
    if limit <= 0:
        return tags

    LOG.info(f"limiting output to {limit} values")
    return tags[:limit]
28 changes: 28 additions & 0 deletions mip_api/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json
import boto3

s3_client = None


def cache_read(bucket, path):
    """
    Read MIP response from S3 cache object
    """
    # lazily create the boto client; cached in a module global so warm
    # Lambda runs reuse it
    global s3_client
    if s3_client is None:
        s3_client = boto3.client("s3")

    response = s3_client.get_object(Bucket=bucket, Key=path)
    body = response["Body"].read()
    return json.loads(body)


def cache_write(data, bucket, path):
    """
    Write MIP response to S3 cache object
    """
    # lazily create the boto client; cached in a module global so warm
    # Lambda runs reuse it
    global s3_client
    if s3_client is None:
        s3_client = boto3.client("s3")

    serialized = json.dumps(data)
    s3_client.put_object(Bucket=bucket, Key=path, Body=serialized)
51 changes: 51 additions & 0 deletions mip_api/ssm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging

import boto3

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)

# This is global so that it can be stubbed in test.
# Because it's global, its value will be retained
# in the lambda environment and re-used on warm runs.
ssm_client = None


def get_secrets(ssm_path):
    """Collect secure parameters from SSM

    Returns a dict mapping the parameter name (with the leading path
    stripped) to its decrypted value. The required keys 'user' and 'pass'
    must be present.

    Raises
    ------
    Exception
        If the SSM response is malformed or a required key is missing.
    """

    # create boto client
    global ssm_client
    if ssm_client is None:
        ssm_client = boto3.client("ssm")

    # object to return
    ssm_secrets = {}

    # get secret parameters from ssm
    # NOTE: get_parameters_by_path returns at most 10 parameters per call;
    # follow NextToken so paths with many parameters are read completely
    kwargs = {
        "Path": ssm_path,
        "Recursive": True,
        "WithDecryption": True,
    }
    while True:
        params = ssm_client.get_parameters_by_path(**kwargs)
        if "Parameters" not in params:
            msg = "Invalid response from SSM client"
            LOG.error(msg)
            raise Exception(msg)
        for p in params["Parameters"]:
            # strip leading path plus / char
            # (assumes ssm_path has no trailing '/' — TODO confirm)
            if len(p["Name"]) > len(ssm_path):
                name = p["Name"][len(ssm_path) + 1 :]
            else:
                name = p["Name"]
            ssm_secrets[name] = p["Value"]
            LOG.info(f"Loaded secret: {name}")
        if "NextToken" in params:
            kwargs["NextToken"] = params["NextToken"]
        else:
            break

    # fail fast if either required credential is missing
    for reqkey in ["user", "pass"]:
        if reqkey not in ssm_secrets:
            msg = f"Missing required secure parameter: {reqkey}"
            LOG.error(msg)
            raise Exception(msg)

    return ssm_secrets
Loading