Skip to content

Commit c9a4ea4

Browse files
authored
Use environment variables for S3 bucket prefixes (#395)
## Description

This PR standardizes the use of environment variables for S3 bucket and key prefixes, refactors S3 client usage for efficiency and testability, and updates tests and helper functions accordingly.

**Lambda Handler** (`packages/lambda-handler/src/lambda_handler/lambda_handler.py`):
- Added optional `s3_client` parameter to `get_file_content_from_s3()` and `put_file()` for client reuse

**TTC Lambda** (`packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py`):
- Added `S3_BUCKET` env var (default: `dibbs-text-to-code`)
- Updated `TTC_INPUT_PREFIX` default to `TextToCodeValidateSubmissionV2/`
- Removed all `.split("/")[0]` bucket derivations. Now uses `S3_BUCKET` + prefix as object key
- Threaded `s3_client` through helper functions

**Augmentation Lambda** (`packages/augmentation-lambda/src/augmentation_lambda/lambda_function.py`):
- Added `S3_BUCKET`, `AUGMENTED_EICR_PREFIX`, `AUGMENTATION_METADATA_PREFIX` env vars
- Added S3 client caching and S3 writes for augmented eICR and metadata after augmentation

**Augmentation main.py** (`packages/augmentation/src/augmentation/main.py`):
- Replaced hardcoded bucket names with env-var-driven pattern

**Terraform**:
- `_variables.tf`: Updated `ttc_input_prefix` default, added `augmented_eicr_prefix` and `augmentation_metadata_prefix` variables
- `main.tf`: Renamed `BUCKET_NAME` → `S3_BUCKET`

**Tests**: Updated all test fixtures and assertions for the single-bucket + prefix pattern. All 27 tests pass across all three packages.

## Related Issues

Closes #349
1 parent 13c0bb0 commit c9a4ea4

File tree

10 files changed

+193
-119
lines changed

10 files changed

+193
-119
lines changed

packages/augmentation-lambda/src/augmentation_lambda/lambda_function.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
1+
import io
12
import json
3+
import os
24
from typing import TypedDict
35

46
from aws_lambda_typing import context as lambda_context
57
from aws_lambda_typing import events as lambda_events
68

9+
import lambda_handler
710
from augmentation.models import TTCAugmenterConfig
11+
from botocore.client import BaseClient
812
from augmentation.models.application import TTCAugmenterOutput
913
from augmentation.services.eicr_augmenter import EICRAugmenter
1014
from shared_models import TTCAugmenterInput
1115

16+
# Environment variables
17+
S3_BUCKET = os.getenv("S3_BUCKET", "dibbs-text-to-code")
18+
AUGMENTED_EICR_PREFIX = os.getenv("AUGMENTED_EICR_PREFIX", "AugmentationEICRV2/")
19+
AUGMENTATION_METADATA_PREFIX = os.getenv("AUGMENTATION_METADATA_PREFIX", "AugmentationMetadata/")
20+
21+
# Cache S3 client to reuse across Lambda invocations
22+
_cached_s3_client: BaseClient | None = None
23+
1224

1325
class HandlerResponse(TypedDict):
1426
"""Response from the AWS Lambda handler."""
@@ -24,6 +36,12 @@ def handler(event: lambda_events.SQSEvent, context: lambda_context.Context) -> H
2436
:param context: The AWS Lambda context object.
2537
:return: A dictionary containing the results of the augmentation and any batch item failures.
2638
"""
39+
global _cached_s3_client # noqa: PLW0603
40+
41+
if _cached_s3_client is None:
42+
_cached_s3_client = lambda_handler.create_s3_client()
43+
s3_client = _cached_s3_client
44+
2745
results: list[dict[str, object]] = []
2846
batch_item_failures: list[dict[str, str]] = []
2947

@@ -64,6 +82,9 @@ def handler(event: lambda_events.SQSEvent, context: lambda_context.Context) -> H
6482
metadata=metadata,
6583
)
6684

85+
# Save augmented eICR and metadata to S3
86+
_save_augmentation_outputs(augmenter_input.eicr_id, output, s3_client)
87+
6788
results.append(
6889
{
6990
"messageId": message_id,
@@ -85,3 +106,26 @@ def handler(event: lambda_events.SQSEvent, context: lambda_context.Context) -> H
85106
"results": results,
86107
"batchItemFailures": batch_item_failures,
87108
}
109+
110+
111+
def _save_augmentation_outputs(
112+
eicr_id: str, output: TTCAugmenterOutput, s3_client: BaseClient
113+
) -> None:
114+
"""Save augmented eICR and metadata to S3.
115+
116+
:param eicr_id: The eICR identifier.
117+
:param output: The augmentation output containing the augmented eICR and metadata.
118+
:param s3_client: The S3 client to use for uploading files.
119+
"""
120+
lambda_handler.put_file(
121+
file_obj=io.BytesIO(output.augmented_eicr.encode("utf-8")),
122+
bucket_name=S3_BUCKET,
123+
object_key=f"{AUGMENTED_EICR_PREFIX}{eicr_id}",
124+
s3_client=s3_client,
125+
)
126+
lambda_handler.put_file(
127+
file_obj=io.BytesIO(output.metadata.model_dump_json().encode("utf-8")),
128+
bucket_name=S3_BUCKET,
129+
object_key=f"{AUGMENTATION_METADATA_PREFIX}{eicr_id}",
130+
s3_client=s3_client,
131+
)

packages/augmentation-lambda/tests/test_augmentation_lambda_function.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
import json
2+
from unittest.mock import MagicMock
3+
from unittest.mock import patch
4+
5+
import pytest
26

37
from augmentation.models import Metadata
48
from augmentation_lambda import lambda_function
@@ -30,7 +34,18 @@ def augment(self) -> Metadata:
3034
)
3135

3236

33-
def test_handler_returns_success_result(mocker) -> None:
37+
@pytest.fixture(autouse=True)
def mock_s3_client():
    """Replace the module's lambda_handler and cached S3 client with mocks.

    Installs a MagicMock as the cached S3 client, patches the lambda_handler
    module used by lambda_function, and resets the cache on teardown so tests
    stay isolated from each other.
    """
    lambda_function._cached_s3_client = MagicMock()
    patcher = patch.object(lambda_function, "lambda_handler")
    mock_handler = patcher.start()
    mock_handler.create_s3_client.return_value = MagicMock()
    mock_handler.put_file = MagicMock()
    try:
        yield mock_handler
    finally:
        patcher.stop()
        lambda_function._cached_s3_client = None
46+
47+
48+
def test_handler_returns_success_result(mocker, mock_s3_client) -> None:
3449
"""Tests that the handler returns a successful result when the augmenter runs without errors.
3550
3651
:param mocker: The pytest-mock fixture for mocking objects.
@@ -82,7 +97,49 @@ def test_handler_returns_success_result(mocker) -> None:
8297
}
8398

8499

85-
def test_handler_uses_provided_config(mocker) -> None:
100+
def test_handler_saves_outputs_to_s3(mocker, mock_s3_client) -> None:
    """Verify the handler uploads the augmented eICR and its metadata to S3.

    :param mocker: The pytest-mock fixture for mocking objects.
    :param mock_s3_client: Fixture providing the mocked lambda_handler module.
    """
    mocker.patch.object(lambda_function, "EICRAugmenter", FakeAugmenter)

    body = json.dumps(
        {
            "eicr_id": "test-eicr-id",
            "eicr": "<ClinicalDocument />",
            "nonstandard_codes": [],
        }
    )
    event = {"Records": [{"messageId": "message-s3", "body": body}]}

    lambda_function.handler(event, None)

    # One upload for the augmented eICR, one for the metadata.
    expected_put_file_calls = 2
    calls = mock_s3_client.put_file.call_args_list
    assert len(calls) == expected_put_file_calls

    # Calls happen in a fixed order: augmented eICR first, metadata second.
    expected_object_keys = [
        f"{lambda_function.AUGMENTED_EICR_PREFIX}test-eicr-id",
        f"{lambda_function.AUGMENTATION_METADATA_PREFIX}test-eicr-id",
    ]
    for call, expected_key in zip(calls, expected_object_keys):
        assert call.kwargs["bucket_name"] == lambda_function.S3_BUCKET
        assert call.kwargs["object_key"] == expected_key
140+
141+
142+
def test_handler_uses_provided_config(mocker, mock_s3_client) -> None:
86143
"""Tests that the handler uses the provided config when creating the augmenter.
87144
88145
:param mocker: The pytest-mock fixture for mocking objects.

packages/augmentation/src/augmentation/main.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

packages/lambda-handler/src/lambda_handler/lambda_handler.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,17 @@ def create_opensearch_client(aws_auth: AWS4Auth) -> OpenSearch:
8383
)
8484

8585

86-
def get_file_content_from_s3(bucket_name: str, object_key: str) -> str:
86+
def get_file_content_from_s3(
87+
bucket_name: str, object_key: str, s3_client: BaseClient | None = None
88+
) -> str:
8789
"""Extracts the file content from an S3 bucket.
8890
8991
:param bucket_name: The name of the S3 bucket.
9092
:param object_key: The key of the S3 object.
93+
:param s3_client: Optional pre-created S3 client. If None, a new client is created.
9194
:return: The content of the file as a string.
9295
"""
93-
client = create_s3_client()
96+
client = s3_client or create_s3_client()
9497

9598
# Check if object exists
9699
if not check_s3_object_exists(client, bucket_name, object_key):
@@ -112,14 +115,20 @@ def get_eventbridge_data_from_s3_event(event: lambda_events.EventBridgeEvent) ->
112115
return {"bucket_name": bucket_name, "object_key": object_key}
113116

114117

115-
def put_file(
    file_obj: typing.BinaryIO,
    bucket_name: str,
    object_key: str,
    s3_client: BaseClient | None = None,
) -> None:
    """Uploads a file object to a S3 bucket.

    :param file_obj: The file object to upload.
    :param bucket_name: The name of the S3 bucket to upload to.
    :param object_key: The key to assign to the uploaded object in S3.
    :param s3_client: Optional pre-created S3 client. If None, a new client is created.
    """
    # Explicit None check: `s3_client or create_s3_client()` would discard a
    # falsy-but-valid client object, contradicting the documented contract
    # that a new client is created only when None is passed.
    client = s3_client if s3_client is not None else create_s3_client()
    client.put_object(Body=file_obj, Bucket=bucket_name, Key=object_key)
124133

125134

0 commit comments

Comments
 (0)