Skip to content

Commit 915dbab

Browse files
Create a separate lambda-handler service for lambdas (#337)
## Description Creates a separate lambda-handler package and moves tests from ttc-lambda into that folder. Updates the relevant sections of the pyproject.toml files. ## Related Issues Closes #324 ## Checklist Please review and complete the following checklist before submitting your pull request: - [ ] I have ensured that the pull request is of a manageable size, allowing it to be reviewed within a single session. - [ ] I have reviewed my changes to ensure they are clear, concise, and well-documented. - [ ] I have updated the documentation, if applicable. - [ ] I have added or updated test cases to cover my changes, if applicable. - [ ] I have minimized the number of reviewers to include only those essential for the review. ## Checklist for Reviewers Please review and complete the following checklist during the review process: - [ ] The code follows best practices and conventions. - [ ] The changes implement the desired functionality or fix the reported issue. - [ ] The tests cover the new changes and pass successfully. - [ ] Any potential edge cases or error scenarios have been considered.
1 parent 94fd3ba commit 915dbab

File tree

14 files changed

+379
-48
lines changed

14 files changed

+379
-48
lines changed

packages/augmentation-lambda/pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,7 @@ requires = ["hatchling"]
1212
build-backend = "hatchling.build"
1313

1414
[tool.hatch.build.targets.wheel]
15-
packages = ["src/augmentation_lambda"]
15+
packages = ["src/augmentation_lambda"]
16+
17+
[tool.uv.sources]
18+
lambda-handler = { workspace = true }

packages/index-lambda/pyproject.toml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@
22
name = "index-lambda"
33
version = "0.1.0"
44
readme = "README.md"
5-
dependencies = ["boto3", "opensearch-py", "requests_aws4auth", "aws_lambda_typing>=2.20.0", "botocore"]
5+
dependencies = [
6+
"boto3",
7+
"opensearch-py",
8+
"requests-aws4auth",
9+
"aws-lambda-typing>=2.20.0",
10+
"botocore",
11+
"lambda-handler",
12+
]
613

714
[dependency-groups]
815
dev = []
@@ -12,4 +19,7 @@ requires = ["hatchling"]
1219
build-backend = "hatchling.build"
1320

1421
[tool.hatch.build.targets.wheel]
15-
packages = ["src/index_lambda"]
22+
packages = ["src/index_lambda"]
23+
24+
[tool.uv.sources]
25+
lambda-handler = { workspace = true }

packages/index-lambda/src/index_lambda/lambda_function.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
import os
2-
import sys
3-
4-
sys.path.append(os.path.join(os.path.dirname(__file__), "../../../text-to-code-lambda/src"))
5-
from text_to_code_lambda import s3_handler
1+
import lambda_handler
62

73

84
def handler(event: dict, context: dict) -> dict:
@@ -42,9 +38,9 @@ def handler(event: dict, context: dict) -> dict:
4238
}
4339

4440
# Configure OpenSearch client
45-
aws_auth = s3_handler.create_aws_auth()
46-
os_client = s3_handler.create_opensearch_client(aws_auth)
47-
index_name = s3_handler.require_env("INDEX_NAME")
41+
aws_auth = lambda_handler.create_aws_auth()
42+
os_client = lambda_handler.create_opensearch_client(aws_auth)
43+
index_name = lambda_handler.require_env("INDEX_NAME")
4844

4945
# Create index if it doesn't already exist
5046
if not os_client.indices.exists(index=index_name):

packages/index-lambda/tests/test_index_lambda_function.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def __init__(self, description_vector_type: str) -> None:
4343
self.indices = MockIndices(description_vector_type)
4444

4545

46-
def patch_s3_handler(monkeypatch, description_vector_type: str) -> None:
46+
def patch_lambda_handler(monkeypatch, description_vector_type: str) -> None:
4747
def mock_create_aws_auth() -> object:
4848
"""Mock create_aws_auth function that returns a dummy AWS auth object."""
4949
return object()
@@ -58,19 +58,19 @@ def mock_require_env(name: str) -> str:
5858
return INDEX_NAME
5959
raise ValueError(f"Unexpected env var requested: {name}")
6060

61-
monkeypatch.setattr(lambda_function.s3_handler, "create_aws_auth", mock_create_aws_auth)
61+
monkeypatch.setattr(lambda_function.lambda_handler, "create_aws_auth", mock_create_aws_auth)
6262
monkeypatch.setattr(
63-
lambda_function.s3_handler,
63+
lambda_function.lambda_handler,
6464
"create_opensearch_client",
6565
mock_create_opensearch_client,
6666
)
67-
monkeypatch.setattr(lambda_function.s3_handler, "require_env", mock_require_env)
67+
monkeypatch.setattr(lambda_function.lambda_handler, "require_env", mock_require_env)
6868

6969

7070
class TestHandler:
7171
def test_handler_success(self, monkeypatch):
7272
"""Test handler creates the index when it does not exist and returns expected response."""
73-
patch_s3_handler(monkeypatch, "knn_vector")
73+
patch_lambda_handler(monkeypatch, "knn_vector")
7474

7575
resp = lambda_function.handler({}, {})
7676

@@ -88,7 +88,7 @@ def test_handler_success(self, monkeypatch):
8888

8989
def test_handler_recreates_index_when_vector_mapping_incorrect(self, monkeypatch):
9090
"""Test handler recreates the index when description_vector mapping is not knn_vector."""
91-
patch_s3_handler(monkeypatch, "keyword")
91+
patch_lambda_handler(monkeypatch, "keyword")
9292

9393
resp = lambda_function.handler({}, {})
9494

packages/lambda-handler/README.md

Whitespace-only changes.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[project]
2+
name = "lambda-handler"
3+
version = "0.1.0"
4+
readme = "README.md"
5+
dependencies = [
6+
"boto3>=1.40.60",
7+
]
8+
9+
[dependency-groups]
10+
dev = ["moto"]
11+
12+
[build-system]
13+
requires = ["hatchling"]
14+
build-backend = "hatchling.build"
15+
16+
[tool.hatch.build.targets.wheel]
17+
packages = ["src/lambda_handler"]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from .lambda_handler import check_s3_object_exists as check_s3_object_exists
2+
from .lambda_handler import create_aws_auth as create_aws_auth
3+
from .lambda_handler import create_opensearch_client as create_opensearch_client
4+
from .lambda_handler import create_s3_client as create_s3_client
5+
from .lambda_handler import get_eventbridge_data_from_s3_event as get_eventbridge_data_from_s3_event
6+
from .lambda_handler import get_file_content_from_s3 as get_file_content_from_s3
7+
from .lambda_handler import get_persistence_id as get_persistence_id
8+
from .lambda_handler import get_s3_credentials as get_s3_credentials
9+
from .lambda_handler import put_file as put_file
10+
from .lambda_handler import require_env as require_env
11+
from .lambda_handler import strip_protocol as strip_protocol
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import os
2+
import typing
3+
4+
import boto3
5+
from aws_lambda_typing import events as lambda_events
6+
from botocore.client import BaseClient
7+
from botocore.credentials import Credentials
8+
from botocore.exceptions import ClientError
9+
from opensearchpy import OpenSearch
10+
from opensearchpy import RequestsHttpConnection
11+
from requests_aws4auth import AWS4Auth
12+
13+
14+
def require_env(name: str) -> str:
    """Return the value of a required environment variable.

    An unset or empty variable is treated as missing.

    :param name: The name of the environment variable to fetch.
    :raises ValueError: If the variable is unset or empty.
    :return: The value of the environment variable.
    """
    value = os.environ.get(name)
    if value:
        return value
    raise ValueError(f"{name} not set as an environment variable.")
24+
25+
26+
def strip_protocol(url: str) -> str:
    """Return *url* with a leading ``https://`` or ``http://`` removed.

    Some AWS service endpoints (such as OpenSearch) must be supplied
    without the protocol portion of the URL.

    :param url: The URL to strip.
    :return: The URL without the protocol.
    """
    for scheme in ("https://", "http://"):
        url = url.removeprefix(scheme)
    return url
35+
36+
37+
def get_s3_credentials() -> Credentials:
    """Fetch AWS credentials from the environment via the default boto3 session."""
    session = boto3.Session()
    return session.get_credentials()
40+
41+
42+
def create_aws_auth() -> AWS4Auth:
    """Build an AWS4Auth object for signing requests to AWS services.

    Signs for the ``es`` (OpenSearch/Elasticsearch) service in the region
    named by the required ``AWS_REGION`` environment variable.

    :return: AWS4Auth object
    """
    creds = get_s3_credentials()
    region = require_env("AWS_REGION")
    return AWS4Auth(
        creds.access_key,
        creds.secret_key,
        region,
        "es",
        session_token=creds.token,
    )
55+
56+
57+
def create_s3_client() -> BaseClient:
    """Create a boto3 S3 client.

    ``S3_ENDPOINT_URL`` is an optional endpoint override (presumably for
    local/integration testing — confirm against deployment config);
    ``AWS_REGION`` is required.

    :return: S3 client
    """
    return boto3.client(
        "s3",
        endpoint_url=os.getenv("S3_ENDPOINT_URL"),
        region_name=require_env("AWS_REGION"),
    )
66+
67+
68+
def create_opensearch_client(aws_auth: AWS4Auth) -> OpenSearch:
    """Create an OpenSearch client bound to ``OPENSEARCH_ENDPOINT_URL``.

    The endpoint has its protocol stripped because the client takes a bare
    host name; TLS on port 443 with certificate verification is used.

    :param aws_auth: AWS4Auth object for authentication
    :return: OpenSearch client
    """
    host = strip_protocol(require_env("OPENSEARCH_ENDPOINT_URL"))
    return OpenSearch(
        hosts=[{"host": host, "port": 443}],
        http_auth=aws_auth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
    )
82+
83+
84+
def get_file_content_from_s3(bucket_name: str, object_key: str) -> str:
    """Read an S3 object and return its body decoded as UTF-8 text.

    :param bucket_name: The name of the S3 bucket.
    :param object_key: The key of the S3 object.
    :raises FileNotFoundError: If the object does not exist.
    :return: The content of the file as a string.
    """
    client = create_s3_client()

    # Fail fast with a clear error instead of letting get_object raise.
    if not check_s3_object_exists(client, bucket_name, object_key):
        raise FileNotFoundError(f"S3 object not found: {bucket_name}/{object_key}")

    body = client.get_object(Bucket=bucket_name, Key=object_key)["Body"]
    return body.read().decode("utf-8")
99+
100+
101+
def get_eventbridge_data_from_s3_event(event: lambda_events.EventBridgeEvent) -> dict:
    """Extract S3 bucket/object metadata from an EventBridge event.

    :param event: The S3 event containing the bucket and object key information.
    :return: A dictionary with ``bucket_name`` and ``object_key`` keys.
    """
    detail = event["detail"]
    return {
        "bucket_name": detail["bucket"]["name"],
        "object_key": detail["object"]["key"],
    }
111+
112+
113+
def put_file(file_obj: typing.BinaryIO, bucket_name: str, object_key: str) -> None:
    """Upload a file object to an S3 bucket.

    :param file_obj: The file object to upload.
    :param bucket_name: The name of the S3 bucket to upload to.
    :param object_key: The key to assign to the uploaded object in S3.
    """
    create_s3_client().put_object(Body=file_obj, Bucket=bucket_name, Key=object_key)
122+
123+
124+
def check_s3_object_exists(s3_client: BaseClient, bucket: str, key: str) -> bool:
    """Report whether an S3 object exists, using a HEAD request.

    :param s3_client: The S3 client.
    :param bucket: The name of the S3 bucket.
    :param key: The key of the S3 object.
    :raises Exception: If an unexpected error occurs while fetching the S3 object.
    :return: True if the S3 object exists, False otherwise.
    """
    try:
        s3_client.head_object(Bucket=bucket, Key=key)
    except ClientError as e:
        code = e.response["Error"]["Code"]
        # head_object surfaces a missing key as a 404 / NoSuchKey error code.
        if code in ("404", "NoSuchKey"):
            return False
        msg = f"Unexpected error while fetching file from S3: {bucket}/{key}. Error: {e.response['Error']['Message']}"
        raise Exception(msg) from e
    return True
144+
145+
146+
def get_persistence_id(object_key: str, input_prefix: str) -> str:
    """Return the persistence_id portion of an S3 object key.

    Object key format: ``<pipeline-step>/<persistence_id>``
    Example: ``TTCInput/2026/01/01/0026b704-f510-4494-8d21-11d27217d96e``
    Returns: ``2026/01/01/0026b704-f510-4494-8d21-11d27217d96e``

    :param object_key: The S3 object key
    :param input_prefix: The pipeline step prefix (e.g., "TTCInput/")
    :raises ValueError: If the key does not start with *input_prefix*.
    :return: The persistence_id portion of the key
    """
    if object_key.startswith(input_prefix):
        return object_key.removeprefix(input_prefix)
    raise ValueError(
        f"Object key '{object_key}' does not start with expected prefix '{input_prefix}'"
    )
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import json
2+
import logging
3+
import os
4+
5+
import boto3
6+
import moto
7+
import pytest
8+
9+
10+
@pytest.fixture(scope="function")
def moto_setup(monkeypatch: pytest.MonkeyPatch) -> boto3.client:
    """Set up a mocked AWS environment with a pre-created S3 bucket.

    Yields an S3 client carrying a ``bucket_name`` convenience attribute so
    tests do not hard-code the bucket name.
    """
    with moto.mock_aws():
        # Fake credentials/region so boto3 never talks to real AWS.
        for var, value in (
            ("AWS_REGION", "us-east-1"),
            ("AWS_ACCESS_KEY_ID", "test_access_key_id"),
            ("AWS_SECRET_ACCESS_KEY", "test_secret_access_key"),
            ("OPENSEARCH_ENDPOINT_URL", "https://test-opensearch-endpoint.com"),
        ):
            monkeypatch.setenv(var, value)

        bucket_name = "test-bucket"

        # Create the fake S3 bucket inside the moto context.
        s3 = boto3.client(
            "s3",
            region_name=os.environ["AWS_REGION"],
            aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        )
        s3.create_bucket(Bucket=bucket_name)

        # Add convenience attribute for tests
        s3.bucket_name = bucket_name

        yield s3
33+
34+
35+
@pytest.fixture
def example_s3_event_payload() -> dict:
    """Inner S3 event payload (what an SQS body contains as a JSON string).

    This example content comes from APHL.
    """
    detail = {
        "version": "0",
        "bucket": {"name": "ecr-bucket"},
        "object": {
            "key": "TextToCodeSubmission/2025/09/03/1-5f84c7a5-91d7f5c6a2b7c9e08f0d1234",
            "size": 1024,
            "etag": "0123456789abcdef0123456789abcdef",
            "sequencer": "0055AED6DCD90281E5",
        },
        "request-id": "C3D13FE58DE4C810",
        "requester": "arn:aws:iam::111122223333:user/example-user",
        "reason": "PutObject",
    }
    return {
        "version": "0",
        "id": "12345678-1234-5678-9012-123456789012",
        "detail-type": "Object Created",
        "source": "aws.s3",
        "account": "111122223333",
        "time": "2025-09-03T12:34:56Z",
        "region": "us-east-1",
        "resources": ["arn:aws:s3:::my-bucket-name"],
        "detail": detail,
    }
64+
65+
66+
@pytest.fixture
def example_sqs_event(example_s3_event_payload: dict) -> dict:
    """Full SQS event that mimics real Lambda input."""
    record = {
        "messageId": "f9ccdff5-0acb-4933-8995-bd7f0ab5f2f7",
        "receiptHandle": "test-receipt-handle",
        # SQS delivers the inner S3/EventBridge payload as a JSON string body.
        "body": json.dumps(example_s3_event_payload),
        "attributes": {
            "ApproximateReceiveCount": "1",
            "SentTimestamp": "1752691260451",
            "SenderId": "AIDAJXNJGGKNS7OSV23OI",
            "ApproximateFirstReceiveTimestamp": "1752691260458",
        },
        "messageAttributes": {},
        "md5OfBody": "dummy-md5",
        "eventSource": "aws:sqs",
        "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:queue-name",
        "awsRegion": "us-east-1",
    }
    return {"Records": [record]}
89+
90+
91+
@pytest.fixture
def caplog_warning(caplog: pytest.LogCaptureFixture) -> pytest.LogCaptureFixture:
    """Capture log warnings for tests.

    Fixed return annotation: the fixture returns the ``caplog`` capture
    fixture itself, not a ``logging.Logger``.

    :param caplog: Pytest fixture for capturing log output
    :return: Caplog instance with warning level set
    """
    caplog.set_level(logging.WARNING)
    return caplog

0 commit comments

Comments
 (0)