Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions .github/workflows/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
- name: Create ECR repositories
if: ${{ inputs.apply && !inputs.destroy }}
working-directory: terraform
run: terraform apply -auto-approve -target=aws_ecr_repository.index_lambda -target=aws_ecr_repository.ttc_lambda
run: terraform apply -auto-approve -target=aws_ecr_repository.index_lambda -target=aws_ecr_repository.ttc_lambda -target=aws_ecr_repository.augmentation_lambda

- name: Login to Amazon ECR
if: ${{ inputs.apply && !inputs.destroy }}
Expand All @@ -76,22 +76,36 @@ jobs:
echo "index_ecr_url=$INDEX_ECR_URL" >> "$GITHUB_OUTPUT"
ECR_URL=$(terraform output -raw ecr_repository_url)
echo "ecr_url=$ECR_URL" >> "$GITHUB_OUTPUT"
AUG_ECR_URL=$(terraform output -raw augmentation_ecr_repository_url)
echo "aug_ecr_url=$AUG_ECR_URL" >> "$GITHUB_OUTPUT"

- name: Build and push Index Docker image
if: ${{ inputs.apply && !inputs.destroy }}
run: |
INDEX_ECR_URL="${{ steps.ecr-url.outputs.index_ecr_url }}"
docker build -f Dockerfile.index -t "$INDEX_ECR_URL:${{ github.sha }}" -t "$INDEX_ECR_URL:latest" .
docker build -f Dockerfile.index -t "$INDEX_ECR_URL:${{ github.sha }}" -t "$INDEX_ECR_URL:latest" --secret id=huggingface_token,env=HF_TOKEN .
docker push "$INDEX_ECR_URL:${{ github.sha }}"
docker push "$INDEX_ECR_URL:latest"
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

- name: Build and push TTC Docker image
if: ${{ inputs.apply && !inputs.destroy }}
run: |
ECR_URL="${{ steps.ecr-url.outputs.ecr_url }}"
docker build -f Dockerfile.ttc -t "$ECR_URL:${{ github.sha }}" -t "$ECR_URL:latest" .
docker build -f Dockerfile.ttc -t "$ECR_URL:${{ github.sha }}" -t "$ECR_URL:latest" --secret id=huggingface_token,env=HF_TOKEN .
docker push "$ECR_URL:${{ github.sha }}"
docker push "$ECR_URL:latest"
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

- name: Build and push Augmentation Docker image
if: ${{ inputs.apply && !inputs.destroy }}
run: |
AUG_ECR_URL="${{ steps.ecr-url.outputs.aug_ecr_url }}"
docker build -f Dockerfile.augmentation -t "$AUG_ECR_URL:${{ github.sha }}" -t "$AUG_ECR_URL:latest" .
docker push "$AUG_ECR_URL:${{ github.sha }}"
docker push "$AUG_ECR_URL:latest"

- name: Terraform Plan
if: ${{ !inputs.apply && !inputs.destroy }}
Expand All @@ -101,7 +115,7 @@ jobs:
- name: Terraform Apply
if: ${{ inputs.apply && !inputs.destroy }}
working-directory: terraform
run: terraform apply -auto-approve -var="index_lambda_image_tag=${{ github.sha }}" -var="ttc_lambda_image_tag=${{ github.sha }}"
run: terraform apply -auto-approve -var="index_lambda_image_tag=${{ github.sha }}" -var="ttc_lambda_image_tag=${{ github.sha }}" -var="augmentation_lambda_image_tag=${{ github.sha }}"

- name: Terraform Destroy
if: ${{ inputs.destroy }}
Expand Down
13 changes: 11 additions & 2 deletions Dockerfile.ttc
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,26 @@ COPY ./packages/lambda-handler ${LAMBDA_TASK_ROOT}/lambda-handler
RUN pip install --no-cache-dir "${LAMBDA_TASK_ROOT}/lambda-handler"

COPY ./packages/text-to-code ${LAMBDA_TASK_ROOT}/text-to-code
# Install CPU-only PyTorch to reduce image size (Lambda doesn't have GPUs)
RUN pip install --no-cache-dir torch==2.9.1 --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir "${LAMBDA_TASK_ROOT}/text-to-code"

COPY ./packages/text-to-code-lambda ${LAMBDA_TASK_ROOT}/text-to-code-lambda
RUN pip install --no-cache-dir "${LAMBDA_TASK_ROOT}/text-to-code-lambda"

# Remove build tools no longer needed at runtime
RUN rpm -e --nodeps gcc-c++ gcc cpp make && microdnf clean all && rm -rf /var/cache/*

# Download retriever at build time (private repo, needs token)
RUN --mount=type=secret,id=huggingface_token,env=HF_TOKEN \
python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='NCHS/ttc-retriever-mvp', local_dir='/opt/retriever_model', ignore_patterns=['*.git*', '*.md', 'onnx/*', 'openvino/*', 'pytorch_model.bin'])"
python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='NCHS/ttc-retriever-mvp', local_dir='/opt/retriever_model', ignore_patterns=['*.git*', '*.md', 'onnx/*', 'openvino/*', 'pytorch_model.bin', 'tf_model.h5', 'flax_model.msgpack', 'model.onnx'])"

# Download reranker at build time (public repo)
RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='cross-encoder/stsb-roberta-large', local_dir='/opt/reranker_model', ignore_patterns=['*.git*', '*.md', 'onnx/*', 'openvino/*', 'pytorch_model.bin'])"
RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='cross-encoder/stsb-roberta-large', local_dir='/opt/reranker_model', ignore_patterns=['*.git*', '*.md', 'onnx/*', 'openvino/*', 'pytorch_model.bin', 'tf_model.h5', 'flax_model.msgpack', 'model.onnx'])" \
&& rm -rf /root/.cache/huggingface

# Clean up package source copies
RUN rm -rf ${LAMBDA_TASK_ROOT}/shared-models ${LAMBDA_TASK_ROOT}/lambda-handler ${LAMBDA_TASK_ROOT}/text-to-code ${LAMBDA_TASK_ROOT}/text-to-code-lambda

ENV RETRIEVER_MODEL_PATH="/opt/retriever_model"
ENV RERANKER_MODEL_PATH="/opt/reranker_model"
Expand Down
23 changes: 20 additions & 3 deletions terraform/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ All components live inside a private VPC (no NAT gateway, no internet gateway).

- **ECR Repository** (`aws_ecr_repository.ttc_lambda`): Stores the Docker container image for the main TTC Lambda. The image installs all workspace Python packages (`shared-models`, `lambda-handler`, `text-to-code`, `text-to-code-lambda`) and bakes in the SentenceTransformer model (`intfloat/e5-large-v2`) at build time. Images are built and pushed by CI/CD during `terraform apply`.
- **ECR Repository** (`aws_ecr_repository.index_lambda`): Stores the Docker container image for the index bootstrap Lambda, built from `Dockerfile.index` at repo root.
- **ECR Repository** (`aws_ecr_repository.augmentation_lambda`): Stores the Docker container image for the augmentation Lambda, built from `Dockerfile.augmentation` at repo root.

### IAM (`main.tf`)

- **Lambda IAM Role** (`aws_iam_role.lambda_role`): Shared by both Lambda functions. Attached policies:
- **Lambda IAM Role** (`aws_iam_role.lambda_role`): Shared by all Lambda functions (TTC, index, and augmentation). Attached policies:
- `AWSLambdaVPCAccessExecutionRole` — allows ENI creation for VPC placement
- `AWSLambdaBasicExecutionRole` — allows CloudWatch Logs writes
- `AmazonS3FullAccess` — S3 read/write (TODO: scope down to specific bucket/prefix)
Expand Down Expand Up @@ -74,6 +75,21 @@ At runtime, the Lambda runs the real `text_to_code_lambda.lambda_function.handle

Environment variables injected at deploy time: `OPENSEARCH_ENDPOINT_URL`, `OPENSEARCH_INDEX`, `REGION`, `S3_BUCKET`, `RETRIEVER_MODEL_PATH`, `RERANKER_MODEL_PATH`, `EICR_INPUT_PREFIX`, `SCHEMATRON_ERROR_PREFIX`, `TTC_INPUT_PREFIX`, `TTC_OUTPUT_PREFIX`, `TTC_METADATA_PREFIX`.

#### Augmentation Lambda (`ttc-augmentation-lambda`, `Dockerfile.augmentation`)

Deployed as a **container image** from ECR (`package_type = "Image"`). The Docker image (`Dockerfile.augmentation` at repo root) installs the `augmentation-lambda` package along with its workspace dependencies (`shared-models`, `lambda-handler`, `augmentation`).

At runtime, the Lambda processes augmentation requests containing eICR XML and nonstandard code mappings from the TTC Lambda. It:

1. Parses incoming eICR XML and nonstandard code instances
2. Inserts standardized LOINC/SNOMED `<translation>` elements into the eICR
3. Updates document headers (ID, effectiveTime, setId, versionNumber) and adds author/provenance metadata
4. Writes the augmented eICR XML and metadata JSON to S3

The augmentation Lambda uses only the Lambda security group (not the OpenSearch security group) since it does not require OpenSearch access. It is configured with lower memory (512 MB) and timeout (300s) defaults compared to the TTC Lambda, as it does not load ML models.

Environment variables injected at deploy time: `S3_BUCKET`, `AUGMENTED_EICR_PREFIX`, `AUGMENTATION_METADATA_PREFIX`, `REGION`.

### OpenSearch Ingestion Pipeline (`main.tf`)

An **AWS OpenSearch Ingestion Service (OSIS)** pipeline (`aws_osis_pipeline.ttc_ingestion_pipeline`) that:
Expand All @@ -91,13 +107,14 @@ The pipeline **depends on** the index bootstrap invocation completing first, ens
Terraform manages dependency ordering automatically, but conceptually the sequence is:

1. VPC, subnets, security groups, S3 endpoint created
2. ECR repositories created (TTC lambda + index lambda)
2. ECR repositories created (TTC lambda, index lambda, augmentation lambda)
3. Docker images built and pushed to ECR (in CI/CD, before full `terraform apply`)
4. OpenSearch domain and VPC endpoint created
5. Lambda IAM role created
6. Index bootstrap Lambda deployed and **immediately invoked** — creates the KNN index in OpenSearch
7. Ingestion pipeline deployed — begins polling S3 for NDJSON embeddings to load
8. Main TTC Lambda deployed with container image from ECR — loads model at cold start, ready to serve KNN queries
9. Augmentation Lambda deployed with container image from ECR — ready to process augmentation requests

## State Backend

Expand Down Expand Up @@ -134,7 +151,7 @@ Before running `terraform apply`:

1. **Bootstrap**: Run `terraform apply` in `bootstrap/` first to create the S3 state bucket and DynamoDB lock table.
2. **Embedding files**: Upload NDJSON embedding files to `s3://dibbs-text-to-code/ingestion/`. The OSIS pipeline will ingest these into OpenSearch.
3. **Docker**: CI/CD builds both container images (`Dockerfile.ttc` for TTC lambda, `Dockerfile.index` for index lambda) automatically. For local development, Docker must be available to build the images.
3. **Docker**: CI/CD builds all container images (`Dockerfile.ttc` for TTC lambda, `Dockerfile.index` for index lambda, `Dockerfile.augmentation` for augmentation lambda) automatically. For local development, Docker must be available to build the images.

> **Note:** The SentenceTransformer model and heavy Python dependencies (sentence-transformers, torch) are baked into the Lambda container image at build time via the Dockerfile. The Dockerfile installs the real `text-to-code-lambda` package and all its workspace dependencies.

Expand Down
10 changes: 10 additions & 0 deletions terraform/_outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,13 @@ output "index_ecr_repository_url" {
value = aws_ecr_repository.index_lambda.repository_url
description = "The URL of the ECR repository for the index Lambda container image"
}

# ECR repository URL for the augmentation Lambda image. Read by CI/CD
# (`terraform output -raw augmentation_ecr_repository_url`) to tag and push
# the image built from Dockerfile.augmentation.
output "augmentation_ecr_repository_url" {
  value       = aws_ecr_repository.augmentation_lambda.repository_url
  description = "The URL of the ECR repository for the augmentation Lambda container image"
}

# Deployed function name of the augmentation Lambda.
output "augmentation_lambda_function_name" {
  value       = aws_lambda_function.augmentation_lambda.function_name
  description = "The name of the augmentation lambda function"
}
25 changes: 25 additions & 0 deletions terraform/_variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,25 @@ variable "augmentation_metadata_prefix" {
description = "S3 prefix for augmentation metadata files"
}

### Augmentation Lambda Variables
# Name of the augmentation Lambda function; also used as its Name tag in main.tf.
variable "augmentation_lambda_function_name" {
  type        = string
  default     = "ttc-augmentation-lambda"
  description = "The name of the augmentation lambda function"
}

# Memory allocation for the augmentation Lambda. Kept low relative to the TTC
# Lambda because this function does not load ML models.
variable "augmentation_lambda_memory_size" {
  type        = number
  default     = 512
  description = "Memory allocation in MB for the augmentation lambda. Lower than the TTC lambda since no ML models are loaded."

  # Fail fast at plan time: AWS Lambda only accepts 128-10240 MB.
  validation {
    condition     = var.augmentation_lambda_memory_size >= 128 && var.augmentation_lambda_memory_size <= 10240
    error_message = "augmentation_lambda_memory_size must be between 128 and 10240 MB (AWS Lambda limits)."
  }
}

# Execution timeout for the augmentation Lambda.
variable "augmentation_lambda_timeout" {
  type        = number
  default     = 300
  description = "Timeout in seconds for the augmentation lambda function"

  # Fail fast at plan time: AWS Lambda timeouts must be 1-900 seconds.
  validation {
    condition     = var.augmentation_lambda_timeout >= 1 && var.augmentation_lambda_timeout <= 900
    error_message = "augmentation_lambda_timeout must be between 1 and 900 seconds (AWS Lambda limits)."
  }
}

### Container Image Variables
variable "ttc_lambda_image_tag" {
type = string
Expand All @@ -160,3 +179,9 @@ variable "index_lambda_image_tag" {
description = "The image tag for the index Lambda container image in ECR"
}

# Image tag to deploy from the augmentation ECR repository. CI/CD passes the
# commit SHA via -var; "latest" is the fallback default.
variable "augmentation_lambda_image_tag" {
  type        = string
  default     = "latest"
  description = "The image tag for the augmentation Lambda container image in ECR"
}

40 changes: 40 additions & 0 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ resource "aws_ecr_repository" "index_lambda" {
tags = local.tags
}

# ECR repository holding the augmentation Lambda container image, built from
# Dockerfile.augmentation and pushed by CI/CD.
# force_delete lets `terraform destroy` remove the repo even when images
# are still present.
resource "aws_ecr_repository" "augmentation_lambda" {
  name         = "ttc-augmentation-lambda"
  force_delete = true

  # Scan images for vulnerabilities on every push.
  image_scanning_configuration {
    scan_on_push = true
  }

  tags = local.tags
}

#############
# VPC
# Note: If APHL wants to use their own VPC without this module, they will need to provide
Expand Down Expand Up @@ -480,3 +491,32 @@ resource "aws_lambda_function" "index_lambda" {
tags = { Name = var.index_lambda_function_name }
}

#############
# Augmentation Lambda
#############

# Augmentation Lambda, deployed as a container image pulled from the
# augmentation ECR repository at the configured tag. Reuses the shared
# lambda_role IAM role.
resource "aws_lambda_function" "augmentation_lambda" {
  function_name = var.augmentation_lambda_function_name
  role          = aws_iam_role.lambda_role.arn
  package_type  = "Image"
  image_uri     = "${aws_ecr_repository.augmentation_lambda.repository_url}:${var.augmentation_lambda_image_tag}"
  timeout       = var.augmentation_lambda_timeout
  memory_size   = var.augmentation_lambda_memory_size

  # Runs in the private VPC subnets with only the Lambda security group
  # (no OpenSearch SG attached — this function does not query OpenSearch).
  vpc_config {
    subnet_ids         = module.vpc.private_subnets
    security_group_ids = [aws_security_group.lambda_sg.id]
  }

  # Runtime configuration for the augmentation handler: target bucket and the
  # S3 prefixes for augmented eICR output and metadata JSON.
  environment {
    variables = {
      S3_BUCKET                    = var.s3_bucket
      AUGMENTED_EICR_PREFIX        = var.augmented_eicr_prefix
      AUGMENTATION_METADATA_PREFIX = var.augmentation_metadata_prefix
      REGION                       = var.region
    }
  }

  tags = { Name = var.augmentation_lambda_function_name }
}

Loading