Skip to content

Commit 2a895f8

Browse files
committed
Merge dev into stage
2 parents a29c10f + 2620287 commit 2a895f8

File tree

13 files changed

+251
-32
lines changed

13 files changed

+251
-32
lines changed

.devcontainer/devcontainer.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88
"ghcr.io/devcontainers/features/python:1.6.3": {
99
"version": "3.12.0"
1010
},
11-
"ghcr.io/devcontainers/features/aws-cli:1": {}
11+
"ghcr.io/devcontainers/features/aws-cli:1": {},
12+
"ghcr.io/devcontainers/features/docker-in-docker:2.12.0": {
13+
"version": "27.0.3",
14+
"moby": false
15+
}
1216
},
1317
"postCreateCommand": "./tools/setup.sh",
1418
"shutdownAction": "stopContainer"

.github/workflows/aws-deploy.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
role-session-name: ${{ inputs.role-session-name }}
5353
role-duration-seconds: ${{ inputs.role-duration-seconds }}
5454
- name: CDK deploy
55-
run: cdk deploy --all --require-approval never
55+
run: cdk deploy --all --concurrency 5 --require-approval never
5656
env:
5757
ENV: ${{ inputs.environment }}
5858
SECRETS: ${{ inputs.secrets-location }}

.pre-commit-config.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
ci:
2+
autoupdate_schedule: monthly
3+
14
default_language_version:
25
python: python3
36

@@ -17,7 +20,7 @@ repos:
1720
hooks:
1821
- id: yamllint
1922
- repo: https://github.com/awslabs/cfn-python-lint
20-
rev: v1.16.0
23+
rev: v1.20.1
2124
hooks:
2225
- id: cfn-python-lint
2326
args:
@@ -36,7 +39,7 @@ repos:
3639
hooks:
3740
- id: black
3841
- repo: https://github.com/sirosen/check-jsonschema
39-
rev: 0.29.3
42+
rev: 0.30.0
4043
hooks:
4144
- id: check-github-workflows
4245
- id: check-github-actions

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,17 @@ also include a Python virtual environment where all the Python packages needed
3838
are already installed.
3939

4040
If you decide to develop outside of the dev container, some of the development
41-
tools can be installed by running:
41+
tools can be installed manually by running:
4242

4343
```console
4444
./tools/setup.sh
4545
```
4646

47+
When developing outside the dev container, the following tools must be installed
48+
manually.
49+
50+
- [Docker](https://docs.docker.com/engine/install/) >= v27
51+
4752
Development requires the activation of the Python virtual environment:
4853

4954
```

app.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
import aws_cdk as cdk
2+
from aws_cdk.aws_scheduler_alpha import ScheduleExpression
23

34
from openchallenges.bucket_stack import BucketStack
45
from openchallenges.network_stack import NetworkStack
56
from openchallenges.ecs_stack import EcsStack
67
from openchallenges.service_stack import ServiceStack
78
from openchallenges.service_stack import LoadBalancedServiceStack
89
from openchallenges.load_balancer_stack import LoadBalancerStack
9-
from openchallenges.service_props import ServiceProps
10+
from openchallenges.service_props import ServiceProps, ContainerVolume
11+
from openchallenges.data_integration_stack import DataIntegrationStack
12+
from openchallenges.data_integration_props import DataIntegrationProps
1013
import openchallenges.utils as utils
1114

1215
app = cdk.App()
1316

1417
# get the environment
1518
environment = utils.get_environment()
1619
stack_name_prefix = f"openchallenges-{environment}"
17-
image_version = "0.0.11"
20+
image_version = "1.1.1"
1821

1922
# get VARS from cdk.json
2023
env_vars = app.node.try_get_context(environment)
@@ -45,6 +48,12 @@
4548
"MARIADB_PASSWORD": secrets["MARIADB_PASSWORD"],
4649
"MARIADB_ROOT_PASSWORD": secrets["MARIADB_ROOT_PASSWORD"],
4750
},
51+
container_volumes=[
52+
ContainerVolume(
53+
path="/data/db",
54+
size=30,
55+
)
56+
],
4857
)
4958

5059
mariadb_stack = ServiceStack(
@@ -297,9 +306,9 @@
297306
f"ghcr.io/sage-bionetworks/openchallenges-app:{image_version}",
298307
{
299308
"API_DOCS_URL": f"https://{fully_qualified_domain_name}/api-docs",
300-
"APP_VERSION": "1.0.0-alpha",
309+
"APP_VERSION": image_version,
301310
"CSR_API_URL": f"https://{fully_qualified_domain_name}/api/v1",
302-
"DATA_UPDATED_ON": "2024-10-11",
311+
"DATA_UPDATED_ON": "2024-11-27",
303312
"ENVIRONMENT": "production",
304313
"GOOGLE_TAG_MANAGER_ID": "GTM-NBR5XD8C",
305314
"SSR_API_URL": "http://openchallenges-api-gateway:8082/api/v1",
@@ -322,6 +331,20 @@
322331
app, f"{stack_name_prefix}-load-balancer", network_stack.vpc
323332
)
324333

334+
data_integration_props = DataIntegrationProps(
335+
schedule=ScheduleExpression.cron(
336+
minute="*/5",
337+
hour="*",
338+
day="*",
339+
month="*",
340+
time_zone=cdk.TimeZone.AMERICA_LOS_ANGELES,
341+
),
342+
schedule_description="This is a cron-based schedule that will run every 5 minutes",
343+
)
344+
data_integration_stack = DataIntegrationStack(
345+
app, f"{stack_name_prefix}-data-integration", data_integration_props
346+
)
347+
325348
api_docs_props = ServiceProps(
326349
"openchallenges-api-docs",
327350
8010,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
FROM ghcr.io/sage-bionetworks/sandbox-lambda-python:sha-b38dc22
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from aws_cdk import aws_iam as iam
2+
from aws_cdk import aws_lambda as lambda_
3+
from constructs import Construct
4+
5+
6+
class DataIntegrationLambda(Construct):
    """
    A CDK construct to define an AWS Lambda function for data integration.

    This construct creates an IAM role with the necessary permissions and a Docker-based
    Lambda function for handling data integration tasks.

    Attributes:
        lambda_role (iam.Role): The IAM role assumed by the Lambda function.
        lambda_function (lambda_.Function): The Docker-based Lambda function.
    """

    def __init__(self, scope: Construct, id: str) -> None:
        """
        Creates the IAM role and the Lambda function that uses it.

        Args:
            scope (Construct): The parent construct.
            id (str): The identifier of this construct within its scope.
        """
        super().__init__(scope, id)

        # The role must exist first because the function is created with it.
        self.lambda_role = self._build_lambda_role()
        self.lambda_function = self._build_lambda_function(self.lambda_role)

    def _build_lambda_role(self) -> iam.Role:
        """
        Builds the IAM role for the Lambda function.

        The role can be assumed by the Lambda service principal and has the AWS
        managed policy `service-role/AWSLambdaBasicExecutionRole` attached.

        Returns:
            iam.Role: The IAM role for the Lambda function.
        """
        return iam.Role(
            self,
            "LambdaRole",
            assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    managed_policy_name=("service-role/AWSLambdaBasicExecutionRole")
                )
            ],
        )

    def _build_lambda_function(self, role: iam.Role) -> lambda_.Function:
        """
        Builds the Docker-based AWS Lambda function.

        The Lambda function uses a Docker image built from a local directory.

        Args:
            role (iam.Role): The IAM role to associate with the Lambda function.

        Returns:
            lambda_.Function: The Docker-based AWS Lambda function (created as a
                `lambda_.DockerImageFunction`).
        """
        return lambda_.DockerImageFunction(
            self,
            "LambdaFunction",
            code=lambda_.DockerImageCode.from_image_asset(
                # The path is relative to the directory where `cdk deploy` is
                # executed; that directory must contain a Dockerfile with the
                # build instructions.
                directory="cdk_docker/data-integration-lambda"
            ),
            role=role,
            memory_size=128,
        )
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from dataclasses import dataclass
2+
from aws_cdk.aws_scheduler_alpha import ScheduleExpression
3+
4+
5+
@dataclass
class DataIntegrationProps:
    """
    Configuration of the data integration schedule.

    Attributes:
        schedule (ScheduleExpression): When the data integration is triggered.
        schedule_description (str): Text describing the schedule.
    """

    schedule: ScheduleExpression
    """When the data integration is triggered."""

    schedule_description: str
    """Text describing the schedule."""
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import aws_cdk as cdk
2+
from aws_cdk import (
3+
aws_scheduler_alpha as scheduler_alpha,
4+
aws_scheduler_targets_alpha as scheduler_targets,
5+
)
6+
from openchallenges.data_integration_lambda import DataIntegrationLambda
7+
from openchallenges.data_integration_props import DataIntegrationProps
8+
from constructs import Construct
9+
10+
11+
class DataIntegrationStack(cdk.Stack):
    """
    AWS CDK stack that runs the data integration on a schedule.

    Resources defined by this stack:
    - A `DataIntegrationLambda` construct providing the Lambda function.
    - An EventBridge Scheduler group for organizing schedules.
    - An EventBridge Scheduler schedule in that group that triggers the
      Lambda function.
    """

    def __init__(
        self, scope: Construct, id: str, props: DataIntegrationProps, **kwargs
    ) -> None:
        """
        Initializes the DataIntegrationStack.

        Arguments:
            scope (Construct): The parent construct for this stack.
            id (str): The unique identifier for this stack.
            props (DataIntegrationProps): The schedule expression and its
                description.
            **kwargs: Additional arguments passed to the base `cdk.Stack` class.
        """
        super().__init__(scope, id, **kwargs)

        integration = DataIntegrationLambda(self, "data-integration-lambda")

        # The schedule invokes the Lambda function with an empty object as input.
        lambda_function = integration.lambda_function
        empty_input = scheduler_alpha.ScheduleTargetInput.from_object({})
        invoke_target = scheduler_targets.LambdaInvoke(
            lambda_function,
            input=empty_input,
        )

        # Dedicated group for the schedule (more schedules may be added to
        # this group in the future).
        group = scheduler_alpha.Group(self, "group", group_name="schedule-group")

        scheduler_alpha.Schedule(
            self,
            "schedule",
            schedule=props.schedule,
            target=invoke_target,
            group=group,
            description=props.schedule_description,
        )

openchallenges/service_props.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,25 @@
1+
from dataclasses import dataclass
2+
from typing import List
3+
14
CONTAINER_LOCATION_PATH_ID = "path://"
25

36

7+
@dataclass
class ContainerVolume:
    """
    Holds onto configuration for a volume used in the container.

    Attributes:
        path: The path on the container to mount the host volume at.
        size: The size of the volume in GiB. Must be a positive number.
        read_only: Container has read-only access to the volume, set to `False` for write access.

    Raises:
        ValueError: If `path` is empty or `size` is not positive.
    """

    path: str
    size: int = 15
    read_only: bool = False

    def __post_init__(self) -> None:
        """Rejects values that cannot describe a usable volume."""
        # Fail while the configuration is being built rather than later, when
        # the volume definition is consumed.
        if not self.path:
            raise ValueError("ContainerVolume.path must not be empty")
        if self.size <= 0:
            raise ValueError(
                f"ContainerVolume.size must be a positive number of GiB, got {self.size}"
            )
21+
22+
423
class ServiceProps:
524
"""
625
ECS service properties
@@ -13,6 +32,7 @@ class ServiceProps:
1332
supports docker registry references (i.e. ghcr.io/sage-bionetworks/openchallenges-thumbor:latest)
1433
container_env_vars: a json dictionary of environment variables to pass into the container
1534
i.e. {"EnvA": "EnvValueA", "EnvB": "EnvValueB"}
35+
container_volumes: List of `ContainerVolume` resources to mount into the container
1636
"""
1737

1838
def __init__(
@@ -22,6 +42,7 @@ def __init__(
2242
container_memory: int,
2343
container_location: str,
2444
container_env_vars: dict,
45+
container_volumes: List[ContainerVolume] = None,
2546
) -> None:
2647
self.container_name = container_name
2748
self.container_port = container_port
@@ -32,3 +53,7 @@ def __init__(
3253
)
3354
self.container_location = container_location
3455
self.container_env_vars = container_env_vars
56+
if container_volumes is None:
57+
self.container_volumes = []
58+
else:
59+
self.container_volumes = container_volumes

0 commit comments

Comments
 (0)