Skip to content

Commit 53eaafe

Browse files
authored
feat(migration): Enable using AWS CodeArtifact as maven mirror in Migration agent (#58)
1 parent 2c76cff commit 53eaafe

5 files changed

Lines changed: 297 additions & 2 deletions

File tree

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Dockerfile for the Migration agent, configured to use an AWS CodeArtifact
2+
# mirror of Maven Central instead of hitting repo.maven.apache.org directly.
3+
# This eliminates the HTTP 429 rate-limit failures seen under high-concurrency
4+
# RL training.
5+
#
6+
# Prerequisites (run once outside this Dockerfile):
7+
# aws codeartifact create-domain --domain migration-aws-maven-mirror --region us-west-2
8+
# aws codeartifact create-repository --domain migration-aws-maven-mirror --repository maven-central-cache --region us-west-2
9+
# aws codeartifact associate-external-connection \
10+
# --domain migration-aws-maven-mirror --repository maven-central-cache \
11+
# --external-connection public:maven-central --region us-west-2
12+
# # Grant the AgentCoreRuntime IAM role codeartifact:GetAuthorizationToken
13+
# # and codeartifact:ReadFromRepository:
14+
# aws iam put-role-policy \
15+
# --role-name AgentCoreRuntime \
16+
# --policy-name CodeArtifactReadAccess \
17+
# --policy-document '{
18+
# "Version": "2012-10-17",
19+
# "Statement": [
20+
# {
21+
# "Sid": "CodeArtifactRead",
22+
# "Effect": "Allow",
23+
# "Action": [
24+
# "codeartifact:GetAuthorizationToken",
25+
# "codeartifact:ReadFromRepository",
26+
# "codeartifact:GetRepositoryEndpoint"
27+
# ],
28+
# "Resource": "*"
29+
# },
30+
# {
31+
# "Sid": "STSServiceBearerToken",
32+
# "Effect": "Allow",
33+
# "Action": "sts:GetServiceBearerToken",
34+
# "Resource": "*",
35+
# "Condition": {
36+
# "StringEquals": {
37+
# "sts:AWSServiceName": "codeartifact.amazonaws.com"
38+
# }
39+
# }
40+
# }
41+
# ]
42+
# }'
43+
44+
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
45+
WORKDIR /app
46+
47+
# All environment variables in one layer
48+
ENV UV_SYSTEM_PYTHON=1 \
49+
UV_COMPILE_BYTECODE=1 \
50+
UV_NO_PROGRESS=1 \
51+
PYTHONUNBUFFERED=1 \
52+
DOCKER_CONTAINER=1 \
53+
AWS_REGION=us-west-2 \
54+
AWS_DEFAULT_REGION=us-west-2 \
55+
CODEARTIFACT_DOMAIN=migration-aws-maven-mirror \
56+
CODEARTIFACT_DOMAIN_OWNER={your-aws-account-number}
57+
58+
# ---------- Java code migration specific requirements ----------
59+
# 1. Install Java 17 as root user
60+
RUN apt-get update && \
61+
apt-get install -y openjdk-17-jdk && \
62+
apt-get clean && \
63+
rm -rf /var/lib/apt/lists/*
64+
65+
# Verify Java installation
66+
RUN echo "=== Java Installation Verification ===" && \
67+
java --version && \
68+
which java
69+
70+
# 2. Install maven
71+
# install tools needed for Maven first
72+
# curl and unzip are needed to download and unpack Maven and the AWS CLI below.
# ca-certificates is listed explicitly because those downloads use HTTPS and
# --no-install-recommends would otherwise not guarantee it is present.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        unzip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
76+
77+
# Download and unpack Maven 3.9.6 from the Apache archive, then expose it at
# the version-independent path /opt/maven.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page
# as the "zip" (which would only surface later as a confusing unzip failure);
# -L follows redirects; --retry rides out transient network errors.
RUN curl -fsSL --retry 3 \
        -o /tmp/apache-maven-3.9.6-bin.zip \
        https://archive.apache.org/dist/maven/maven-3/3.9.6/binaries/apache-maven-3.9.6-bin.zip && \
    unzip -q /tmp/apache-maven-3.9.6-bin.zip -d /opt/ && \
    rm /tmp/apache-maven-3.9.6-bin.zip && \
    ln -s /opt/apache-maven-3.9.6 /opt/maven
81+
82+
# Set Maven environment variables
83+
ENV MAVEN_HOME=/opt/maven
84+
ENV PATH=$MAVEN_HOME/bin:$PATH
85+
86+
RUN echo "=== Maven Installation Verification ===" && mvn --version
87+
88+
# 3. Install Node.js (prevents some frontend plugins from downloading it
89+
# at runtime, which incurs latency & introduces spammy logs)
90+
RUN apt-get update && \
91+
apt-get install -y --no-install-recommends nodejs && \
92+
apt-get clean && \
93+
rm -rf /var/lib/apt/lists/*
94+
95+
# 4. Install git
96+
RUN apt-get update && \
97+
apt-get install -y git && \
98+
apt-get clean && \
99+
rm -rf /var/lib/apt/lists/* && \
100+
git config --system --add safe.directory '*' && \
101+
git config --system user.email "no-reply@amazon.com" && \
102+
git config --system user.name "NoReply Amazon"
103+
104+
# 5. Install AWS CLI v2 (needed at runtime to fetch CodeArtifact auth tokens).
105+
# The uv base image doesn't include it.
106+
# Select the AWS CLI v2 bundle that matches the build architecture, install it,
# remove the installer files in the same layer, and verify the binary runs.
# curl -f aborts the build on an HTTP error instead of handing an error page
# to unzip; the unsupported-architecture message goes to stderr.
RUN ARCH="$(uname -m)" && \
    case "$ARCH" in \
        x86_64)  AWS_URL="https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" ;; \
        aarch64) AWS_URL="https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" ;; \
        *) echo "Unsupported architecture: $ARCH" >&2; exit 1 ;; \
    esac && \
    curl -fsSL --retry 3 "$AWS_URL" -o /tmp/awscliv2.zip && \
    unzip -q /tmp/awscliv2.zip -d /tmp && \
    /tmp/aws/install && \
    rm -rf /tmp/awscliv2.zip /tmp/aws && \
    aws --version
117+
118+
# ----------
119+
120+
COPY . .
121+
# Install local toolkit from build context, then install example deps
122+
RUN --mount=type=bind,from=toolkit,source=.,target=/toolkit \
123+
uv pip install /toolkit && uv pip install .
124+
125+
126+
127+
RUN uv pip install aws-opentelemetry-distro==0.12.2
128+
129+
# 6. Install Maven settings.xml that mirrors all traffic through AWS CodeArtifact.
130+
# Placed at two locations:
131+
# - /opt/maven/conf/settings.xml (global, used by any user running mvn)
132+
# - /home/bedrock_agentcore/.m2/settings.xml (per-user, explicit)
133+
# Only the global copy is installed here; the per-user path is a symlink to it
134+
# created in step 8. Maven merges both levels, with user settings taking precedence.
135+
COPY maven-settings.xml /opt/maven/conf/settings.xml
136+
137+
# 7. Install entrypoint script that fetches a CodeArtifact auth token at
138+
# container startup. No in-container refresh loop is needed because
139+
# AgentCore spawns a fresh container per session (<= 30min lifetime),
140+
# always within a single 12h token TTL. Future containers spawned hours
141+
# or days later will fetch their own fresh tokens at their own startup.
142+
# Copy the entrypoint and mark it executable in a single layer.
# COPY --chmod needs BuildKit, which this Dockerfile already requires for
# RUN --mount.
COPY --chmod=755 entrypoint.sh /app/entrypoint.sh
144+
145+
# 8. Create non-root user and set up their Maven config.
146+
# The symlink points the user-level settings at the global one so there's
147+
# only one file to maintain.
148+
RUN useradd -m -u 1000 bedrock_agentcore && \
149+
mkdir -p /home/bedrock_agentcore/.m2 && \
150+
ln -sf /opt/maven/conf/settings.xml /home/bedrock_agentcore/.m2/settings.xml && \
151+
chown -R bedrock_agentcore:bedrock_agentcore /home/bedrock_agentcore
152+
153+
USER bedrock_agentcore
154+
155+
EXPOSE 9000
156+
EXPOSE 8000
157+
EXPOSE 8080
158+
159+
160+
# Use the full module path
161+
# Entrypoint obtains the CodeArtifact auth token once at startup (no refresh
162+
# loop is needed; see step 7 above), then execs the agent command.
163+
CMD ["/app/entrypoint.sh"]

examples/strands_migration_agent/Dockerfile renamed to examples/strands_migration_agent/.bedrock_agentcore/public_maven/Dockerfile

File renamed without changes.

examples/strands_migration_agent/README.md

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ Build the docker image:
168168
docker buildx build \
169169
--build-context toolkit=$TOOLKIT_ROOT \
170170
-t migration:dev --load \
171-
-f $MIGRATION_DIR/Dockerfile \
171+
-f $MIGRATION_DIR/.bedrock_agentcore/public_maven/Dockerfile \
172172
$MIGRATION_DIR
173173
```
174174

@@ -207,12 +207,57 @@ cp .env.example .env
207207
# Make sure this is configured (e.g., run `aws configure`) before proceeding.
208208

209209
./scripts/build_docker_image_and_push_to_ecr.sh \
210-
--dockerfile=$MIGRATION_DIR/Dockerfile \
210+
--dockerfile=$MIGRATION_DIR/.bedrock_agentcore/public_maven/Dockerfile \
211211
--tag=dev \
212212
--context=$MIGRATION_DIR \
213213
--additional-context=toolkit=$TOOLKIT_ROOT
214214
```
215215

216+
### Maven Mirror
217+
218+
The Dockerfile `$MIGRATION_DIR/.bedrock_agentcore/public_maven/Dockerfile` used in the commands above downloads Java dependencies from the public Maven Central [repository](https://repo.maven.apache.org/). This works reliably in most scenarios, but during RL training we have seen Maven Central throttle requests (HTTP 429) when too many AgentCore Runtime sessions download from it at the same time, causing those sessions to fail with timeouts. If you hit the same issue, build the Migration agent with `$MIGRATION_DIR/.bedrock_agentcore/aws_maven_mirror/Dockerfile` instead. It routes Maven traffic through a mirror hosted in AWS CodeArtifact, which caches every downloaded Java dependency so that AgentCore Runtime sessions fetch them from the cache instead of downloading them from Maven Central each time.
219+
220+
First, run the following commands to set up the AWS CodeArtifact repository that serves as the Maven mirror:
221+
```bash
222+
aws codeartifact create-domain --domain migration-aws-maven-mirror --region us-west-2
223+
aws codeartifact create-repository --domain migration-aws-maven-mirror --repository maven-central-cache --region us-west-2
224+
aws codeartifact associate-external-connection \
225+
--domain migration-aws-maven-mirror --repository maven-central-cache \
226+
--external-connection public:maven-central --region us-west-2
227+
# Grant the AgentCoreRuntime IAM role codeartifact:GetAuthorizationToken
228+
# and codeartifact:ReadFromRepository:
229+
aws iam put-role-policy \
230+
--role-name AgentCoreRuntime \
231+
--policy-name CodeArtifactReadAccess \
232+
--policy-document '{
233+
"Version": "2012-10-17",
234+
"Statement": [
235+
{
236+
"Sid": "CodeArtifactRead",
237+
"Effect": "Allow",
238+
"Action": [
239+
"codeartifact:GetAuthorizationToken",
240+
"codeartifact:ReadFromRepository",
241+
"codeartifact:GetRepositoryEndpoint"
242+
],
243+
"Resource": "*"
244+
},
245+
{
246+
"Sid": "STSServiceBearerToken",
247+
"Effect": "Allow",
248+
"Action": "sts:GetServiceBearerToken",
249+
"Resource": "*",
250+
"Condition": {
251+
"StringEquals": {
252+
"sts:AWSServiceName": "codeartifact.amazonaws.com"
253+
}
254+
}
255+
}
256+
]
257+
}'
258+
```
259+
Then replace every `{your-aws-account-number}` placeholder in `maven-settings.xml`, `entrypoint.sh`, and `.bedrock_agentcore/aws_maven_mirror/Dockerfile` with your AWS account number. Finally, run the same build commands from the sections above, changing only the Dockerfile path, to build the Migration agent.
260+
216261
## Deploy
217262

218263
Create your `config.toml` file and fill in the `[agentcore]` section:
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
# Container entrypoint: obtain a CodeArtifact auth token, then exec the agent.
#
# Optional environment overrides (defaults are set below):
#   CODEARTIFACT_DOMAIN        CodeArtifact domain name
#   CODEARTIFACT_DOMAIN_OWNER  AWS account ID that owns the domain
#   AWS_REGION                 region used for the token request
#
# On success, exports CODEARTIFACT_AUTH_TOKEN and replaces this shell with the
# agent process. Exits 1 if the domain owner is not numeric or if the token
# cannot be obtained.

set -e

# Write one timestamped, tagged log line to stderr.
log() {
    echo "$(date -Iseconds) [entrypoint] $*" >&2
}

# The default lives in its own variable on purpose. Writing a brace-wrapped
# placeholder directly inside ${VAR:-...} ends the expansion at the FIRST '}',
# so a value supplied through the environment would get a stray '}' appended.
DEFAULT_DOMAIN_OWNER='{your-aws-account-number}'

DOMAIN="${CODEARTIFACT_DOMAIN:-migration-aws-maven-mirror}"
DOMAIN_OWNER="${CODEARTIFACT_DOMAIN_OWNER:-${DEFAULT_DOMAIN_OWNER}}"
REGION="${AWS_REGION:-us-west-2}"

# An unreplaced placeholder (or any other non-numeric value) can never be a
# valid account ID; fail here with a clear message instead of an AWS CLI error.
case "${DOMAIN_OWNER}" in
    '' | *[!0-9]*)
        log "ERROR: CodeArtifact domain owner '${DOMAIN_OWNER}' is not a numeric AWS account ID"
        log "Set CODEARTIFACT_DOMAIN_OWNER to your 12-digit AWS account ID"
        exit 1
        ;;
esac

log "fetching CodeArtifact auth token (domain=${DOMAIN}, region=${REGION})"

# Fetch token. If this fails, Maven will get 401/403 from CodeArtifact and
# all builds in the session will fail. A hard exit here surfaces the problem
# early rather than causing confusing Maven errors later.
if ! CODEARTIFACT_AUTH_TOKEN=$(aws codeartifact get-authorization-token \
        --domain "${DOMAIN}" \
        --domain-owner "${DOMAIN_OWNER}" \
        --region "${REGION}" \
        --query authorizationToken \
        --output text); then
    log "ERROR: failed to obtain CodeArtifact token"
    log "Check that the runtime IAM role has codeartifact:GetAuthorizationToken permission"
    log "and that CodeArtifact domain '${DOMAIN}' exists in account ${DOMAIN_OWNER}"
    exit 1
fi
export CODEARTIFACT_AUTH_TOKEN

log "token obtained, launching agent"
# exec so the agent replaces this shell and receives container signals directly.
exec opentelemetry-instrument python -m rl_app
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Maven settings pointing all Maven traffic at an AWS CodeArtifact mirror.
4+
CodeArtifact caches Maven Central artifacts inside AWS, eliminating the
5+
public Maven Central per-IP HTTP 429 rate limit.
6+
Prerequisites and AWS CLI setup commands are in the aws_maven_mirror
7+
Dockerfile header; see also entrypoint.sh which populates
8+
CODEARTIFACT_AUTH_TOKEN at container startup.
9+
-->
10+
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
11+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
12+
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
13+
14+
<servers>
15+
<server>
16+
<id>aws-codeartifact</id>
17+
<username>aws</username>
18+
<password>${env.CODEARTIFACT_AUTH_TOKEN}</password>
19+
</server>
20+
</servers>
21+
22+
<mirrors>
23+
<mirror>
24+
<id>aws-codeartifact</id>
25+
<name>AWS CodeArtifact Maven Central cache</name>
26+
<!-- Update domain/repo/account/region here if you use different names -->
27+
<url>https://migration-aws-maven-mirror-{your-aws-account-number}.d.codeartifact.us-west-2.amazonaws.com/maven/maven-central-cache/</url>
28+
<mirrorOf>*</mirrorOf>
29+
</mirror>
30+
</mirrors>
31+
32+
<profiles>
33+
<profile>
34+
<id>aws-codeartifact</id>
35+
<repositories>
36+
<repository>
37+
<id>aws-codeartifact</id>
38+
<url>https://migration-aws-maven-mirror-{your-aws-account-number}.d.codeartifact.us-west-2.amazonaws.com/maven/maven-central-cache/</url>
39+
<releases><enabled>true</enabled></releases>
40+
<snapshots><enabled>true</enabled></snapshots>
41+
</repository>
42+
</repositories>
43+
<pluginRepositories>
44+
<pluginRepository>
45+
<id>aws-codeartifact</id>
46+
<url>https://migration-aws-maven-mirror-{your-aws-account-number}.d.codeartifact.us-west-2.amazonaws.com/maven/maven-central-cache/</url>
47+
<releases><enabled>true</enabled></releases>
48+
<snapshots><enabled>true</enabled></snapshots>
49+
</pluginRepository>
50+
</pluginRepositories>
51+
</profile>
52+
</profiles>
53+
54+
<activeProfiles>
55+
<activeProfile>aws-codeartifact</activeProfile>
56+
</activeProfiles>
57+
58+
</settings>

0 commit comments

Comments
 (0)