Skip to content

Commit 81385ec

Browse files
feat(wanda): implement artifact copying from built images
Extract artifacts from built images using `docker cp`. This approach is cross-platform reliable on Windows. - Creates container from image without starting it - Copies each artifact using `docker cp` - Cleans up container when done - Optional artifacts log warning instead of failing - Extraction runs for root spec only, even on cache hit - Logs extraction duration for performance monitoring Topic: wanda-artifact-copy Relative: wanda-artifact-spec Labels: draft Signed-off-by: andrew <andrew@anyscale.com>
1 parent 25c63e1 commit 81385ec

11 files changed

+349
-2
lines changed

wanda/docker_cmd.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,28 @@ func (c *dockerCmd) tag(src, asTag string) error {
126126
return c.run("tag", src, asTag)
127127
}
128128

129+
// createContainer creates a container from an image without starting it.
130+
// Returns the container ID.
131+
func (c *dockerCmd) createContainer(image string) (string, error) {
132+
cmd := c.cmd("create", image)
133+
buf := new(bytes.Buffer)
134+
cmd.Stdout = buf
135+
if err := cmd.Run(); err != nil {
136+
return "", err
137+
}
138+
return strings.TrimSpace(buf.String()), nil
139+
}
140+
141+
// copyFromContainer copies a file or directory from a container to the host.
142+
func (c *dockerCmd) copyFromContainer(containerID, src, dst string) error {
143+
return c.run("cp", containerID+":"+src, dst)
144+
}
145+
146+
// removeContainer removes a container.
147+
func (c *dockerCmd) removeContainer(containerID string) error {
148+
return c.run("rm", containerID)
149+
}
150+
129151
func (c *dockerCmd) build(in *buildInput, core *buildInputCore, hints *buildInputHints) error {
130152
if hints == nil {
131153
hints = newBuildInputHints(nil, nil)

wanda/docker_cmd_test.go

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
package wanda
22

33
import (
4-
"testing"
5-
4+
"os"
5+
"path/filepath"
66
"strings"
7+
"testing"
78

89
"github.com/google/go-containerregistry/pkg/name"
910
"github.com/google/go-containerregistry/pkg/v1/daemon"
@@ -135,3 +136,80 @@ func TestDockerCmdBuild_withHints(t *testing.T) {
135136
t.Errorf("MESSAGE env got %q, want `MESSAGE=hint message`", messageEnv)
136137
}
137138
}
139+
140+
func TestDockerCmdCopyFromContainer(t *testing.T) {
141+
cmd := newDockerCmd(&dockerCmdConfig{})
142+
143+
const testImage = "alpine:latest"
144+
145+
if err := cmd.run("pull", testImage); err != nil {
146+
t.Fatalf("pull image: %v", err)
147+
}
148+
149+
containerID, err := cmd.createContainer(testImage)
150+
if err != nil {
151+
t.Fatalf("createContainer: %v", err)
152+
}
153+
defer cmd.removeContainer(containerID)
154+
155+
tmpDir := t.TempDir()
156+
157+
// Copy a known file from the container
158+
if err := cmd.copyFromContainer(containerID, "/etc/alpine-release", filepath.Join(tmpDir, "alpine-release")); err != nil {
159+
t.Fatalf("copyFromContainer: %v", err)
160+
}
161+
162+
if _, err := os.Stat(filepath.Join(tmpDir, "alpine-release")); os.IsNotExist(err) {
163+
t.Error("alpine-release was not copied")
164+
}
165+
}
166+
167+
func TestDockerCmdCopyFromContainer_directory(t *testing.T) {
168+
cmd := newDockerCmd(&dockerCmdConfig{})
169+
170+
const testImage = "alpine:latest"
171+
172+
if err := cmd.run("pull", testImage); err != nil {
173+
t.Fatalf("pull image: %v", err)
174+
}
175+
176+
containerID, err := cmd.createContainer(testImage)
177+
if err != nil {
178+
t.Fatalf("createContainer: %v", err)
179+
}
180+
defer cmd.removeContainer(containerID)
181+
182+
tmpDir := t.TempDir()
183+
184+
// Copy a directory from the container
185+
if err := cmd.copyFromContainer(containerID, "/etc", filepath.Join(tmpDir, "etc")); err != nil {
186+
t.Fatalf("copyFromContainer: %v", err)
187+
}
188+
189+
if _, err := os.Stat(filepath.Join(tmpDir, "etc", "alpine-release")); os.IsNotExist(err) {
190+
t.Error("alpine-release was not copied from /etc directory")
191+
}
192+
}
193+
194+
func TestDockerCmdCopyFromContainer_notFound(t *testing.T) {
195+
cmd := newDockerCmd(&dockerCmdConfig{})
196+
197+
const testImage = "alpine:latest"
198+
199+
if err := cmd.run("pull", testImage); err != nil {
200+
t.Fatalf("pull image: %v", err)
201+
}
202+
203+
containerID, err := cmd.createContainer(testImage)
204+
if err != nil {
205+
t.Fatalf("createContainer: %v", err)
206+
}
207+
defer cmd.removeContainer(containerID)
208+
209+
tmpDir := t.TempDir()
210+
211+
// Copying a non-existent file should fail
212+
if err := cmd.copyFromContainer(containerID, "/nonexistent/file", filepath.Join(tmpDir, "file")); err == nil {
213+
t.Error("copyFromContainer should fail for non-existent file")
214+
}
215+
}

wanda/forge.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"path/filepath"
88
"runtime"
99
"strings"
10+
"time"
1011

1112
"github.com/google/go-containerregistry/pkg/authn"
1213
cranename "github.com/google/go-containerregistry/pkg/name"
@@ -69,6 +70,17 @@ func Build(specFile string, config *ForgeConfig) error {
6970
}
7071
}
7172

73+
// Extract artifacts only for the root spec.
74+
if config.ArtifactsDir != "" {
75+
rootSpec := graph.Specs[graph.Root].Spec
76+
if len(rootSpec.Artifacts) > 0 {
77+
rootTag := forge.workTag(rootSpec.Name)
78+
if err := forge.ExtractArtifacts(rootSpec, rootTag); err != nil {
79+
return fmt.Errorf("extract artifacts: %w", err)
80+
}
81+
}
82+
}
83+
7284
return nil
7385
}
7486

@@ -182,6 +194,66 @@ func (f *Forge) resolveBases(froms []string) (map[string]*imageSource, error) {
182194
return m, nil
183195
}
184196

197+
// ExtractArtifacts extracts artifacts from a built image to the artifacts directory.
198+
// NOTE(andrew-anyscale): We use individual `docker cp` commands rather than
199+
// `docker export | tar` or any other potentially faster method currently because
200+
// docker cp takes care of many cross-platform headache inducing issues.
201+
// If this becomes a bottleneck, we can consider using a different approach.
202+
func (f *Forge) ExtractArtifacts(spec *Spec, imageTag string) error {
203+
d := f.newDockerCmd()
204+
artifactsDir := f.config.ArtifactsDir
205+
206+
if err := os.MkdirAll(artifactsDir, 0755); err != nil {
207+
return fmt.Errorf("create artifacts dir: %w", err)
208+
}
209+
210+
log.Printf("extracting %d artifact(s) from %s", len(spec.Artifacts), imageTag)
211+
extractStart := time.Now()
212+
213+
containerID, err := d.createContainer(imageTag)
214+
if err != nil {
215+
return fmt.Errorf("create container: %w", err)
216+
}
217+
defer func() {
218+
if err := d.removeContainer(containerID); err != nil {
219+
log.Printf("warning: failed to remove container %s: %v", containerID, err)
220+
}
221+
}()
222+
223+
for _, a := range spec.Artifacts {
224+
dst := resolveArtifactDst(a.Dst, artifactsDir)
225+
log.Printf(" %s -> %s", a.Src, dst)
226+
227+
// Create parent directory for the destination.
228+
dstDir := filepath.Dir(dst)
229+
if strings.HasSuffix(a.Dst, "/") {
230+
dstDir = dst
231+
}
232+
if err := os.MkdirAll(dstDir, 0755); err != nil {
233+
return fmt.Errorf("create dir for artifact %s: %w", a.Dst, err)
234+
}
235+
236+
if err := d.copyFromContainer(containerID, a.Src, dst); err != nil {
237+
if a.Optional {
238+
log.Printf("warning: optional artifact not found: %s", a.Src)
239+
continue
240+
}
241+
return fmt.Errorf("copy artifact %s: %w", a.Src, err)
242+
}
243+
}
244+
245+
log.Printf("extraction completed in %v", time.Since(extractStart))
246+
return nil
247+
}
248+
249+
// resolveArtifactDst resolves the destination path for an artifact.
250+
func resolveArtifactDst(dst, artifactsDir string) string {
251+
if filepath.IsAbs(dst) {
252+
return dst
253+
}
254+
return filepath.Join(artifactsDir, dst)
255+
}
256+
185257
// Build builds a container image from the given specification.
186258
func (f *Forge) Build(spec *Spec) error {
187259
// Prepare the tar stream.
@@ -282,6 +354,7 @@ func (f *Forge) Build(spec *Spec) error {
282354
}
283355
}
284356
}
357+
285358
return nil // and we are done.
286359
}
287360
}

wanda/forge_config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ type ForgeConfig struct {
1515
Epoch string
1616
WandaSpecsFile string
1717
EnvFile string
18+
ArtifactsDir string
1819

1920
RayCI bool
2021
Rebuild bool

wanda/forge_test.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,3 +846,137 @@ func TestBuild_EnvfileCacheInvalidation(t *testing.T) {
846846
t.Errorf("expected cache miss on changed envfile, got %d hits", forge2.cacheHit())
847847
}
848848
}
849+
850+
func TestForgeConfigArtifactsDir(t *testing.T) {
851+
config := &ForgeConfig{ArtifactsDir: "/custom/artifacts", WorkDir: "/work"}
852+
if got := config.ArtifactsDir; got != "/custom/artifacts" {
853+
t.Errorf("ArtifactsDir = %q, want %q", got, "/custom/artifacts")
854+
}
855+
}
856+
857+
func TestBuild_WithArtifacts_exact(t *testing.T) {
858+
tmpDir := t.TempDir()
859+
artifactsDir := filepath.Join(tmpDir, "artifacts")
860+
861+
config := &ForgeConfig{
862+
WorkDir: "testdata",
863+
NamePrefix: "cr.ray.io/rayproject/",
864+
ArtifactsDir: artifactsDir,
865+
}
866+
867+
if err := Build("testdata/artifact-exact.wanda.yaml", config); err != nil {
868+
t.Fatalf("build: %v", err)
869+
}
870+
871+
extractedFile := filepath.Join(artifactsDir, "bin/myapp")
872+
content, err := os.ReadFile(extractedFile)
873+
if err != nil {
874+
t.Fatalf("read extracted file: %v", err)
875+
}
876+
877+
want := "binary-content\n"
878+
if got := string(content); got != want {
879+
t.Errorf("extracted content = %q, want %q", got, want)
880+
}
881+
}
882+
883+
func TestBuild_WithArtifacts_optional(t *testing.T) {
884+
tmpDir := t.TempDir()
885+
artifactsDir := filepath.Join(tmpDir, "artifacts")
886+
887+
config := &ForgeConfig{
888+
WorkDir: "testdata",
889+
NamePrefix: "cr.ray.io/rayproject/",
890+
ArtifactsDir: artifactsDir,
891+
}
892+
893+
// Build should succeed even though the optional artifact doesn't exist
894+
if err := Build("testdata/artifact-optional.wanda.yaml", config); err != nil {
895+
t.Fatalf("build: %v", err)
896+
}
897+
898+
// Required artifact should be extracted
899+
extractedFile := filepath.Join(artifactsDir, "bin/myapp")
900+
content, err := os.ReadFile(extractedFile)
901+
if err != nil {
902+
t.Fatalf("read extracted file: %v", err)
903+
}
904+
905+
want := "binary-content\n"
906+
if got := string(content); got != want {
907+
t.Errorf("extracted content = %q, want %q", got, want)
908+
}
909+
910+
// Optional artifact should not exist (since source doesn't exist)
911+
optionalFile := filepath.Join(artifactsDir, "optional.txt")
912+
if _, err := os.Stat(optionalFile); !os.IsNotExist(err) {
913+
t.Errorf("optional file should not exist, but got err: %v", err)
914+
}
915+
}
916+
917+
func TestBuild_WithArtifacts_rootOnly(t *testing.T) {
918+
wandaSpecs := filepath.Join(t.TempDir(), ".wandaspecs")
919+
absTestdata, err := filepath.Abs("testdata")
920+
if err != nil {
921+
t.Fatalf("abs testdata: %v", err)
922+
}
923+
if err := os.WriteFile(wandaSpecs, []byte(absTestdata), 0644); err != nil {
924+
t.Fatalf("write wandaspecs: %v", err)
925+
}
926+
927+
tmpDir := t.TempDir()
928+
artifactsDir := filepath.Join(tmpDir, "artifacts")
929+
930+
config := &ForgeConfig{
931+
WorkDir: "testdata",
932+
NamePrefix: "cr.ray.io/rayproject/",
933+
WandaSpecsFile: wandaSpecs,
934+
ArtifactsDir: artifactsDir,
935+
}
936+
937+
if err := Build("testdata/artifact-dep-top.wanda.yaml", config); err != nil {
938+
t.Fatalf("build with deps: %v", err)
939+
}
940+
941+
topFile := filepath.Join(artifactsDir, "top.txt")
942+
if _, err := os.Stat(topFile); os.IsNotExist(err) {
943+
t.Error("root spec artifact should have been extracted")
944+
}
945+
946+
depDocsFile := filepath.Join(artifactsDir, "docs/readme.md")
947+
if _, err := os.Stat(depDocsFile); !os.IsNotExist(err) {
948+
t.Error("dependency artifact should NOT have been extracted")
949+
}
950+
}
951+
952+
func TestBuild_WithArtifacts_cacheHit(t *testing.T) {
953+
tmpDir := t.TempDir()
954+
artifactsDir1 := filepath.Join(tmpDir, "artifacts1")
955+
artifactsDir2 := filepath.Join(tmpDir, "artifacts2")
956+
957+
config := &ForgeConfig{
958+
WorkDir: "testdata",
959+
NamePrefix: "cr.ray.io/rayproject/",
960+
ArtifactsDir: artifactsDir1,
961+
}
962+
963+
if err := Build("testdata/artifact-exact.wanda.yaml", config); err != nil {
964+
t.Fatalf("first build: %v", err)
965+
}
966+
967+
extractedFile1 := filepath.Join(artifactsDir1, "bin/myapp")
968+
if _, err := os.Stat(extractedFile1); os.IsNotExist(err) {
969+
t.Fatal("first build should have extracted artifact")
970+
}
971+
972+
config.ArtifactsDir = artifactsDir2
973+
974+
if err := Build("testdata/artifact-exact.wanda.yaml", config); err != nil {
975+
t.Fatalf("second build: %v", err)
976+
}
977+
978+
extractedFile2 := filepath.Join(artifactsDir2, "bin/myapp")
979+
if _, err := os.Stat(extractedFile2); os.IsNotExist(err) {
980+
t.Error("artifact should be extracted even on cache hit")
981+
}
982+
}

wanda/testdata/Dockerfile.artifact

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
FROM alpine:latest
2+
3+
RUN mkdir -p /build/dist /build/docs /app/bin \
4+
&& echo "wheel-content" > /build/dist/mypackage-1.0.0.whl \
5+
&& echo "wheel-content2" > /build/dist/mypackage-1.0.1.whl \
6+
&& echo "docs-content" > /build/docs/README.md \
7+
&& echo "binary-content" > /app/bin/myapp \
8+
&& chmod +x /app/bin/myapp
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ARG BASE
2+
FROM ${BASE}
3+
4+
RUN echo "top-layer-file" > /top-layer.txt
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
name: artifact-dep-base
2+
dockerfile: Dockerfile.artifact
3+
artifacts:
4+
- src: /build/docs/README.md
5+
dst: docs/readme.md
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
name: artifact-dep-top
2+
dockerfile: Dockerfile.artifact-top
3+
froms:
4+
- cr.ray.io/rayproject/artifact-dep-base
5+
build_args:
6+
- BASE=cr.ray.io/rayproject/artifact-dep-base
7+
artifacts:
8+
- src: /top-layer.txt
9+
dst: top.txt

0 commit comments

Comments
 (0)