diff --git a/wanda/docker_cmd.go b/wanda/docker_cmd.go index 51fba272..6ff1ea5d 100644 --- a/wanda/docker_cmd.go +++ b/wanda/docker_cmd.go @@ -1,9 +1,11 @@ package wanda import ( + "archive/tar" "bytes" "encoding/json" "fmt" + "io" "log" "os" "os/exec" @@ -126,6 +128,68 @@ func (c *dockerCmd) tag(src, asTag string) error { return c.run("tag", src, asTag) } +// createContainer creates a container from an image without starting it. +// Returns the container ID. A dummy command is provided for images without +// CMD/ENTRYPOINT. The command doesn't need to exist since the container is +// never started. +func (c *dockerCmd) createContainer(image string) (string, error) { + cmd := c.cmd("create", image, "unused") + buf := new(bytes.Buffer) + cmd.Stdout = buf + if err := cmd.Run(); err != nil { + return "", err + } + return strings.TrimSpace(buf.String()), nil +} + +// copyFromContainer copies a file or directory from a container to the host. +func (c *dockerCmd) copyFromContainer(containerID, src, dst string) error { + return c.run("cp", containerID+":"+src, dst) +} + +// removeContainer removes a container quietly (no stdout). +func (c *dockerCmd) removeContainer(containerID string) error { + cmd := exec.Command(c.bin, "rm", containerID) + cmd.Env = c.envs + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// listContainerFiles lists all files in a container using docker export. +func (c *dockerCmd) listContainerFiles(containerID string) ([]string, error) { + exportCmd := exec.Command(c.bin, "export", containerID) + exportCmd.Env = c.envs + + stdout, err := exportCmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("create stdout pipe: %w", err) + } + + if err := exportCmd.Start(); err != nil { + return nil, fmt.Errorf("start docker export: %w", err) + } + + var files []string + tr := tar.NewReader(stdout) + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + exportCmd.Process.Kill() + return nil, fmt.Errorf("read tar stream: %w", err) + } + files = append(files, "/"+strings.TrimPrefix(header.Name, "/")) + } + + if err := exportCmd.Wait(); err != nil { + return nil, fmt.Errorf("docker export: %w", err) + } + + return files, nil +} + func (c *dockerCmd) build(in *buildInput, core *buildInputCore, hints *buildInputHints) error { if hints == nil { hints = newBuildInputHints(nil, nil) diff --git a/wanda/docker_cmd_test.go b/wanda/docker_cmd_test.go index 2de2fb69..88561362 100644 --- a/wanda/docker_cmd_test.go +++ b/wanda/docker_cmd_test.go @@ -1,9 +1,10 @@ package wanda import ( - "testing" - + "os" + "path/filepath" "strings" + "testing" "github.com/google/go-containerregistry/pkg/name" "github.com/google/go-containerregistry/pkg/v1/daemon" @@ -135,3 +136,113 @@ func TestDockerCmdBuild_withHints(t *testing.T) { t.Errorf("MESSAGE env got %q, want `MESSAGE=hint message`", messageEnv) } } + +func TestDockerCmdCopyFromContainer(t *testing.T) { + cmd := newDockerCmd(&dockerCmdConfig{}) + + const testImage = "alpine:latest" + + if err := cmd.run("pull", testImage); err != nil { + t.Fatalf("pull image: %v", err) + } + + containerID, err := cmd.createContainer(testImage) + if err != nil { + t.Fatalf("createContainer: %v", err) + } + defer cmd.removeContainer(containerID) + + tmpDir := t.TempDir() + + // Copy a known file from the container + if err := cmd.copyFromContainer(containerID, "/etc/alpine-release", filepath.Join(tmpDir, "alpine-release")); err != nil { + t.Fatalf("copyFromContainer: %v", err) + } + + if _, err := os.Stat(filepath.Join(tmpDir, "alpine-release")); os.IsNotExist(err) { + t.Error("alpine-release was not copied") + } +} + +func TestDockerCmdCopyFromContainer_directory(t *testing.T) { + cmd := newDockerCmd(&dockerCmdConfig{}) + + const testImage = "alpine:latest" + + if err := cmd.run("pull", testImage); err != nil { + t.Fatalf("pull image: %v", err) + } + + containerID, err := cmd.createContainer(testImage) + if err != nil { + t.Fatalf("createContainer: %v", err) + } + defer cmd.removeContainer(containerID) + + tmpDir := t.TempDir() + + // Copy a directory from the container + if err := cmd.copyFromContainer(containerID, "/etc", filepath.Join(tmpDir, "etc")); err != nil { + t.Fatalf("copyFromContainer: %v", err) + } + + if _, err := os.Stat(filepath.Join(tmpDir, "etc", "alpine-release")); os.IsNotExist(err) { + t.Error("alpine-release was not copied from /etc directory") + } +} + +func TestDockerCmdCopyFromContainer_notFound(t *testing.T) { + cmd := newDockerCmd(&dockerCmdConfig{}) + + const testImage = "alpine:latest" + + if err := cmd.run("pull", testImage); err != nil { + t.Fatalf("pull image: %v", err) + } + + containerID, err := cmd.createContainer(testImage) + if err != nil { + t.Fatalf("createContainer: %v", err) + } + defer cmd.removeContainer(containerID) + + tmpDir := t.TempDir() + + // Copying a non-existent file should fail + if err := cmd.copyFromContainer(containerID, "/nonexistent/file", filepath.Join(tmpDir, "file")); err == nil { + t.Error("copyFromContainer should fail for non-existent file") + } +} + +func TestDockerCmdListContainerFiles(t *testing.T) { + cmd := newDockerCmd(&dockerCmdConfig{}) + + const testImage = "alpine:latest" + + if err := cmd.run("pull", testImage); err != nil { + t.Fatalf("pull image: %v", err) + } + + containerID, err := cmd.createContainer(testImage) + if err != nil { + t.Fatalf("createContainer: %v", err) + } + defer cmd.removeContainer(containerID) + + files, err := cmd.listContainerFiles(containerID) + if err != nil { + t.Fatalf("listContainerFiles: %v", err) + } + + // Check that some expected files are present + found := false + for _, f := range files { + if f == "/etc/alpine-release" { + found = true + break + } + } + if !found { + t.Error("/etc/alpine-release not found in file list") + } +} diff --git a/wanda/forge.go b/wanda/forge.go index bfdcb443..cebb05ff 100644 --- a/wanda/forge.go +++ b/wanda/forge.go @@ -7,6 +7,7 @@ import ( "path/filepath" "runtime" "strings" + "time" "github.com/google/go-containerregistry/pkg/authn" cranename "github.com/google/go-containerregistry/pkg/name" @@ -69,6 +70,17 @@ func Build(specFile string, config *ForgeConfig) error { } } + // Extract artifacts only for the root spec. + if config.ArtifactsDir != "" { + rootSpec := graph.Specs[graph.Root].Spec + if len(rootSpec.Artifacts) > 0 { + rootTag := forge.workTag(rootSpec.Name) + if err := forge.ExtractArtifacts(rootSpec, rootTag); err != nil { + return fmt.Errorf("extract artifacts: %w", err) + } + } + } + return nil } @@ -182,6 +194,122 @@ func (f *Forge) resolveBases(froms []string) (map[string]*imageSource, error) { return m, nil } +// ExtractArtifacts copies Artifacts from a built image to ArtifactsDir. +// Supports glob patterns in src paths (e.g., "/*.whl"). +// +// NOTE(andrew-anyscale): We use `docker cp` for copying file-by-file rather than +// a single more efficient method of extracting from `docker export` because +// docker cp handles cross-platform issues reliably. If this becomes a bottleneck +// indicated by the log-line below, we can consider using a different approach. +func (f *Forge) ExtractArtifacts(spec *Spec, imageTag string) error { + d := f.newDockerCmd() + artifactsDir := f.config.ArtifactsDir + + // In RayCI mode, clear the artifacts directory to avoid stale artifacts. + if f.config.RayCI { + if err := os.RemoveAll(artifactsDir); err != nil { + return fmt.Errorf("clear artifacts dir: %w", err) + } + } + + if err := os.MkdirAll(artifactsDir, 0755); err != nil { + return fmt.Errorf("create artifacts dir: %w", err) + } + + log.Printf("extracting %d artifact(s) from %s", len(spec.Artifacts), imageTag) + extractStart := time.Now() + + // In remote mode, pull the image first (it may only exist in registry after + // cache hit). + if f.isRemote() { + if err := d.run("pull", imageTag); err != nil { + return fmt.Errorf("pull image for extraction: %w", err) + } + } + + containerID, err := d.createContainer(imageTag) + if err != nil { + return fmt.Errorf("create container: %w", err) + } + defer func() { + if err := d.removeContainer(containerID); err != nil { + log.Printf("warning: failed to remove container %s: %v", containerID, err) + } + }() + + // Lazily list container files only if needed for glob matching. + var containerFiles []string + var extracted []string + + for _, a := range spec.Artifacts { + if err := a.Validate(); err != nil { + return fmt.Errorf("invalid artifact: %w", err) + } + + if a.HasGlob() && containerFiles == nil { + var err error + containerFiles, err = d.listContainerFiles(containerID) + if err != nil { + return fmt.Errorf("list container files: %w", err) + } + } + + srcs := a.ResolveSrcs(containerFiles) + if len(srcs) == 0 { + if a.Optional { + log.Printf("warning: no files matched pattern: %s", a.Src) + continue + } + return fmt.Errorf("no files matched pattern: %s", a.Src) + } + + dstBase, err := a.ResolveDst(artifactsDir) + if err != nil { + return fmt.Errorf("resolve artifact dst: %w", err) + } + + // Treat dst as a directory (preserving original filenames) when: + // - dst ends with "/" (explicit directory), or + // - multiple source files (can't rename all to one name) + dstIsDir := strings.HasSuffix(a.Dst, "/") || len(srcs) > 1 + + if dstIsDir { + if err := os.MkdirAll(dstBase, 0755); err != nil { + return fmt.Errorf("create dir for artifact %s: %w", a.Dst, err) + } + } else { + if err := os.MkdirAll(filepath.Dir(dstBase), 0755); err != nil { + return fmt.Errorf("create dir for artifact %s: %w", a.Dst, err) + } + } + + for _, src := range srcs { + dst := dstBase + if dstIsDir { + dst = filepath.Join(dstBase, filepath.Base(src)) + } + + if err := d.copyFromContainer(containerID, src, dst); err != nil { + if a.Optional { + log.Printf("warning: optional artifact not found: %s", src) + continue + } + return fmt.Errorf("copy artifact %s: %w", src, err) + } + if abs, err := filepath.Abs(dst); err == nil { + dst = abs + } + extracted = append(extracted, dst) + } + } + + log.Printf("extracted %d artifact(s) in %v:", len(extracted), time.Since(extractStart).Round(time.Millisecond)) + for _, f := range extracted { + log.Printf(" %s", f) + } + return nil +} + // Build builds a container image from the given specification. func (f *Forge) Build(spec *Spec) error { // Prepare the tar stream. diff --git a/wanda/forge_config.go b/wanda/forge_config.go index ec63423c..80d898a1 100644 --- a/wanda/forge_config.go +++ b/wanda/forge_config.go @@ -15,6 +15,7 @@ type ForgeConfig struct { Epoch string WandaSpecsFile string EnvFile string + ArtifactsDir string RayCI bool Rebuild bool diff --git a/wanda/forge_test.go b/wanda/forge_test.go index cd76dc76..f82a960b 100644 --- a/wanda/forge_test.go +++ b/wanda/forge_test.go @@ -846,3 +846,223 @@ func TestBuild_EnvfileCacheInvalidation(t *testing.T) { t.Errorf("expected cache miss on changed envfile, got %d hits", forge2.cacheHit()) } } + +func TestForgeConfigArtifactsDir(t *testing.T) { + config := &ForgeConfig{ArtifactsDir: "/custom/artifacts", WorkDir: "/work"} + if got := config.ArtifactsDir; got != "/custom/artifacts" { + t.Errorf("ArtifactsDir = %q, want %q", got, "/custom/artifacts") + } +} + +func TestBuild_WithArtifacts_exact(t *testing.T) { + tmpDir := t.TempDir() + artifactsDir := filepath.Join(tmpDir, "artifacts") + + config := &ForgeConfig{ + WorkDir: "testdata", + NamePrefix: "cr.ray.io/rayproject/", + ArtifactsDir: artifactsDir, + } + + if err := Build("testdata/artifact-exact.wanda.yaml", config); err != nil { + t.Fatalf("build: %v", err) + } + + extractedFile := filepath.Join(artifactsDir, "bin/myapp") + content, err := os.ReadFile(extractedFile) + if err != nil { + t.Fatalf("read extracted file: %v", err) + } + + want := "binary-content\n" + if got := string(content); got != want { + t.Errorf("extracted content = %q, want %q", got, want) + } +} + +func TestBuild_WithArtifacts_optional(t *testing.T) { + tmpDir := t.TempDir() + artifactsDir := filepath.Join(tmpDir, "artifacts") + + config := &ForgeConfig{ + WorkDir: "testdata", + NamePrefix: "cr.ray.io/rayproject/", + ArtifactsDir: artifactsDir, + } + + // Build should succeed even though the optional artifact doesn't exist + if err := Build("testdata/artifact-optional.wanda.yaml", config); err != nil { + t.Fatalf("build: %v", err) + } + + // Required artifact should be extracted + extractedFile := filepath.Join(artifactsDir, "bin/myapp") + content, err := os.ReadFile(extractedFile) + if err != nil { + t.Fatalf("read extracted file: %v", err) + } + + want := "binary-content\n" + if got := string(content); got != want { + t.Errorf("extracted content = %q, want %q", got, want) + } + + // Optional artifact should not exist (since source doesn't exist) + optionalFile := filepath.Join(artifactsDir, "optional.txt") + if _, err := os.Stat(optionalFile); !os.IsNotExist(err) { + t.Errorf("optional file should not exist, but got err: %v", err) + } +} + +func TestBuild_WithArtifacts_rootOnly(t *testing.T) { + wandaSpecs := filepath.Join(t.TempDir(), ".wandaspecs") + absTestdata, err := filepath.Abs("testdata") + if err != nil { + t.Fatalf("abs testdata: %v", err) + } + if err := os.WriteFile(wandaSpecs, []byte(absTestdata), 0644); err != nil { + t.Fatalf("write wandaspecs: %v", err) + } + + tmpDir := t.TempDir() + artifactsDir := filepath.Join(tmpDir, "artifacts") + + config := &ForgeConfig{ + WorkDir: "testdata", + NamePrefix: "cr.ray.io/rayproject/", + WandaSpecsFile: wandaSpecs, + ArtifactsDir: artifactsDir, + } + + if err := Build("testdata/artifact-dep-top.wanda.yaml", config); err != nil { + t.Fatalf("build with deps: %v", err) + } + + topFile := filepath.Join(artifactsDir, "top.txt") + if _, err := os.Stat(topFile); os.IsNotExist(err) { + t.Error("root spec artifact should have been extracted") + } + + depDocsFile := filepath.Join(artifactsDir, "docs/readme.md") + if _, err := os.Stat(depDocsFile); !os.IsNotExist(err) { + t.Error("dependency artifact should NOT have been extracted") + } +} + +func TestBuild_WithArtifacts_cacheHit(t *testing.T) { + tmpDir := t.TempDir() + artifactsDir1 := filepath.Join(tmpDir, "artifacts1") + artifactsDir2 := filepath.Join(tmpDir, "artifacts2") + + config := &ForgeConfig{ + WorkDir: "testdata", + NamePrefix: "cr.ray.io/rayproject/", + ArtifactsDir: artifactsDir1, + } + + if err := Build("testdata/artifact-exact.wanda.yaml", config); err != nil { + t.Fatalf("first build: %v", err) + } + + extractedFile1 := filepath.Join(artifactsDir1, "bin/myapp") + if _, err := os.Stat(extractedFile1); os.IsNotExist(err) { + t.Fatal("first build should have extracted artifact") + } + + config.ArtifactsDir = artifactsDir2 + + if err := Build("testdata/artifact-exact.wanda.yaml", config); err != nil { + t.Fatalf("second build: %v", err) + } + + extractedFile2 := filepath.Join(artifactsDir2, "bin/myapp") + if _, err := os.Stat(extractedFile2); os.IsNotExist(err) { + t.Error("artifact should be extracted even on cache hit") + } +} + +func TestBuild_WithArtifacts_glob(t *testing.T) { + tmpDir := t.TempDir() + artifactsDir := filepath.Join(tmpDir, "artifacts") + + config := &ForgeConfig{ + WorkDir: "testdata", + NamePrefix: "cr.ray.io/rayproject/", + ArtifactsDir: artifactsDir, + } + + if err := Build("testdata/artifact-glob.wanda.yaml", config); err != nil { + t.Fatalf("build: %v", err) + } + + // Check that both wheel files were extracted + wheels := []string{"mypackage-1.0.0.whl", "mypackage-1.0.1.whl"} + for _, wheel := range wheels { + extractedFile := filepath.Join(artifactsDir, "wheels", wheel) + content, err := os.ReadFile(extractedFile) + if err != nil { + t.Errorf("read extracted file %s: %v", wheel, err) + continue + } + if len(content) == 0 { + t.Errorf("extracted file %s is empty", wheel) + } + } +} + +func TestBuild_WithArtifacts_globNoTrailingSlash(t *testing.T) { + tmpDir := t.TempDir() + artifactsDir := filepath.Join(tmpDir, "artifacts") + + config := &ForgeConfig{ + WorkDir: "testdata", + NamePrefix: "cr.ray.io/rayproject/", + ArtifactsDir: artifactsDir, + } + + // This spec has dst: "wheels" (no trailing slash) but glob matches multiple files. + // The code should automatically treat dst as a directory. + if err := Build("testdata/artifact-glob-notrail.wanda.yaml", config); err != nil { + t.Fatalf("build: %v", err) + } + + // Check that both wheel files were extracted into the wheels directory + wheels := []string{"mypackage-1.0.0.whl", "mypackage-1.0.1.whl"} + for _, wheel := range wheels { + extractedFile := filepath.Join(artifactsDir, "wheels", wheel) + content, err := os.ReadFile(extractedFile) + if err != nil { + t.Errorf("read extracted file %s: %v", wheel, err) + continue + } + if len(content) == 0 { + t.Errorf("extracted file %s is empty", wheel) + } + } +} + +func TestBuild_WithArtifacts_noCmdImage(t *testing.T) { + tmpDir := t.TempDir() + artifactsDir := filepath.Join(tmpDir, "artifacts") + + config := &ForgeConfig{ + WorkDir: "testdata", + NamePrefix: "cr.ray.io/rayproject/", + ArtifactsDir: artifactsDir, + } + + if err := Build("testdata/artifact-nocmd.wanda.yaml", config); err != nil { + t.Fatalf("build: %v", err) + } + + extractedFile := filepath.Join(artifactsDir, "output.txt") + content, err := os.ReadFile(extractedFile) + if err != nil { + t.Fatalf("read extracted file: %v", err) + } + + want := "test-content\n" + if got := string(content); got != want { + t.Errorf("extracted content = %q, want %q", got, want) + } +} diff --git a/wanda/testdata/Dockerfile.artifact b/wanda/testdata/Dockerfile.artifact new file mode 100644 index 00000000..60ae4629 --- /dev/null +++ b/wanda/testdata/Dockerfile.artifact @@ -0,0 +1,8 @@ +FROM alpine:latest + +RUN mkdir -p /build/dist /build/docs /app/bin \ + && echo "wheel-content" > /build/dist/mypackage-1.0.0.whl \ + && echo "wheel-content2" > /build/dist/mypackage-1.0.1.whl \ + && echo "docs-content" > /build/docs/README.md \ + && echo "binary-content" > /app/bin/myapp \ + && chmod +x /app/bin/myapp diff --git a/wanda/testdata/Dockerfile.artifact-top b/wanda/testdata/Dockerfile.artifact-top new file mode 100644 index 00000000..c68dd97d --- /dev/null +++ b/wanda/testdata/Dockerfile.artifact-top @@ -0,0 +1,4 @@ +ARG BASE +FROM ${BASE} + +RUN echo "top-layer-file" > /top-layer.txt diff --git a/wanda/testdata/Dockerfile.nocmd b/wanda/testdata/Dockerfile.nocmd new file mode 100644 index 00000000..32ecf79e --- /dev/null +++ b/wanda/testdata/Dockerfile.nocmd @@ -0,0 +1,5 @@ +FROM alpine:latest AS builder +RUN echo "test-content" > /output.txt + +FROM scratch +COPY --from=builder /output.txt /output.txt diff --git a/wanda/testdata/artifact-dep-base.wanda.yaml b/wanda/testdata/artifact-dep-base.wanda.yaml new file mode 100644 index 00000000..6317fbe5 --- /dev/null +++ b/wanda/testdata/artifact-dep-base.wanda.yaml @@ -0,0 +1,5 @@ +name: artifact-dep-base +dockerfile: Dockerfile.artifact +artifacts: + - src: /build/docs/README.md + dst: docs/readme.md diff --git a/wanda/testdata/artifact-dep-top.wanda.yaml b/wanda/testdata/artifact-dep-top.wanda.yaml new file mode 100644 index 00000000..c6c58688 --- /dev/null +++ b/wanda/testdata/artifact-dep-top.wanda.yaml @@ -0,0 +1,9 @@ +name: artifact-dep-top +dockerfile: Dockerfile.artifact-top +froms: + - cr.ray.io/rayproject/artifact-dep-base +build_args: + - BASE=cr.ray.io/rayproject/artifact-dep-base +artifacts: + - src: /top-layer.txt + dst: top.txt diff --git a/wanda/testdata/artifact-exact.wanda.yaml b/wanda/testdata/artifact-exact.wanda.yaml new file mode 100644 index 00000000..44dd4f75 --- /dev/null +++ b/wanda/testdata/artifact-exact.wanda.yaml @@ -0,0 +1,5 @@ +name: artifact-exact +dockerfile: Dockerfile.artifact +artifacts: + - src: /app/bin/myapp + dst: bin/myapp diff --git a/wanda/testdata/artifact-glob-notrail.wanda.yaml b/wanda/testdata/artifact-glob-notrail.wanda.yaml new file mode 100644 index 00000000..7591b336 --- /dev/null +++ b/wanda/testdata/artifact-glob-notrail.wanda.yaml @@ -0,0 +1,5 @@ +name: artifact-glob-notrail +dockerfile: Dockerfile.artifact +artifacts: + - src: "/build/dist/*.whl" + dst: wheels diff --git a/wanda/testdata/artifact-glob.wanda.yaml b/wanda/testdata/artifact-glob.wanda.yaml new file mode 100644 index 00000000..a9c2d834 --- /dev/null +++ b/wanda/testdata/artifact-glob.wanda.yaml @@ -0,0 +1,5 @@ +name: artifact-glob +dockerfile: Dockerfile.artifact +artifacts: + - src: "/build/dist/*.whl" + dst: wheels/ diff --git a/wanda/testdata/artifact-nocmd.wanda.yaml b/wanda/testdata/artifact-nocmd.wanda.yaml new file mode 100644 index 00000000..e77255ad --- /dev/null +++ b/wanda/testdata/artifact-nocmd.wanda.yaml @@ -0,0 +1,5 @@ +name: artifact-nocmd +dockerfile: Dockerfile.nocmd +artifacts: + - src: /output.txt + dst: output.txt diff --git a/wanda/testdata/artifact-optional.wanda.yaml b/wanda/testdata/artifact-optional.wanda.yaml new file mode 100644 index 00000000..62f4bd05 --- /dev/null +++ b/wanda/testdata/artifact-optional.wanda.yaml @@ -0,0 +1,8 @@ +name: artifact-optional +dockerfile: Dockerfile.artifact +artifacts: + - src: /app/bin/myapp + dst: bin/myapp + - src: /nonexistent/optional-file.txt + dst: optional.txt + optional: true