Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions wanda/docker_cmd.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package wanda

import (
"archive/tar"
"bytes"
"encoding/json"
"fmt"
"io"
"log"
"os"
"os/exec"
Expand Down Expand Up @@ -126,6 +128,68 @@ func (c *dockerCmd) tag(src, asTag string) error {
return c.run("tag", src, asTag)
}

// createContainer creates a container from an image without starting it.
// Returns the container ID. A dummy command is provided for images without
// CMD/ENTRYPOINT. The command doesn't need to exist since the container is
// never started.
func (c *dockerCmd) createContainer(image string) (string, error) {
cmd := c.cmd("create", image, "unused")
buf := new(bytes.Buffer)
cmd.Stdout = buf
if err := cmd.Run(); err != nil {
return "", err
}
return strings.TrimSpace(buf.String()), nil
}

// copyFromContainer copies a file or directory from a container to the host.
func (c *dockerCmd) copyFromContainer(containerID, src, dst string) error {
return c.run("cp", containerID+":"+src, dst)
}

// removeContainer removes a container quietly (no stdout).
func (c *dockerCmd) removeContainer(containerID string) error {
cmd := exec.Command(c.bin, "rm", containerID)
cmd.Env = c.envs
cmd.Stderr = os.Stderr
return cmd.Run()
}

// listContainerFiles lists all files in a container using docker export.
func (c *dockerCmd) listContainerFiles(containerID string) ([]string, error) {
exportCmd := exec.Command(c.bin, "export", containerID)
exportCmd.Env = c.envs

stdout, err := exportCmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("create stdout pipe: %w", err)
}

if err := exportCmd.Start(); err != nil {
return nil, fmt.Errorf("start docker export: %w", err)
}

var files []string
tr := tar.NewReader(stdout)
for {
header, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
exportCmd.Process.Kill()
return nil, fmt.Errorf("read tar stream: %w", err)
}
files = append(files, "/"+strings.TrimPrefix(header.Name, "/"))
}

if err := exportCmd.Wait(); err != nil {
return nil, fmt.Errorf("docker export: %w", err)
}

return files, nil
}

func (c *dockerCmd) build(in *buildInput, core *buildInputCore, hints *buildInputHints) error {
if hints == nil {
hints = newBuildInputHints(nil, nil)
Expand Down
115 changes: 113 additions & 2 deletions wanda/docker_cmd_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package wanda

import (
"testing"

"os"
"path/filepath"
"strings"
"testing"

"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/v1/daemon"
Expand Down Expand Up @@ -135,3 +136,113 @@ func TestDockerCmdBuild_withHints(t *testing.T) {
t.Errorf("MESSAGE env got %q, want `MESSAGE=hint message`", messageEnv)
}
}

func TestDockerCmdCopyFromContainer(t *testing.T) {
cmd := newDockerCmd(&dockerCmdConfig{})

const testImage = "alpine:latest"

if err := cmd.run("pull", testImage); err != nil {
t.Fatalf("pull image: %v", err)
}

containerID, err := cmd.createContainer(testImage)
if err != nil {
t.Fatalf("createContainer: %v", err)
}
defer cmd.removeContainer(containerID)

tmpDir := t.TempDir()

// Copy a known file from the container
if err := cmd.copyFromContainer(containerID, "/etc/alpine-release", filepath.Join(tmpDir, "alpine-release")); err != nil {
t.Fatalf("copyFromContainer: %v", err)
}

if _, err := os.Stat(filepath.Join(tmpDir, "alpine-release")); os.IsNotExist(err) {
t.Error("alpine-release was not copied")
}
}

func TestDockerCmdCopyFromContainer_directory(t *testing.T) {
cmd := newDockerCmd(&dockerCmdConfig{})

const testImage = "alpine:latest"

if err := cmd.run("pull", testImage); err != nil {
t.Fatalf("pull image: %v", err)
}

containerID, err := cmd.createContainer(testImage)
if err != nil {
t.Fatalf("createContainer: %v", err)
}
defer cmd.removeContainer(containerID)

tmpDir := t.TempDir()

// Copy a directory from the container
if err := cmd.copyFromContainer(containerID, "/etc", filepath.Join(tmpDir, "etc")); err != nil {
t.Fatalf("copyFromContainer: %v", err)
}

if _, err := os.Stat(filepath.Join(tmpDir, "etc", "alpine-release")); os.IsNotExist(err) {
t.Error("alpine-release was not copied from /etc directory")
}
}

func TestDockerCmdCopyFromContainer_notFound(t *testing.T) {
cmd := newDockerCmd(&dockerCmdConfig{})

const testImage = "alpine:latest"

if err := cmd.run("pull", testImage); err != nil {
t.Fatalf("pull image: %v", err)
}

containerID, err := cmd.createContainer(testImage)
if err != nil {
t.Fatalf("createContainer: %v", err)
}
defer cmd.removeContainer(containerID)

tmpDir := t.TempDir()

// Copying a non-existent file should fail
if err := cmd.copyFromContainer(containerID, "/nonexistent/file", filepath.Join(tmpDir, "file")); err == nil {
t.Error("copyFromContainer should fail for non-existent file")
}
}

func TestDockerCmdListContainerFiles(t *testing.T) {
cmd := newDockerCmd(&dockerCmdConfig{})

const testImage = "alpine:latest"

if err := cmd.run("pull", testImage); err != nil {
t.Fatalf("pull image: %v", err)
}

containerID, err := cmd.createContainer(testImage)
if err != nil {
t.Fatalf("createContainer: %v", err)
}
defer cmd.removeContainer(containerID)

files, err := cmd.listContainerFiles(containerID)
if err != nil {
t.Fatalf("listContainerFiles: %v", err)
}

// Check that some expected files are present
found := false
for _, f := range files {
if f == "/etc/alpine-release" {
found = true
break
}
}
if !found {
t.Error("/etc/alpine-release not found in file list")
}
}
128 changes: 128 additions & 0 deletions wanda/forge.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"path/filepath"
"runtime"
"strings"
"time"

"github.com/google/go-containerregistry/pkg/authn"
cranename "github.com/google/go-containerregistry/pkg/name"
Expand Down Expand Up @@ -69,6 +70,17 @@ func Build(specFile string, config *ForgeConfig) error {
}
}

// Extract artifacts only for the root spec.
if config.ArtifactsDir != "" {
rootSpec := graph.Specs[graph.Root].Spec
if len(rootSpec.Artifacts) > 0 {
rootTag := forge.workTag(rootSpec.Name)
if err := forge.ExtractArtifacts(rootSpec, rootTag); err != nil {
return fmt.Errorf("extract artifacts: %w", err)
}
}
}

return nil
}

Expand Down Expand Up @@ -182,6 +194,122 @@ func (f *Forge) resolveBases(froms []string) (map[string]*imageSource, error) {
return m, nil
}

// ExtractArtifacts copies Artifacts from a built image to ArtifactsDir.
// Supports glob patterns in src paths (e.g., "/*.whl").
//
// NOTE(andrew-anyscale): We use `docker cp` for copying file-by-file rather than
// a single more efficient method of extracting from `docker export` because
// docker cp handles cross-platform issues reliably. If this becomes a bottleneck
// indicated by the log-line below, we can consider using a different approach.
func (f *Forge) ExtractArtifacts(spec *Spec, imageTag string) error {
d := f.newDockerCmd()
artifactsDir := f.config.ArtifactsDir

// In RayCI mode, clear the artifacts directory to avoid stale artifacts.
if f.config.RayCI {
if err := os.RemoveAll(artifactsDir); err != nil {
return fmt.Errorf("clear artifacts dir: %w", err)
}
}

if err := os.MkdirAll(artifactsDir, 0755); err != nil {
return fmt.Errorf("create artifacts dir: %w", err)
}

log.Printf("extracting %d artifact(s) from %s", len(spec.Artifacts), imageTag)
extractStart := time.Now()

// In remote mode, pull the image first (it may only exist in registry after
// cache hit).
if f.isRemote() {
if err := d.run("pull", imageTag); err != nil {
return fmt.Errorf("pull image for extraction: %w", err)
}
}

containerID, err := d.createContainer(imageTag)
if err != nil {
return fmt.Errorf("create container: %w", err)
}
defer func() {
if err := d.removeContainer(containerID); err != nil {
log.Printf("warning: failed to remove container %s: %v", containerID, err)
}
}()

// Lazily list container files only if needed for glob matching.
var containerFiles []string
var extracted []string

for _, a := range spec.Artifacts {
if err := a.Validate(); err != nil {
return fmt.Errorf("invalid artifact: %w", err)
}

if a.HasGlob() && containerFiles == nil {
var err error
containerFiles, err = d.listContainerFiles(containerID)
if err != nil {
return fmt.Errorf("list container files: %w", err)
}
}

srcs := a.ResolveSrcs(containerFiles)
if len(srcs) == 0 {
if a.Optional {
log.Printf("warning: no files matched pattern: %s", a.Src)
continue
}
return fmt.Errorf("no files matched pattern: %s", a.Src)
}

dstBase, err := a.ResolveDst(artifactsDir)
if err != nil {
return fmt.Errorf("resolve artifact dst: %w", err)
}

// Treat dst as a directory (preserving original filenames) when:
// - dst ends with "/" (explicit directory), or
// - multiple source files (can't rename all to one name)
dstIsDir := strings.HasSuffix(a.Dst, "/") || len(srcs) > 1

if dstIsDir {
if err := os.MkdirAll(dstBase, 0755); err != nil {
return fmt.Errorf("create dir for artifact %s: %w", a.Dst, err)
}
} else {
if err := os.MkdirAll(filepath.Dir(dstBase), 0755); err != nil {
return fmt.Errorf("create dir for artifact %s: %w", a.Dst, err)
}
}

for _, src := range srcs {
dst := dstBase
if dstIsDir {
dst = filepath.Join(dstBase, filepath.Base(src))
}

if err := d.copyFromContainer(containerID, src, dst); err != nil {
if a.Optional {
log.Printf("warning: optional artifact not found: %s", src)
continue
}
return fmt.Errorf("copy artifact %s: %w", src, err)
}
if abs, err := filepath.Abs(dst); err == nil {
dst = abs
}
extracted = append(extracted, dst)
}
}

log.Printf("extracted %d artifact(s) in %v:", len(extracted), time.Since(extractStart).Round(time.Millisecond))
for _, f := range extracted {
log.Printf(" %s", f)
}
return nil
}

// Build builds a container image from the given specification.
func (f *Forge) Build(spec *Spec) error {
// Prepare the tar stream.
Expand Down
1 change: 1 addition & 0 deletions wanda/forge_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type ForgeConfig struct {
Epoch string
WandaSpecsFile string
EnvFile string
ArtifactsDir string

RayCI bool
Rebuild bool
Expand Down
Loading