Skip to content

Commit 1e3c63b

Browse files
feat(wanda): implement artifact copying from built images
Extract artifacts from built images using `docker cp`. This approach works reliably across platforms, including Windows. - Creates container from image without starting it - Copies each artifact using `docker cp` - Cleans up container when done - Optional artifacts log a warning instead of failing - Extraction runs for root spec only, even on cache hit - Logs extraction duration for performance monitoring Example: ```shell ❯ PYTHON_VERSION=3.13 MANYLINUX_VERSION=260128.221a193 HOSTTYPE=aarch64 ARCH_SUFFIX=-aarch64 BUILDKITE_COMMIT=b5737cefc0 IS_LOCAL_BUILD=true /Users/andrew/devel/rayci-wanda-artifacts/_release/wanda-darwin-arm64 --artifacts_dir .whl/ ci/docker/ray-wheel.wanda.yaml ... 2026/02/04 09:10:39 extracting 1 artifact(s) from localhost:5000/rayci-work:ray-wheel-py3.13-aarch64 Successfully copied 72MB to /Users/andrew/devel/ray-local-wheel-build/.whl/ray-3.0.0.dev0-cp313-cp313-manylinux2014_aarch64.whl 2026/02/04 09:10:40 extracted 1 artifact(s) in 722ms: 2026/02/04 09:10:40 /Users/andrew/devel/ray-local-wheel-build/.whl/ray-3.0.0.dev0-cp313-cp313-manylinux2014_aarch64.whl ``` Topic: wanda-artifact-copy Relative: wanda-artifact-spec Labels: draft Signed-off-by: andrew <andrew@anyscale.com>
1 parent f1104ff commit 1e3c63b

15 files changed

+572
-2
lines changed

wanda/docker_cmd.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package wanda
22

33
import (
4+
"archive/tar"
45
"bytes"
56
"encoding/json"
67
"fmt"
8+
"io"
79
"log"
810
"os"
911
"os/exec"
@@ -100,6 +102,11 @@ type dockerImageInfo struct {
100102
ID string `json:"Id"`
101103
RepoDigests []string
102104
RepoTags []string
105+
Config *dockerImageConfig `json:"Config"`
106+
}
107+
108+
// dockerImageConfig is the subset of an image's "Config" JSON object that is
// decoded; only the working directory is kept.
type dockerImageConfig struct {
	// WorkingDir is decoded from the "WorkingDir" JSON key.
	WorkingDir string `json:"WorkingDir"`
}
104111

105112
func (c *dockerCmd) inspectImage(tag string) (*dockerImageInfo, error) {
@@ -126,6 +133,68 @@ func (c *dockerCmd) tag(src, asTag string) error {
126133
return c.run("tag", src, asTag)
127134
}
128135

136+
// createContainer creates a container from an image without starting it.
137+
// Returns the container ID. A dummy command is provided for images without
138+
// CMD/ENTRYPOINT. The command doesn't need to exist since the container is
139+
// never started.
140+
func (c *dockerCmd) createContainer(image string) (string, error) {
141+
cmd := c.cmd("create", image, "unused")
142+
buf := new(bytes.Buffer)
143+
cmd.Stdout = buf
144+
if err := cmd.Run(); err != nil {
145+
return "", err
146+
}
147+
return strings.TrimSpace(buf.String()), nil
148+
}
149+
150+
// copyFromContainer copies a file or directory from a container to the host.
151+
func (c *dockerCmd) copyFromContainer(containerID, src, dst string) error {
152+
return c.run("cp", containerID+":"+src, dst)
153+
}
154+
155+
// removeContainer removes a container quietly (no stdout).
156+
func (c *dockerCmd) removeContainer(containerID string) error {
157+
cmd := exec.Command(c.bin, "rm", containerID)
158+
cmd.Env = c.envs
159+
cmd.Stderr = os.Stderr
160+
return cmd.Run()
161+
}
162+
163+
// listContainerFiles lists all files in a container using docker export.
164+
func (c *dockerCmd) listContainerFiles(containerID string) ([]string, error) {
165+
exportCmd := exec.Command(c.bin, "export", containerID)
166+
exportCmd.Env = c.envs
167+
168+
stdout, err := exportCmd.StdoutPipe()
169+
if err != nil {
170+
return nil, fmt.Errorf("create stdout pipe: %w", err)
171+
}
172+
173+
if err := exportCmd.Start(); err != nil {
174+
return nil, fmt.Errorf("start docker export: %w", err)
175+
}
176+
177+
var files []string
178+
tr := tar.NewReader(stdout)
179+
for {
180+
header, err := tr.Next()
181+
if err == io.EOF {
182+
break
183+
}
184+
if err != nil {
185+
exportCmd.Process.Kill()
186+
return nil, fmt.Errorf("read tar stream: %w", err)
187+
}
188+
files = append(files, "/"+strings.TrimPrefix(header.Name, "/"))
189+
}
190+
191+
if err := exportCmd.Wait(); err != nil {
192+
return nil, fmt.Errorf("docker export: %w", err)
193+
}
194+
195+
return files, nil
196+
}
197+
129198
func (c *dockerCmd) build(in *buildInput, core *buildInputCore, hints *buildInputHints) error {
130199
if hints == nil {
131200
hints = newBuildInputHints(nil, nil)

wanda/docker_cmd_test.go

Lines changed: 113 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
package wanda
22

33
import (
4-
"testing"
5-
4+
"os"
5+
"path/filepath"
66
"strings"
7+
"testing"
78

89
"github.com/google/go-containerregistry/pkg/name"
910
"github.com/google/go-containerregistry/pkg/v1/daemon"
@@ -135,3 +136,113 @@ func TestDockerCmdBuild_withHints(t *testing.T) {
135136
t.Errorf("MESSAGE env got %q, want `MESSAGE=hint message`", messageEnv)
136137
}
137138
}
139+
140+
func TestDockerCmdCopyFromContainer(t *testing.T) {
141+
cmd := newDockerCmd(&dockerCmdConfig{})
142+
143+
const testImage = "alpine:latest"
144+
145+
if err := cmd.run("pull", testImage); err != nil {
146+
t.Fatalf("pull image: %v", err)
147+
}
148+
149+
containerID, err := cmd.createContainer(testImage)
150+
if err != nil {
151+
t.Fatalf("createContainer: %v", err)
152+
}
153+
defer cmd.removeContainer(containerID)
154+
155+
tmpDir := t.TempDir()
156+
157+
// Copy a known file from the container
158+
if err := cmd.copyFromContainer(containerID, "/etc/alpine-release", filepath.Join(tmpDir, "alpine-release")); err != nil {
159+
t.Fatalf("copyFromContainer: %v", err)
160+
}
161+
162+
if _, err := os.Stat(filepath.Join(tmpDir, "alpine-release")); os.IsNotExist(err) {
163+
t.Error("alpine-release was not copied")
164+
}
165+
}
166+
167+
func TestDockerCmdCopyFromContainer_directory(t *testing.T) {
168+
cmd := newDockerCmd(&dockerCmdConfig{})
169+
170+
const testImage = "alpine:latest"
171+
172+
if err := cmd.run("pull", testImage); err != nil {
173+
t.Fatalf("pull image: %v", err)
174+
}
175+
176+
containerID, err := cmd.createContainer(testImage)
177+
if err != nil {
178+
t.Fatalf("createContainer: %v", err)
179+
}
180+
defer cmd.removeContainer(containerID)
181+
182+
tmpDir := t.TempDir()
183+
184+
// Copy a directory from the container
185+
if err := cmd.copyFromContainer(containerID, "/etc", filepath.Join(tmpDir, "etc")); err != nil {
186+
t.Fatalf("copyFromContainer: %v", err)
187+
}
188+
189+
if _, err := os.Stat(filepath.Join(tmpDir, "etc", "alpine-release")); os.IsNotExist(err) {
190+
t.Error("alpine-release was not copied from /etc directory")
191+
}
192+
}
193+
194+
func TestDockerCmdCopyFromContainer_notFound(t *testing.T) {
195+
cmd := newDockerCmd(&dockerCmdConfig{})
196+
197+
const testImage = "alpine:latest"
198+
199+
if err := cmd.run("pull", testImage); err != nil {
200+
t.Fatalf("pull image: %v", err)
201+
}
202+
203+
containerID, err := cmd.createContainer(testImage)
204+
if err != nil {
205+
t.Fatalf("createContainer: %v", err)
206+
}
207+
defer cmd.removeContainer(containerID)
208+
209+
tmpDir := t.TempDir()
210+
211+
// Copying a non-existent file should fail
212+
if err := cmd.copyFromContainer(containerID, "/nonexistent/file", filepath.Join(tmpDir, "file")); err == nil {
213+
t.Error("copyFromContainer should fail for non-existent file")
214+
}
215+
}
216+
217+
func TestDockerCmdListContainerFiles(t *testing.T) {
218+
cmd := newDockerCmd(&dockerCmdConfig{})
219+
220+
const testImage = "alpine:latest"
221+
222+
if err := cmd.run("pull", testImage); err != nil {
223+
t.Fatalf("pull image: %v", err)
224+
}
225+
226+
containerID, err := cmd.createContainer(testImage)
227+
if err != nil {
228+
t.Fatalf("createContainer: %v", err)
229+
}
230+
defer cmd.removeContainer(containerID)
231+
232+
files, err := cmd.listContainerFiles(containerID)
233+
if err != nil {
234+
t.Fatalf("listContainerFiles: %v", err)
235+
}
236+
237+
// Check that some expected files are present
238+
found := false
239+
for _, f := range files {
240+
if f == "/etc/alpine-release" {
241+
found = true
242+
break
243+
}
244+
}
245+
if !found {
246+
t.Error("/etc/alpine-release not found in file list")
247+
}
248+
}

wanda/forge.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"path/filepath"
88
"runtime"
99
"strings"
10+
"time"
1011

1112
"github.com/google/go-containerregistry/pkg/authn"
1213
cranename "github.com/google/go-containerregistry/pkg/name"
@@ -69,6 +70,17 @@ func Build(specFile string, config *ForgeConfig) error {
6970
}
7071
}
7172

73+
// Extract artifacts only for the root spec.
74+
if config.ArtifactsDir != "" {
75+
rootSpec := graph.Specs[graph.Root].Spec
76+
if len(rootSpec.Artifacts) > 0 {
77+
rootTag := forge.workTag(rootSpec.Name)
78+
if err := forge.ExtractArtifacts(rootSpec, rootTag); err != nil {
79+
return fmt.Errorf("extract artifacts: %w", err)
80+
}
81+
}
82+
}
83+
7284
return nil
7385
}
7486

@@ -182,6 +194,103 @@ func (f *Forge) resolveBases(froms []string) (map[string]*imageSource, error) {
182194
return m, nil
183195
}
184196

197+
// ExtractArtifacts copies Artifacts from a built image to ArtifactsDir.
198+
// Supports glob patterns in src paths (e.g., "/*.whl").
199+
//
200+
// NOTE(andrew-anyscale): We use `docker cp` for copying file-by-file rather than
201+
// a single more efficient method of extracting from `docker export` because
202+
// docker cp handles cross-platform issues reliably. If this becomes a bottleneck
203+
// indicated by the log-line below, we can consider using a different approach.
204+
func (f *Forge) ExtractArtifacts(spec *Spec, imageTag string) error {
205+
d := f.newDockerCmd()
206+
artifactsDir := f.config.ArtifactsDir
207+
208+
if err := os.MkdirAll(artifactsDir, 0755); err != nil {
209+
return fmt.Errorf("create artifacts dir: %w", err)
210+
}
211+
212+
log.Printf("extracting %d artifact(s) from %s", len(spec.Artifacts), imageTag)
213+
extractStart := time.Now()
214+
215+
containerID, err := d.createContainer(imageTag)
216+
if err != nil {
217+
return fmt.Errorf("create container: %w", err)
218+
}
219+
defer func() {
220+
if err := d.removeContainer(containerID); err != nil {
221+
log.Printf("warning: failed to remove container %s: %v", containerID, err)
222+
}
223+
}()
224+
225+
// Lazily list container files only if needed for glob matching.
226+
var containerFiles []string
227+
var extracted []string
228+
229+
for _, a := range spec.Artifacts {
230+
if err := a.Validate(); err != nil {
231+
return fmt.Errorf("invalid artifact: %w", err)
232+
}
233+
234+
if a.HasGlob() && containerFiles == nil {
235+
var err error
236+
containerFiles, err = d.listContainerFiles(containerID)
237+
if err != nil {
238+
return fmt.Errorf("list container files: %w", err)
239+
}
240+
}
241+
242+
srcs := a.ResolveSrcs(containerFiles)
243+
if len(srcs) == 0 {
244+
if a.Optional {
245+
log.Printf("warning: no files matched pattern: %s", a.Src)
246+
continue
247+
}
248+
return fmt.Errorf("no files matched pattern: %s", a.Src)
249+
}
250+
251+
dstBase, err := a.ResolveDst(artifactsDir)
252+
if err != nil {
253+
return fmt.Errorf("resolve artifact dst: %w", err)
254+
}
255+
copyIntoDir := a.CopyIntoDir(len(srcs))
256+
257+
if copyIntoDir {
258+
if err := os.MkdirAll(dstBase, 0755); err != nil {
259+
return fmt.Errorf("create dir for artifact %s: %w", a.Dst, err)
260+
}
261+
} else {
262+
if err := os.MkdirAll(filepath.Dir(dstBase), 0755); err != nil {
263+
return fmt.Errorf("create dir for artifact %s: %w", a.Dst, err)
264+
}
265+
}
266+
267+
for _, src := range srcs {
268+
dst := dstBase
269+
if copyIntoDir {
270+
dst = filepath.Join(dstBase, filepath.Base(src))
271+
}
272+
273+
if err := d.copyFromContainer(containerID, src, dst); err != nil {
274+
if a.Optional {
275+
log.Printf("warning: optional artifact not found: %s", src)
276+
continue
277+
}
278+
return fmt.Errorf("copy artifact %s: %w", src, err)
279+
}
280+
if abs, err := filepath.Abs(dst); err == nil {
281+
dst = abs
282+
}
283+
extracted = append(extracted, dst)
284+
}
285+
}
286+
287+
log.Printf("extracted %d artifact(s) in %v:", len(extracted), time.Since(extractStart).Round(time.Millisecond))
288+
for _, f := range extracted {
289+
log.Printf(" %s", f)
290+
}
291+
return nil
292+
}
293+
185294
// Build builds a container image from the given specification.
186295
func (f *Forge) Build(spec *Spec) error {
187296
// Prepare the tar stream.
@@ -282,6 +391,7 @@ func (f *Forge) Build(spec *Spec) error {
282391
}
283392
}
284393
}
394+
285395
return nil // and we are done.
286396
}
287397
}

wanda/forge_config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ type ForgeConfig struct {
1515
Epoch string
1616
WandaSpecsFile string
1717
EnvFile string
18+
ArtifactsDir string
1819

1920
RayCI bool
2021
Rebuild bool

0 commit comments

Comments
 (0)