Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions wanda/artifact.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package wanda

import (
"fmt"
"path"
"path/filepath"
"strings"
)

// Artifact defines a file or directory to extract from a built image.
type Artifact struct {
// Src is the path inside the container to extract.
// Can be a file, directory, or glob pattern (e.g., "/*.whl").
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we really need glob pattern support? glob only matches file name, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went back-and-forth on glob inclusion, and settled on it being worth inclusion. It drastically simplifies the Wheel upload case. Without this, we'd need to rename the wheel file away from schemas like ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl and into a generic name.

Yes, glob only matches file names currently.

// Must be an absolute path. Supports variable expansion.
Src string `yaml:"src"`

// Dst is the destination path relative to the artifacts directory.
// If it ends with "/" or src matches multiple files, src is copied
// into that directory (preserving original filenames).
// Otherwise, src is renamed to dst during copy.
// Must be a relative path (no ".." allowed).
Dst string `yaml:"dst"`

// Optional marks this artifact as best-effort.
// If true, extraction failure will be logged but won't fail the build.
Optional bool `yaml:"optional,omitempty"`
}

// Validate checks that the artifact paths are safe.
// Src must be an absolute path (inside the container).
// Dst must be relative and cannot escape the artifacts directory.
func (a *Artifact) Validate() error {
if !strings.HasPrefix(a.Src, "/") {
return fmt.Errorf("artifact src must be absolute path: %q", a.Src)
}

if filepath.IsAbs(a.Dst) {
return fmt.Errorf("artifact dst must be relative path: %q", a.Dst)
}

cleaned := filepath.Clean(a.Dst)
if strings.HasPrefix(cleaned, "..") || strings.Contains(cleaned, string(filepath.Separator)+"..") {
return fmt.Errorf("artifact dst cannot escape artifacts directory: %q", a.Dst)
}

return nil
}

// HasGlob returns true if the artifact source path contains glob characters.
func (a *Artifact) HasGlob() bool {
return strings.ContainsAny(a.Src, "*?[")
}

// ResolveSrcs returns the source paths to extract.
func (a *Artifact) ResolveSrcs(containerFiles []string) []string {
if a.HasGlob() {
return a.matchFiles(containerFiles)
}
return []string{a.Src}
}

// matchFiles returns files that match the artifact's glob pattern.
func (a *Artifact) matchFiles(files []string) []string {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we like listing all the files first and then perform the matching? should we send in a fs interface or something? this can be a lot of files to match (potentially files of the whole container, which can be a lot?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/ray-project/rayci/pull/411/changes#diff-c7fe292f56579652957100e992ec51957cb167f5787d0ac59d1171f7cf872a32R233
^ This is the implementation detail. If we have no globs, we can call docker cp directly and avoid any filesystem parsing, which is the optimal case.

If we do have a glob, then we have to export the filesystem to get all filenames. https://github.com/ray-project/rayci/pull/411/changes#diff-7df0dd7dbd85029bba68703b7de4462bee6f7f6fe89a15f9286c6d09d1897323R160

Running on the wheel image today to extract the wheel glob, it takes <1s

# ci/docker/ray-wheel.wanda.yaml
artifacts:
  - src: "/*.whl"
    dst: ./
$ PYTHON_VERSION=3.13 MANYLINUX_VERSION=260128.221a193 HOSTTYPE=aarch64 ARCH_SUFFIX=-aarch64 BUILDKITE_COMMIT=b5737cefc0 IS_LOCAL_BUILD=true /Users/andrew/devel/rayci-wanda-artifacts/_release/wanda-darwin-arm64 --artifacts_dir .whl/ ci/docker/ray-wheel.wanda.yaml

2026/02/04 09:10:39 extracting 1 artifact(s) from localhost:5000/rayci-work:ray-wheel-py3.13-aarch64
Successfully copied 72MB to /Users/andrew/devel/ray-local-wheel-build/.whl/ray-3.0.0.dev0-cp313-cp313-manylinux2014_aarch64.whl
2026/02/04 09:10:40 extracted 1 artifact(s) in 722ms:
2026/02/04 09:10:40   /Users/andrew/devel/ray-local-wheel-build/.whl/ray-3.0.0.dev0-cp313-cp313-manylinux2014_aarch64.whl

var matches []string
for _, f := range files {
if matched, _ := path.Match(a.Src, f); matched {
matches = append(matches, f)
}
}
return matches
}

// ResolveDst resolves the destination path for the artifact.
// Returns an error if the resolved path would escape the artifacts directory.
func (a *Artifact) ResolveDst(artifactsDir string) (string, error) {
if filepath.IsAbs(a.Dst) {
return "", fmt.Errorf("artifact dst must be relative path: %q", a.Dst)
}

resolved := filepath.Join(artifactsDir, a.Dst)
resolved = filepath.Clean(resolved)

absArtifactsDir, err := filepath.Abs(artifactsDir)
if err != nil {
return "", fmt.Errorf("resolve artifacts dir: %w", err)
}
absResolved, err := filepath.Abs(resolved)
if err != nil {
return "", fmt.Errorf("resolve dst: %w", err)
}

if !strings.HasPrefix(absResolved, absArtifactsDir+string(filepath.Separator)) &&
absResolved != absArtifactsDir {
return "", fmt.Errorf("artifact dst escapes artifacts directory: %q", a.Dst)
}

return resolved, nil
}
138 changes: 138 additions & 0 deletions wanda/artifact_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package wanda

import (
"path/filepath"
"strings"
"testing"
)

func TestArtifact_Validate(t *testing.T) {
tests := []struct {
name string
src string
dst string
wantErr string
}{
{"valid absolute src", "/app/file.txt", "output.txt", ""},
{"valid glob src", "/*.whl", "wheels/", ""},
{"valid nested dst", "/app/file.txt", "subdir/output.txt", ""},
{"relative src rejected", "relative/path.txt", "output.txt", "must be absolute"},
{"absolute dst rejected", "/app/file.txt", "/absolute/path.txt", "must be relative"},
{"dst with .. rejected", "/app/file.txt", "../escape.txt", "cannot escape"},
{"dst with nested .. rejected", "/app/file.txt", "subdir/../../escape.txt", "cannot escape"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
a := &Artifact{Src: tt.src, Dst: tt.dst}
err := a.Validate()
if tt.wantErr == "" {
if err != nil {
t.Errorf("Validate() unexpected error: %v", err)
}
} else {
if err == nil {
t.Errorf("Validate() expected error containing %q, got nil", tt.wantErr)
} else if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("Validate() error = %q, want containing %q", err.Error(), tt.wantErr)
}
}
})
}
}

func TestArtifact_HasGlob(t *testing.T) {
tests := []struct {
src string
want bool
}{
{"/app/file.txt", false},
{"/app/*.txt", true},
{"/app/file?.txt", true},
{"/app/[abc].txt", true},
{"/*.whl", true},
{"/path/to/dir/", false},
}

for _, tt := range tests {
a := &Artifact{Src: tt.src}
got := a.HasGlob()
if got != tt.want {
t.Errorf("Artifact{Src: %q}.HasGlob() = %v, want %v", tt.src, got, tt.want)
}
}
}

func TestArtifact_ResolveSrcs(t *testing.T) {
containerFiles := []string{
"/app.whl",
"/other.whl",
"/readme.txt",
"/etc/config",
"/build/output.whl",
}

tests := []struct {
src string
want []string
}{
{"/*.whl", []string{"/app.whl", "/other.whl"}},
{"/*.txt", []string{"/readme.txt"}},
{"/build/*.whl", []string{"/build/output.whl"}},
{"/*.exe", nil},
{"/app.whl", []string{"/app.whl"}}, // non-glob returns src directly
{"/nonexistent", []string{"/nonexistent"}}, // non-glob doesn't check existence
}

for _, tt := range tests {
a := &Artifact{Src: tt.src}
got := a.ResolveSrcs(containerFiles)
if len(got) != len(tt.want) {
t.Errorf("Artifact{Src: %q}.ResolveSrcs() = %v, want %v", tt.src, got, tt.want)
continue
}
for i := range got {
if got[i] != tt.want[i] {
t.Errorf("Artifact{Src: %q}.ResolveSrcs()[%d] = %q, want %q", tt.src, i, got[i], tt.want[i])
}
}
}
}

func TestArtifact_ResolveDst(t *testing.T) {
artifactsDir := t.TempDir()

tests := []struct {
name string
dst string
want string
wantErr string
}{
{"simple file", "output.txt", filepath.Join(artifactsDir, "output.txt"), ""},
{"subdirectory", "wheels/file.whl", filepath.Join(artifactsDir, "wheels/file.whl"), ""},
{"trailing slash", "wheels/", filepath.Join(artifactsDir, "wheels"), ""},
{"absolute rejected", "/absolute/path.txt", "", "must be relative"},
{"escape rejected", "../escape.txt", "", "escapes artifacts"},
{"nested escape rejected", "sub/../../escape.txt", "", "escapes artifacts"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
a := &Artifact{Dst: tt.dst}
got, err := a.ResolveDst(artifactsDir)
if tt.wantErr == "" {
if err != nil {
t.Errorf("ResolveDst() unexpected error: %v", err)
} else if got != tt.want {
t.Errorf("ResolveDst() = %q, want %q", got, tt.want)
}
} else {
if err == nil {
t.Errorf("ResolveDst() expected error containing %q, got nil", tt.wantErr)
} else if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("ResolveDst() error = %q, want containing %q", err.Error(), tt.wantErr)
}
}
})
}
}
19 changes: 19 additions & 0 deletions wanda/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ type Spec struct {

// DisableCaching disables use of caching.
DisableCaching bool `yaml:"disable_caching,omitempty"`

// Artifacts defines files and directories to extract from the built image.
Artifacts []*Artifact `yaml:"artifacts,omitempty"`
}

func parseSpecFile(f string) (*Spec, error) {
Expand Down Expand Up @@ -116,6 +119,21 @@ func stringsExpandVar(slice []string, lookup lookupFunc) []string {
return result
}

func artifactsExpandVar(artifacts []*Artifact, lookup lookupFunc) []*Artifact {
if len(artifacts) == 0 {
return nil
}
result := make([]*Artifact, len(artifacts))
for i, a := range artifacts {
result[i] = &Artifact{
Src: expandVar(a.Src, lookup),
Dst: expandVar(a.Dst, lookup),
Optional: a.Optional,
}
}
return result
}

func (s *Spec) expandVar(lookup lookupFunc) *Spec {
result := new(Spec)

Expand All @@ -127,6 +145,7 @@ func (s *Spec) expandVar(lookup lookupFunc) *Spec {
result.BuildArgs = stringsExpandVar(s.BuildArgs, lookup)
result.BuildHintArgs = stringsExpandVar(s.BuildHintArgs, lookup)
result.DisableCaching = s.DisableCaching
result.Artifacts = artifactsExpandVar(s.Artifacts, lookup)

return result
}
Loading