Skip to content

Commit 7a60b1e

Browse files
feat(wanda): add Artifact struct to spec for extraction
Define Artifact struct for specifying files to extract from built images: - src: absolute path inside container (supports glob patterns like "/*.whl") - dst: relative path in artifacts directory - optional: if true, missing files warn instead of fail Includes helper methods for path resolution and validation: - Validate(): ensures src is absolute, dst is relative, no path traversal - ResolveSrcs(): resolves glob patterns against container file list - ResolveDst(): resolves destination within artifacts directory - CopyIntoDir(): determines if copying into directory vs renaming Topic: wanda-artifact-spec Signed-off-by: andrew <andrew@anyscale.com>
1 parent 6165f02 commit 7a60b1e

File tree

4 files changed

+402
-0
lines changed

4 files changed

+402
-0
lines changed

wanda/artifact.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
package wanda
2+
3+
import (
4+
"fmt"
5+
"path"
6+
"path/filepath"
7+
"strings"
8+
)
9+
10+
// Artifact defines a file or directory to extract from a built image.
11+
type Artifact struct {
12+
// Src is the path inside the container to extract.
13+
// Can be a file, directory, or glob pattern (e.g., "/*.whl").
14+
// Must be an absolute path. Supports variable expansion.
15+
Src string `yaml:"src"`
16+
17+
// Dst is the destination path relative to the artifacts directory.
18+
// If it ends with "/" or src matches multiple files, src is copied
19+
// into that directory (preserving original filenames).
20+
// Otherwise, src is renamed to dst during copy.
21+
// Must be a relative path (no ".." allowed).
22+
Dst string `yaml:"dst"`
23+
24+
// Optional marks this artifact as best-effort.
25+
// If true, extraction failure will be logged but won't fail the build.
26+
Optional bool `yaml:"optional,omitempty"`
27+
}
28+
29+
// Validate checks that the artifact paths are safe.
30+
// Src must be an absolute path (inside the container).
31+
// Dst must be relative and cannot escape the artifacts directory.
32+
func (a *Artifact) Validate() error {
33+
if !strings.HasPrefix(a.Src, "/") {
34+
return fmt.Errorf("artifact src must be absolute path: %q", a.Src)
35+
}
36+
37+
if filepath.IsAbs(a.Dst) {
38+
return fmt.Errorf("artifact dst must be relative path: %q", a.Dst)
39+
}
40+
41+
cleaned := filepath.Clean(a.Dst)
42+
if strings.HasPrefix(cleaned, "..") || strings.Contains(cleaned, string(filepath.Separator)+"..") {
43+
return fmt.Errorf("artifact dst cannot escape artifacts directory: %q", a.Dst)
44+
}
45+
46+
return nil
47+
}
48+
49+
// HasGlob returns true if the artifact source path contains glob characters.
50+
func (a *Artifact) HasGlob() bool {
51+
return strings.ContainsAny(a.Src, "*?[")
52+
}
53+
54+
// ResolveSrcs returns the source paths to extract.
55+
func (a *Artifact) ResolveSrcs(containerFiles []string) []string {
56+
if a.HasGlob() {
57+
return a.matchFiles(containerFiles)
58+
}
59+
return []string{a.Src}
60+
}
61+
62+
// CopyIntoDir returns true if sources should be copied into dst as a directory.
63+
func (a *Artifact) CopyIntoDir(numSrcs int) bool {
64+
return strings.HasSuffix(a.Dst, "/") || numSrcs > 1
65+
}
66+
67+
// matchFiles returns files that match the artifact's glob pattern.
68+
func (a *Artifact) matchFiles(files []string) []string {
69+
var matches []string
70+
for _, f := range files {
71+
if matched, _ := path.Match(a.Src, f); matched {
72+
matches = append(matches, f)
73+
}
74+
}
75+
return matches
76+
}
77+
78+
// ResolveDst resolves the destination path for the artifact.
79+
// Returns an error if the resolved path would escape the artifacts directory.
80+
func (a *Artifact) ResolveDst(artifactsDir string) (string, error) {
81+
if filepath.IsAbs(a.Dst) {
82+
return "", fmt.Errorf("artifact dst must be relative path: %q", a.Dst)
83+
}
84+
85+
resolved := filepath.Join(artifactsDir, a.Dst)
86+
resolved = filepath.Clean(resolved)
87+
88+
absArtifactsDir, err := filepath.Abs(artifactsDir)
89+
if err != nil {
90+
return "", fmt.Errorf("resolve artifacts dir: %w", err)
91+
}
92+
absResolved, err := filepath.Abs(resolved)
93+
if err != nil {
94+
return "", fmt.Errorf("resolve dst: %w", err)
95+
}
96+
97+
if !strings.HasPrefix(absResolved, absArtifactsDir+string(filepath.Separator)) &&
98+
absResolved != absArtifactsDir {
99+
return "", fmt.Errorf("artifact dst escapes artifacts directory: %q", a.Dst)
100+
}
101+
102+
return resolved, nil
103+
}

wanda/artifact_test.go

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package wanda
2+
3+
import (
4+
"path/filepath"
5+
"strings"
6+
"testing"
7+
)
8+
9+
func TestArtifact_Validate(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
src string
13+
dst string
14+
wantErr string
15+
}{
16+
{"valid absolute src", "/app/file.txt", "output.txt", ""},
17+
{"valid glob src", "/*.whl", "wheels/", ""},
18+
{"valid nested dst", "/app/file.txt", "subdir/output.txt", ""},
19+
{"relative src rejected", "relative/path.txt", "output.txt", "must be absolute"},
20+
{"absolute dst rejected", "/app/file.txt", "/absolute/path.txt", "must be relative"},
21+
{"dst with .. rejected", "/app/file.txt", "../escape.txt", "cannot escape"},
22+
{"dst with nested .. rejected", "/app/file.txt", "subdir/../../escape.txt", "cannot escape"},
23+
}
24+
25+
for _, tt := range tests {
26+
t.Run(tt.name, func(t *testing.T) {
27+
a := &Artifact{Src: tt.src, Dst: tt.dst}
28+
err := a.Validate()
29+
if tt.wantErr == "" {
30+
if err != nil {
31+
t.Errorf("Validate() unexpected error: %v", err)
32+
}
33+
} else {
34+
if err == nil {
35+
t.Errorf("Validate() expected error containing %q, got nil", tt.wantErr)
36+
} else if !strings.Contains(err.Error(), tt.wantErr) {
37+
t.Errorf("Validate() error = %q, want containing %q", err.Error(), tt.wantErr)
38+
}
39+
}
40+
})
41+
}
42+
}
43+
44+
func TestArtifact_HasGlob(t *testing.T) {
45+
tests := []struct {
46+
src string
47+
want bool
48+
}{
49+
{"/app/file.txt", false},
50+
{"/app/*.txt", true},
51+
{"/app/file?.txt", true},
52+
{"/app/[abc].txt", true},
53+
{"/*.whl", true},
54+
{"/path/to/dir/", false},
55+
}
56+
57+
for _, tt := range tests {
58+
a := &Artifact{Src: tt.src}
59+
got := a.HasGlob()
60+
if got != tt.want {
61+
t.Errorf("Artifact{Src: %q}.HasGlob() = %v, want %v", tt.src, got, tt.want)
62+
}
63+
}
64+
}
65+
66+
func TestArtifact_ResolveSrcs(t *testing.T) {
67+
containerFiles := []string{
68+
"/app.whl",
69+
"/other.whl",
70+
"/readme.txt",
71+
"/etc/config",
72+
"/build/output.whl",
73+
}
74+
75+
tests := []struct {
76+
src string
77+
want []string
78+
}{
79+
{"/*.whl", []string{"/app.whl", "/other.whl"}},
80+
{"/*.txt", []string{"/readme.txt"}},
81+
{"/build/*.whl", []string{"/build/output.whl"}},
82+
{"/*.exe", nil},
83+
{"/app.whl", []string{"/app.whl"}}, // non-glob returns src directly
84+
{"/nonexistent", []string{"/nonexistent"}}, // non-glob doesn't check existence
85+
}
86+
87+
for _, tt := range tests {
88+
a := &Artifact{Src: tt.src}
89+
got := a.ResolveSrcs(containerFiles)
90+
if len(got) != len(tt.want) {
91+
t.Errorf("Artifact{Src: %q}.ResolveSrcs() = %v, want %v", tt.src, got, tt.want)
92+
continue
93+
}
94+
for i := range got {
95+
if got[i] != tt.want[i] {
96+
t.Errorf("Artifact{Src: %q}.ResolveSrcs()[%d] = %q, want %q", tt.src, i, got[i], tt.want[i])
97+
}
98+
}
99+
}
100+
}
101+
102+
func TestArtifact_CopyIntoDir(t *testing.T) {
103+
tests := []struct {
104+
dst string
105+
numSrcs int
106+
want bool
107+
}{
108+
{"wheels/", 1, true}, // trailing slash
109+
{"wheels/", 2, true}, // trailing slash + multiple
110+
{"wheels", 1, false}, // no slash, single file
111+
{"wheels", 2, true}, // no slash, multiple files
112+
{"out.txt", 1, false}, // single file rename
113+
}
114+
115+
for _, tt := range tests {
116+
a := &Artifact{Dst: tt.dst}
117+
got := a.CopyIntoDir(tt.numSrcs)
118+
if got != tt.want {
119+
t.Errorf("Artifact{Dst: %q}.CopyIntoDir(%d) = %v, want %v", tt.dst, tt.numSrcs, got, tt.want)
120+
}
121+
}
122+
}
123+
124+
func TestArtifact_ResolveDst(t *testing.T) {
125+
artifactsDir := t.TempDir()
126+
127+
tests := []struct {
128+
name string
129+
dst string
130+
want string
131+
wantErr string
132+
}{
133+
{"simple file", "output.txt", filepath.Join(artifactsDir, "output.txt"), ""},
134+
{"subdirectory", "wheels/file.whl", filepath.Join(artifactsDir, "wheels/file.whl"), ""},
135+
{"trailing slash", "wheels/", filepath.Join(artifactsDir, "wheels"), ""},
136+
{"absolute rejected", "/absolute/path.txt", "", "must be relative"},
137+
{"escape rejected", "../escape.txt", "", "escapes artifacts"},
138+
{"nested escape rejected", "sub/../../escape.txt", "", "escapes artifacts"},
139+
}
140+
141+
for _, tt := range tests {
142+
t.Run(tt.name, func(t *testing.T) {
143+
a := &Artifact{Dst: tt.dst}
144+
got, err := a.ResolveDst(artifactsDir)
145+
if tt.wantErr == "" {
146+
if err != nil {
147+
t.Errorf("ResolveDst() unexpected error: %v", err)
148+
} else if got != tt.want {
149+
t.Errorf("ResolveDst() = %q, want %q", got, tt.want)
150+
}
151+
} else {
152+
if err == nil {
153+
t.Errorf("ResolveDst() expected error containing %q, got nil", tt.wantErr)
154+
} else if !strings.Contains(err.Error(), tt.wantErr) {
155+
t.Errorf("ResolveDst() error = %q, want containing %q", err.Error(), tt.wantErr)
156+
}
157+
}
158+
})
159+
}
160+
}

wanda/spec.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ type Spec struct {
2828

2929
// DisableCaching disables use of caching.
3030
DisableCaching bool `yaml:"disable_caching,omitempty"`
31+
32+
// Artifacts defines files and directories to extract from the built image.
33+
Artifacts []*Artifact `yaml:"artifacts,omitempty"`
3134
}
3235

3336
func parseSpecFile(f string) (*Spec, error) {
@@ -116,6 +119,21 @@ func stringsExpandVar(slice []string, lookup lookupFunc) []string {
116119
return result
117120
}
118121

122+
func artifactsExpandVar(artifacts []*Artifact, lookup lookupFunc) []*Artifact {
123+
if len(artifacts) == 0 {
124+
return nil
125+
}
126+
result := make([]*Artifact, len(artifacts))
127+
for i, a := range artifacts {
128+
result[i] = &Artifact{
129+
Src: expandVar(a.Src, lookup),
130+
Dst: expandVar(a.Dst, lookup),
131+
Optional: a.Optional,
132+
}
133+
}
134+
return result
135+
}
136+
119137
func (s *Spec) expandVar(lookup lookupFunc) *Spec {
120138
result := new(Spec)
121139

@@ -127,6 +145,7 @@ func (s *Spec) expandVar(lookup lookupFunc) *Spec {
127145
result.BuildArgs = stringsExpandVar(s.BuildArgs, lookup)
128146
result.BuildHintArgs = stringsExpandVar(s.BuildHintArgs, lookup)
129147
result.DisableCaching = s.DisableCaching
148+
result.Artifacts = artifactsExpandVar(s.Artifacts, lookup)
130149

131150
return result
132151
}

0 commit comments

Comments
 (0)