Skip to content

Commit 928b35c

Browse files
feat(wanda): add Artifact struct to spec for extraction
Define Artifact struct for specifying files to extract from built images: - src: absolute path inside container (supports glob patterns like "/*.whl") - dst: relative path in artifacts directory - optional: if true, missing files warn instead of fail Includes helper methods for path resolution and validation: - Validate(): ensures src is absolute, dst is relative, no path traversal - ResolveSrcs(): resolves glob patterns against container file list - ResolveDst(): resolves destination within artifacts directory - CopyIntoDir(): determines if copying into directory vs renaming Topic: wanda-artifact-spec Signed-off-by: andrew <andrew@anyscale.com>
1 parent 6165f02 commit 928b35c

File tree

4 files changed

+375
-0
lines changed

4 files changed

+375
-0
lines changed

wanda/artifact.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package wanda
2+
3+
import (
4+
"fmt"
5+
"path"
6+
"path/filepath"
7+
"strings"
8+
)
9+
10+
// Artifact defines a file or directory to extract from a built image.
11+
type Artifact struct {
12+
// Src is the path inside the container to extract.
13+
// Can be a file, directory, or glob pattern (e.g., "/*.whl").
14+
// Must be an absolute path. Supports variable expansion.
15+
Src string `yaml:"src"`
16+
17+
// Dst is the destination path relative to the artifacts directory.
18+
// If it ends with "/" or src matches multiple files, src is copied
19+
// into that directory (preserving original filenames).
20+
// Otherwise, src is renamed to dst during copy.
21+
// Must be a relative path (no ".." allowed).
22+
Dst string `yaml:"dst"`
23+
24+
// Optional marks this artifact as best-effort.
25+
// If true, extraction failure will be logged but won't fail the build.
26+
Optional bool `yaml:"optional,omitempty"`
27+
}
28+
29+
// Validate checks that the artifact paths are safe.
30+
// Src must be an absolute path (inside the container).
31+
// Dst must be relative and cannot escape the artifacts directory.
32+
func (a *Artifact) Validate() error {
33+
if !strings.HasPrefix(a.Src, "/") {
34+
return fmt.Errorf("artifact src must be absolute path: %q", a.Src)
35+
}
36+
37+
if filepath.IsAbs(a.Dst) {
38+
return fmt.Errorf("artifact dst must be relative path: %q", a.Dst)
39+
}
40+
41+
cleaned := filepath.Clean(a.Dst)
42+
if strings.HasPrefix(cleaned, "..") || strings.Contains(cleaned, string(filepath.Separator)+"..") {
43+
return fmt.Errorf("artifact dst cannot escape artifacts directory: %q", a.Dst)
44+
}
45+
46+
return nil
47+
}
48+
49+
// HasGlob returns true if the artifact source path contains glob characters.
50+
func (a *Artifact) HasGlob() bool {
51+
return strings.ContainsAny(a.Src, "*?[")
52+
}
53+
54+
// ResolveSrcs returns the source paths to extract.
55+
func (a *Artifact) ResolveSrcs(containerFiles []string) []string {
56+
if a.HasGlob() {
57+
return a.matchFiles(containerFiles)
58+
}
59+
return []string{a.Src}
60+
}
61+
62+
// matchFiles returns files that match the artifact's glob pattern.
63+
func (a *Artifact) matchFiles(files []string) []string {
64+
var matches []string
65+
for _, f := range files {
66+
if matched, _ := path.Match(a.Src, f); matched {
67+
matches = append(matches, f)
68+
}
69+
}
70+
return matches
71+
}
72+
73+
// ResolveDst resolves the destination path for the artifact.
74+
// Returns an error if the resolved path would escape the artifacts directory.
75+
func (a *Artifact) ResolveDst(artifactsDir string) (string, error) {
76+
if filepath.IsAbs(a.Dst) {
77+
return "", fmt.Errorf("artifact dst must be relative path: %q", a.Dst)
78+
}
79+
80+
resolved := filepath.Join(artifactsDir, a.Dst)
81+
resolved = filepath.Clean(resolved)
82+
83+
absArtifactsDir, err := filepath.Abs(artifactsDir)
84+
if err != nil {
85+
return "", fmt.Errorf("resolve artifacts dir: %w", err)
86+
}
87+
absResolved, err := filepath.Abs(resolved)
88+
if err != nil {
89+
return "", fmt.Errorf("resolve dst: %w", err)
90+
}
91+
92+
if !strings.HasPrefix(absResolved, absArtifactsDir+string(filepath.Separator)) &&
93+
absResolved != absArtifactsDir {
94+
return "", fmt.Errorf("artifact dst escapes artifacts directory: %q", a.Dst)
95+
}
96+
97+
return resolved, nil
98+
}

wanda/artifact_test.go

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
package wanda
2+
3+
import (
4+
"path/filepath"
5+
"strings"
6+
"testing"
7+
)
8+
9+
func TestArtifact_Validate(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
src string
13+
dst string
14+
wantErr string
15+
}{
16+
{"valid absolute src", "/app/file.txt", "output.txt", ""},
17+
{"valid glob src", "/*.whl", "wheels/", ""},
18+
{"valid nested dst", "/app/file.txt", "subdir/output.txt", ""},
19+
{"relative src rejected", "relative/path.txt", "output.txt", "must be absolute"},
20+
{"absolute dst rejected", "/app/file.txt", "/absolute/path.txt", "must be relative"},
21+
{"dst with .. rejected", "/app/file.txt", "../escape.txt", "cannot escape"},
22+
{"dst with nested .. rejected", "/app/file.txt", "subdir/../../escape.txt", "cannot escape"},
23+
}
24+
25+
for _, tt := range tests {
26+
t.Run(tt.name, func(t *testing.T) {
27+
a := &Artifact{Src: tt.src, Dst: tt.dst}
28+
err := a.Validate()
29+
if tt.wantErr == "" {
30+
if err != nil {
31+
t.Errorf("Validate() unexpected error: %v", err)
32+
}
33+
} else {
34+
if err == nil {
35+
t.Errorf("Validate() expected error containing %q, got nil", tt.wantErr)
36+
} else if !strings.Contains(err.Error(), tt.wantErr) {
37+
t.Errorf("Validate() error = %q, want containing %q", err.Error(), tt.wantErr)
38+
}
39+
}
40+
})
41+
}
42+
}
43+
44+
func TestArtifact_HasGlob(t *testing.T) {
45+
tests := []struct {
46+
src string
47+
want bool
48+
}{
49+
{"/app/file.txt", false},
50+
{"/app/*.txt", true},
51+
{"/app/file?.txt", true},
52+
{"/app/[abc].txt", true},
53+
{"/*.whl", true},
54+
{"/path/to/dir/", false},
55+
}
56+
57+
for _, tt := range tests {
58+
a := &Artifact{Src: tt.src}
59+
got := a.HasGlob()
60+
if got != tt.want {
61+
t.Errorf("Artifact{Src: %q}.HasGlob() = %v, want %v", tt.src, got, tt.want)
62+
}
63+
}
64+
}
65+
66+
func TestArtifact_ResolveSrcs(t *testing.T) {
67+
containerFiles := []string{
68+
"/app.whl",
69+
"/other.whl",
70+
"/readme.txt",
71+
"/etc/config",
72+
"/build/output.whl",
73+
}
74+
75+
tests := []struct {
76+
src string
77+
want []string
78+
}{
79+
{"/*.whl", []string{"/app.whl", "/other.whl"}},
80+
{"/*.txt", []string{"/readme.txt"}},
81+
{"/build/*.whl", []string{"/build/output.whl"}},
82+
{"/*.exe", nil},
83+
{"/app.whl", []string{"/app.whl"}}, // non-glob returns src directly
84+
{"/nonexistent", []string{"/nonexistent"}}, // non-glob doesn't check existence
85+
}
86+
87+
for _, tt := range tests {
88+
a := &Artifact{Src: tt.src}
89+
got := a.ResolveSrcs(containerFiles)
90+
if len(got) != len(tt.want) {
91+
t.Errorf("Artifact{Src: %q}.ResolveSrcs() = %v, want %v", tt.src, got, tt.want)
92+
continue
93+
}
94+
for i := range got {
95+
if got[i] != tt.want[i] {
96+
t.Errorf("Artifact{Src: %q}.ResolveSrcs()[%d] = %q, want %q", tt.src, i, got[i], tt.want[i])
97+
}
98+
}
99+
}
100+
}
101+
102+
func TestArtifact_ResolveDst(t *testing.T) {
103+
artifactsDir := t.TempDir()
104+
105+
tests := []struct {
106+
name string
107+
dst string
108+
want string
109+
wantErr string
110+
}{
111+
{"simple file", "output.txt", filepath.Join(artifactsDir, "output.txt"), ""},
112+
{"subdirectory", "wheels/file.whl", filepath.Join(artifactsDir, "wheels/file.whl"), ""},
113+
{"trailing slash", "wheels/", filepath.Join(artifactsDir, "wheels"), ""},
114+
{"absolute rejected", "/absolute/path.txt", "", "must be relative"},
115+
{"escape rejected", "../escape.txt", "", "escapes artifacts"},
116+
{"nested escape rejected", "sub/../../escape.txt", "", "escapes artifacts"},
117+
}
118+
119+
for _, tt := range tests {
120+
t.Run(tt.name, func(t *testing.T) {
121+
a := &Artifact{Dst: tt.dst}
122+
got, err := a.ResolveDst(artifactsDir)
123+
if tt.wantErr == "" {
124+
if err != nil {
125+
t.Errorf("ResolveDst() unexpected error: %v", err)
126+
} else if got != tt.want {
127+
t.Errorf("ResolveDst() = %q, want %q", got, tt.want)
128+
}
129+
} else {
130+
if err == nil {
131+
t.Errorf("ResolveDst() expected error containing %q, got nil", tt.wantErr)
132+
} else if !strings.Contains(err.Error(), tt.wantErr) {
133+
t.Errorf("ResolveDst() error = %q, want containing %q", err.Error(), tt.wantErr)
134+
}
135+
}
136+
})
137+
}
138+
}

wanda/spec.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ type Spec struct {
2828

2929
// DisableCaching disables use of caching.
3030
DisableCaching bool `yaml:"disable_caching,omitempty"`
31+
32+
// Artifacts defines files and directories to extract from the built image.
33+
Artifacts []*Artifact `yaml:"artifacts,omitempty"`
3134
}
3235

3336
func parseSpecFile(f string) (*Spec, error) {
@@ -116,6 +119,21 @@ func stringsExpandVar(slice []string, lookup lookupFunc) []string {
116119
return result
117120
}
118121

122+
func artifactsExpandVar(artifacts []*Artifact, lookup lookupFunc) []*Artifact {
123+
if len(artifacts) == 0 {
124+
return nil
125+
}
126+
result := make([]*Artifact, len(artifacts))
127+
for i, a := range artifacts {
128+
result[i] = &Artifact{
129+
Src: expandVar(a.Src, lookup),
130+
Dst: expandVar(a.Dst, lookup),
131+
Optional: a.Optional,
132+
}
133+
}
134+
return result
135+
}
136+
119137
func (s *Spec) expandVar(lookup lookupFunc) *Spec {
120138
result := new(Spec)
121139

@@ -127,6 +145,7 @@ func (s *Spec) expandVar(lookup lookupFunc) *Spec {
127145
result.BuildArgs = stringsExpandVar(s.BuildArgs, lookup)
128146
result.BuildHintArgs = stringsExpandVar(s.BuildHintArgs, lookup)
129147
result.DisableCaching = s.DisableCaching
148+
result.Artifacts = artifactsExpandVar(s.Artifacts, lookup)
130149

131150
return result
132151
}

0 commit comments

Comments
 (0)