-
Notifications
You must be signed in to change notification settings - Fork 4
feat(wanda): add Artifact struct to spec for extraction #407
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| package wanda | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "path" | ||
| "path/filepath" | ||
| "strings" | ||
| ) | ||
|
|
||
| // Artifact defines a file or directory to extract from a built image. | ||
| type Artifact struct { | ||
| // Src is the path inside the container to extract. | ||
| // Can be a file, directory, or glob pattern (e.g., "/*.whl"). | ||
| // Must be an absolute path. Supports variable expansion. | ||
| Src string `yaml:"src"` | ||
|
|
||
| // Dst is the destination path relative to the artifacts directory. | ||
| // If it ends with "/" or src matches multiple files, src is copied | ||
| // into that directory (preserving original filenames). | ||
| // Otherwise, src is renamed to dst during copy. | ||
| // Must be a relative path (no ".." allowed). | ||
| Dst string `yaml:"dst"` | ||
|
|
||
| // Optional marks this artifact as best-effort. | ||
| // If true, extraction failure will be logged but won't fail the build. | ||
| Optional bool `yaml:"optional,omitempty"` | ||
| } | ||
|
|
||
| // Validate checks that the artifact paths are safe. | ||
| // Src must be an absolute path (inside the container). | ||
| // Dst must be relative and cannot escape the artifacts directory. | ||
| func (a *Artifact) Validate() error { | ||
| if !strings.HasPrefix(a.Src, "/") { | ||
| return fmt.Errorf("artifact src must be absolute path: %q", a.Src) | ||
| } | ||
|
|
||
| if filepath.IsAbs(a.Dst) { | ||
| return fmt.Errorf("artifact dst must be relative path: %q", a.Dst) | ||
| } | ||
|
|
||
| cleaned := filepath.Clean(a.Dst) | ||
| if strings.HasPrefix(cleaned, "..") || strings.Contains(cleaned, string(filepath.Separator)+"..") { | ||
| return fmt.Errorf("artifact dst cannot escape artifacts directory: %q", a.Dst) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // HasGlob returns true if the artifact source path contains glob characters. | ||
| func (a *Artifact) HasGlob() bool { | ||
| return strings.ContainsAny(a.Src, "*?[") | ||
| } | ||
|
|
||
| // ResolveSrcs returns the source paths to extract. | ||
| func (a *Artifact) ResolveSrcs(containerFiles []string) []string { | ||
| if a.HasGlob() { | ||
| return a.matchFiles(containerFiles) | ||
| } | ||
| return []string{a.Src} | ||
| } | ||
|
|
||
| // matchFiles returns files that match the artifact's glob pattern. | ||
| func (a *Artifact) matchFiles(files []string) []string { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are we like listing all the files first and then perform the matching? should we send in a fs interface or something? this can be a lot of files to match (potentially files of the whole container, which can be a lot?)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://github.com/ray-project/rayci/pull/411/changes#diff-c7fe292f56579652957100e992ec51957cb167f5787d0ac59d1171f7cf872a32R233 If we do have a glob, then we have to export the filesystem to get all filenames. https://github.com/ray-project/rayci/pull/411/changes#diff-7df0dd7dbd85029bba68703b7de4462bee6f7f6fe89a15f9286c6d09d1897323R160 Running on the wheel image today to extract the wheel glob, it takes <1s |
||
| var matches []string | ||
| for _, f := range files { | ||
| if matched, _ := path.Match(a.Src, f); matched { | ||
| matches = append(matches, f) | ||
| } | ||
| } | ||
| return matches | ||
| } | ||
|
|
||
| // ResolveDst resolves the destination path for the artifact. | ||
| // Returns an error if the resolved path would escape the artifacts directory. | ||
| func (a *Artifact) ResolveDst(artifactsDir string) (string, error) { | ||
| if filepath.IsAbs(a.Dst) { | ||
| return "", fmt.Errorf("artifact dst must be relative path: %q", a.Dst) | ||
| } | ||
|
|
||
| resolved := filepath.Join(artifactsDir, a.Dst) | ||
| resolved = filepath.Clean(resolved) | ||
|
|
||
| absArtifactsDir, err := filepath.Abs(artifactsDir) | ||
| if err != nil { | ||
| return "", fmt.Errorf("resolve artifacts dir: %w", err) | ||
| } | ||
| absResolved, err := filepath.Abs(resolved) | ||
| if err != nil { | ||
| return "", fmt.Errorf("resolve dst: %w", err) | ||
| } | ||
|
|
||
| if !strings.HasPrefix(absResolved, absArtifactsDir+string(filepath.Separator)) && | ||
| absResolved != absArtifactsDir { | ||
| return "", fmt.Errorf("artifact dst escapes artifacts directory: %q", a.Dst) | ||
| } | ||
|
|
||
| return resolved, nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| package wanda | ||
|
|
||
| import ( | ||
| "path/filepath" | ||
| "strings" | ||
| "testing" | ||
| ) | ||
|
|
||
| func TestArtifact_Validate(t *testing.T) { | ||
| tests := []struct { | ||
| name string | ||
| src string | ||
| dst string | ||
| wantErr string | ||
| }{ | ||
| {"valid absolute src", "/app/file.txt", "output.txt", ""}, | ||
| {"valid glob src", "/*.whl", "wheels/", ""}, | ||
| {"valid nested dst", "/app/file.txt", "subdir/output.txt", ""}, | ||
| {"relative src rejected", "relative/path.txt", "output.txt", "must be absolute"}, | ||
| {"absolute dst rejected", "/app/file.txt", "/absolute/path.txt", "must be relative"}, | ||
| {"dst with .. rejected", "/app/file.txt", "../escape.txt", "cannot escape"}, | ||
| {"dst with nested .. rejected", "/app/file.txt", "subdir/../../escape.txt", "cannot escape"}, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| a := &Artifact{Src: tt.src, Dst: tt.dst} | ||
| err := a.Validate() | ||
| if tt.wantErr == "" { | ||
| if err != nil { | ||
| t.Errorf("Validate() unexpected error: %v", err) | ||
| } | ||
| } else { | ||
| if err == nil { | ||
| t.Errorf("Validate() expected error containing %q, got nil", tt.wantErr) | ||
| } else if !strings.Contains(err.Error(), tt.wantErr) { | ||
| t.Errorf("Validate() error = %q, want containing %q", err.Error(), tt.wantErr) | ||
| } | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| func TestArtifact_HasGlob(t *testing.T) { | ||
| tests := []struct { | ||
| src string | ||
| want bool | ||
| }{ | ||
| {"/app/file.txt", false}, | ||
| {"/app/*.txt", true}, | ||
| {"/app/file?.txt", true}, | ||
| {"/app/[abc].txt", true}, | ||
| {"/*.whl", true}, | ||
| {"/path/to/dir/", false}, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| a := &Artifact{Src: tt.src} | ||
| got := a.HasGlob() | ||
| if got != tt.want { | ||
| t.Errorf("Artifact{Src: %q}.HasGlob() = %v, want %v", tt.src, got, tt.want) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func TestArtifact_ResolveSrcs(t *testing.T) { | ||
| containerFiles := []string{ | ||
| "/app.whl", | ||
| "/other.whl", | ||
| "/readme.txt", | ||
| "/etc/config", | ||
| "/build/output.whl", | ||
| } | ||
|
|
||
| tests := []struct { | ||
| src string | ||
| want []string | ||
| }{ | ||
| {"/*.whl", []string{"/app.whl", "/other.whl"}}, | ||
| {"/*.txt", []string{"/readme.txt"}}, | ||
| {"/build/*.whl", []string{"/build/output.whl"}}, | ||
| {"/*.exe", nil}, | ||
| {"/app.whl", []string{"/app.whl"}}, // non-glob returns src directly | ||
| {"/nonexistent", []string{"/nonexistent"}}, // non-glob doesn't check existence | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| a := &Artifact{Src: tt.src} | ||
| got := a.ResolveSrcs(containerFiles) | ||
| if len(got) != len(tt.want) { | ||
| t.Errorf("Artifact{Src: %q}.ResolveSrcs() = %v, want %v", tt.src, got, tt.want) | ||
| continue | ||
| } | ||
| for i := range got { | ||
| if got[i] != tt.want[i] { | ||
| t.Errorf("Artifact{Src: %q}.ResolveSrcs()[%d] = %q, want %q", tt.src, i, got[i], tt.want[i]) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func TestArtifact_ResolveDst(t *testing.T) { | ||
| artifactsDir := t.TempDir() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| dst string | ||
| want string | ||
| wantErr string | ||
| }{ | ||
| {"simple file", "output.txt", filepath.Join(artifactsDir, "output.txt"), ""}, | ||
| {"subdirectory", "wheels/file.whl", filepath.Join(artifactsDir, "wheels/file.whl"), ""}, | ||
| {"trailing slash", "wheels/", filepath.Join(artifactsDir, "wheels"), ""}, | ||
| {"absolute rejected", "/absolute/path.txt", "", "must be relative"}, | ||
| {"escape rejected", "../escape.txt", "", "escapes artifacts"}, | ||
| {"nested escape rejected", "sub/../../escape.txt", "", "escapes artifacts"}, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| a := &Artifact{Dst: tt.dst} | ||
| got, err := a.ResolveDst(artifactsDir) | ||
| if tt.wantErr == "" { | ||
| if err != nil { | ||
| t.Errorf("ResolveDst() unexpected error: %v", err) | ||
| } else if got != tt.want { | ||
| t.Errorf("ResolveDst() = %q, want %q", got, tt.want) | ||
| } | ||
| } else { | ||
| if err == nil { | ||
| t.Errorf("ResolveDst() expected error containing %q, got nil", tt.wantErr) | ||
| } else if !strings.Contains(err.Error(), tt.wantErr) { | ||
| t.Errorf("ResolveDst() error = %q, want containing %q", err.Error(), tt.wantErr) | ||
| } | ||
| } | ||
| }) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we really need glob pattern support? glob only matches file name, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I went back-and-forth on glob inclusion, and settled on it being worth inclusion. It drastically simplifies the Wheel upload case. Without this, we'd need to rename the wheel file away from schemas like
ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whland into a generic name.Yes, glob only matches file names currently.