Skip to content

Commit 6649b42

Browse files
authored
Merge pull request #2808 from fullsend-ai/agent/2806-host-files-url-resolution
fix(#2806): resolve host_files relative src paths for URL bases
2 parents e5bbbf4 + 03ed46a commit 6649b42

2 files changed

Lines changed: 239 additions & 0 deletions

File tree

internal/harness/compose.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,15 @@ func loadBaseChain(
216216
}
217217
deps = append(deps, resourceDeps...)
218218

219+
// host_files with relative src paths need the same fetch-cache-rewrite
220+
// treatment as scripts and resources. Entries using ${VAR} expansion
221+
// are left unchanged — they resolve at bootstrap time on the host.
222+
hostFileDeps, err := resolveBaseHostFiles(ctx, base, baseRef, allowlist, opts)
223+
if err != nil {
224+
return nil, nil, fmt.Errorf("resolving base host_files from %s: %w", cleanURL, err)
225+
}
226+
deps = append(deps, hostFileDeps...)
227+
219228
baseDir = childDir
220229
} else {
221230
// Local path base
@@ -664,6 +673,41 @@ func resolveBaseResources(ctx context.Context, base *Harness, baseURL string, al
664673
return deps, nil
665674
}
666675

676+
// resolveBaseHostFiles fetches host_files with relative src paths from a
677+
// URL-referenced base harness. For each host_files entry whose src is a
678+
// non-empty relative path (not a ${VAR} reference, URL, or absolute path),
679+
// the file is fetched from the base URL's directory, cached content-addressed,
680+
// and the src field is rewritten to the local cache path. This ensures
681+
// host_files inherited through base: composition resolve correctly at sandbox
682+
// setup time, the same way scripts and resources do.
683+
func resolveBaseHostFiles(ctx context.Context, base *Harness, baseURL string, allowlist []string, opts ComposeOpts) ([]Dependency, error) {
684+
baseURLDir := urlParentDirPrefix(baseURL)
685+
if baseURLDir == "" {
686+
return nil, fmt.Errorf("cannot determine directory from base URL")
687+
}
688+
689+
var deps []Dependency
690+
691+
for i := range base.HostFiles {
692+
src := base.HostFiles[i].Src
693+
if src == "" || strings.Contains(src, "${") || IsURL(src) || filepath.IsAbs(src) {
694+
continue
695+
}
696+
fieldName := fmt.Sprintf("host_files[%d].src", i)
697+
if err := validateBaseRelPath(fieldName, src); err != nil {
698+
return nil, err
699+
}
700+
dep, cachePath, err := fetchBaseFile(ctx, fieldName, baseURLDir, src, allowlist, opts, "resource", false)
701+
if err != nil {
702+
return nil, err
703+
}
704+
base.HostFiles[i].Src = cachePath
705+
deps = append(deps, dep)
706+
}
707+
708+
return deps, nil
709+
}
710+
667711
// validateBaseRelPath validates that a relative path inherited from a URL base
668712
// is safe to resolve. Rejects null bytes, query/fragment markers, URLs,
669713
// absolute paths, and path traversal segments.

internal/harness/compose_test.go

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2929,3 +2929,198 @@ func TestFetchBaseSkill_ForgeClient_NarrowAllowlist(t *testing.T) {
29292929
require.Error(t, err)
29302930
assert.Contains(t, err.Error(), "not in allowed_remote_resources")
29312931
}
2932+
2933+
// --- resolveBaseHostFiles tests ---
2934+
2935+
func TestLoadWithBase_URLBase_HostFilesFetched(t *testing.T) {
2936+
envContent := []byte("GCP_PROJECT=test-project\n")
2937+
triageEnv := []byte("TRIAGE_MODE=auto\n")
2938+
2939+
baseContent := []byte(`
2940+
agent: agents/triage.md
2941+
role: test
2942+
host_files:
2943+
- src: env/gcp-vertex.env
2944+
dest: /sandbox/workspace/.env.d/gcp-vertex.env
2945+
expand: true
2946+
- src: env/triage.env
2947+
dest: /sandbox/workspace/.env.d/triage.env
2948+
expand: true
2949+
`)
2950+
2951+
server, policy := setupScriptTestServer(t, baseContent, map[string][]byte{
2952+
"/env/gcp-vertex.env": envContent,
2953+
"/env/triage.env": triageEnv,
2954+
})
2955+
2956+
hash := computeHash(baseContent)
2957+
dir := t.TempDir()
2958+
cacheDir := filepath.Join(dir, "cache")
2959+
2960+
baseURL := server.URL + "/harness/triage.yaml#sha256=" + hash
2961+
2962+
path := writeTestHarness(t, dir, "child.yaml", `
2963+
role: test
2964+
base: `+baseURL+`
2965+
`)
2966+
2967+
h, deps, err := LoadWithBase(context.Background(), path, ComposeOpts{
2968+
WorkspaceRoot: cacheDir,
2969+
FetchPolicy: policy,
2970+
OrgAllowlist: []string{server.URL + "/"},
2971+
})
2972+
require.NoError(t, err)
2973+
2974+
// Host files resolved to local cache paths
2975+
require.Len(t, h.HostFiles, 2)
2976+
for i, hf := range h.HostFiles {
2977+
assert.True(t, filepath.IsAbs(hf.Src), "host_files[%d].src should be absolute cache path", i)
2978+
assert.False(t, IsURL(hf.Src), "host_files[%d].src should not be a URL", i)
2979+
}
2980+
2981+
// Verify cached content
2982+
content0, err := os.ReadFile(h.HostFiles[0].Src)
2983+
require.NoError(t, err)
2984+
assert.Equal(t, envContent, content0)
2985+
2986+
content1, err := os.ReadFile(h.HostFiles[1].Src)
2987+
require.NoError(t, err)
2988+
assert.Equal(t, triageEnv, content1)
2989+
2990+
// Dest and expand preserved
2991+
assert.Equal(t, "/sandbox/workspace/.env.d/gcp-vertex.env", h.HostFiles[0].Dest)
2992+
assert.True(t, h.HostFiles[0].Expand)
2993+
2994+
// Dependencies include host_files
2995+
hostFileDeps := []Dependency{}
2996+
for _, d := range deps {
2997+
if strings.HasPrefix(d.Field, "host_files[") {
2998+
hostFileDeps = append(hostFileDeps, d)
2999+
}
3000+
}
3001+
assert.Len(t, hostFileDeps, 2)
3002+
for _, d := range hostFileDeps {
3003+
assert.Equal(t, "resource", d.Type)
3004+
}
3005+
}
3006+
3007+
func TestLoadWithBase_URLBase_HostFilesMixedEnvVarAndRelative(t *testing.T) {
3008+
envContent := []byte("KEY=value\n")
3009+
3010+
baseContent := []byte(`
3011+
agent: agents/triage.md
3012+
role: test
3013+
host_files:
3014+
- src: env/app.env
3015+
dest: /sandbox/.env.d/app.env
3016+
- src: ${GOOGLE_APPLICATION_CREDENTIALS}
3017+
dest: /tmp/.gcp-credentials.json
3018+
`)
3019+
3020+
server, policy := setupScriptTestServer(t, baseContent, map[string][]byte{
3021+
"/env/app.env": envContent,
3022+
})
3023+
3024+
hash := computeHash(baseContent)
3025+
dir := t.TempDir()
3026+
cacheDir := filepath.Join(dir, "cache")
3027+
3028+
baseURL := server.URL + "/harness/triage.yaml#sha256=" + hash
3029+
3030+
path := writeTestHarness(t, dir, "child.yaml", `
3031+
role: test
3032+
base: `+baseURL+`
3033+
`)
3034+
3035+
h, _, err := LoadWithBase(context.Background(), path, ComposeOpts{
3036+
WorkspaceRoot: cacheDir,
3037+
FetchPolicy: policy,
3038+
OrgAllowlist: []string{server.URL + "/"},
3039+
})
3040+
require.NoError(t, err)
3041+
3042+
require.Len(t, h.HostFiles, 2)
3043+
3044+
// Relative src resolved to cache path
3045+
assert.True(t, filepath.IsAbs(h.HostFiles[0].Src), "relative src should be resolved")
3046+
3047+
// ${VAR} src left unchanged
3048+
assert.Equal(t, "${GOOGLE_APPLICATION_CREDENTIALS}", h.HostFiles[1].Src)
3049+
}
3050+
3051+
func TestResolveBaseHostFiles_SkipsEnvVarPaths(t *testing.T) {
3052+
base := &Harness{
3053+
HostFiles: []HostFile{
3054+
{Src: "${HOME}/file.txt", Dest: "/sandbox/file.txt"},
3055+
},
3056+
}
3057+
deps, err := resolveBaseHostFiles(context.Background(), base, "https://example.com/harness/triage.yaml#sha256=abc", nil, ComposeOpts{})
3058+
require.NoError(t, err)
3059+
assert.Empty(t, deps)
3060+
assert.Equal(t, "${HOME}/file.txt", base.HostFiles[0].Src)
3061+
}
3062+
3063+
func TestResolveBaseHostFiles_SkipsAbsolutePaths(t *testing.T) {
3064+
base := &Harness{
3065+
HostFiles: []HostFile{
3066+
{Src: "/absolute/path/file.txt", Dest: "/sandbox/file.txt"},
3067+
},
3068+
}
3069+
deps, err := resolveBaseHostFiles(context.Background(), base, "https://example.com/harness/triage.yaml#sha256=abc", nil, ComposeOpts{})
3070+
require.NoError(t, err)
3071+
assert.Empty(t, deps)
3072+
assert.Equal(t, "/absolute/path/file.txt", base.HostFiles[0].Src)
3073+
}
3074+
3075+
func TestResolveBaseHostFiles_SkipsEmptySrc(t *testing.T) {
3076+
base := &Harness{
3077+
HostFiles: []HostFile{
3078+
{Src: "", Dest: "/sandbox/file.txt"},
3079+
},
3080+
}
3081+
deps, err := resolveBaseHostFiles(context.Background(), base, "https://example.com/harness/triage.yaml#sha256=abc", nil, ComposeOpts{})
3082+
require.NoError(t, err)
3083+
assert.Empty(t, deps)
3084+
}
3085+
3086+
func TestResolveBaseHostFiles_RejectsPathTraversal(t *testing.T) {
3087+
base := &Harness{
3088+
HostFiles: []HostFile{
3089+
{Src: "../../etc/passwd", Dest: "/sandbox/passwd"},
3090+
},
3091+
}
3092+
_, err := resolveBaseHostFiles(context.Background(), base, "https://example.com/harness/triage.yaml#sha256=abc", nil, ComposeOpts{})
3093+
require.Error(t, err)
3094+
assert.Contains(t, err.Error(), "must not contain path traversal")
3095+
assert.Contains(t, err.Error(), "host_files[0].src")
3096+
}
3097+
3098+
func TestResolveBaseHostFiles_RejectsNullBytes(t *testing.T) {
3099+
base := &Harness{
3100+
HostFiles: []HostFile{
3101+
{Src: "env/test\x00.env", Dest: "/sandbox/.env"},
3102+
},
3103+
}
3104+
_, err := resolveBaseHostFiles(context.Background(), base, "https://example.com/harness/triage.yaml#sha256=abc", nil, ComposeOpts{})
3105+
require.Error(t, err)
3106+
assert.Contains(t, err.Error(), "must not contain null bytes")
3107+
assert.Contains(t, err.Error(), "host_files[0].src")
3108+
}
3109+
3110+
func TestResolveBaseHostFiles_InvalidBaseURL(t *testing.T) {
3111+
base := &Harness{
3112+
HostFiles: []HostFile{
3113+
{Src: "env/test.env", Dest: "/sandbox/.env"},
3114+
},
3115+
}
3116+
_, err := resolveBaseHostFiles(context.Background(), base, "", nil, ComposeOpts{})
3117+
require.Error(t, err)
3118+
assert.Contains(t, err.Error(), "cannot determine directory")
3119+
}
3120+
3121+
func TestResolveBaseHostFiles_EmptyHostFiles(t *testing.T) {
3122+
base := &Harness{}
3123+
deps, err := resolveBaseHostFiles(context.Background(), base, "https://example.com/harness/triage.yaml#sha256=abc", nil, ComposeOpts{})
3124+
require.NoError(t, err)
3125+
assert.Empty(t, deps)
3126+
}

0 commit comments

Comments
 (0)