From a5f03ac4ac245fe26a316c039d1a0fcfbb3c1aec Mon Sep 17 00:00:00 2001 From: Eddie Knight Date: Wed, 29 Apr 2026 11:34:07 -0500 Subject: [PATCH] fix: local file:// prefix is no longer mandatory Signed-off-by: Eddie Knight --- README.md | 2 +- fetcher/uri.go | 35 ++++++++++++------ fetcher/uri_test.go | 37 ++++++++++++++++++++ gemaraconv/markdown/lexicon_load.go | 8 ++--- gemaraconv/markdown/lexicon_load_test.go | 2 +- gemaraconv/markdown/lexicon_testdata_test.go | 5 --- gemaraconv/markdown/render_test.go | 2 +- gemaraconv/markdown_test.go | 10 +++--- 8 files changed, 73 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index a56924c..0a612ee 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,7 @@ func main() { // ... populate evaluation log ... } - sarifBytes, err := gemaraconv.EvaluationLog(evaluationLog).ToSARIF("file:///path/to/artifact.md", catalog) + sarifBytes, err := gemaraconv.EvaluationLog(evaluationLog).ToSARIF("path/to/artifact.md", catalog) if err != nil { panic(err) } diff --git a/fetcher/uri.go b/fetcher/uri.go index 57b3910..6dbdf08 100644 --- a/fetcher/uri.go +++ b/fetcher/uri.go @@ -8,10 +8,18 @@ import ( "io" "net/http" "net/url" + "regexp" + "strings" ) -// URI routes to File or HTTP based on the URI scheme. -// Supported schemes: file://, http://, https://. +// URI routes to File or HTTP based on the source string. +// +// Recognized forms: +// - http:// or https:// URLs are fetched via [HTTP]. +// - file:// URIs are fetched via [File]. +// - Any other input without a scheme (absolute or relative local paths, +// including Windows drive paths) is treated as a local file path. +// - Inputs with any other <scheme>:// prefix return an unsupported-scheme error. // // For HTTP(S) sources it delegates to [HTTP]; see that type's // documentation for security considerations. @@ -19,17 +27,22 @@ type URI struct { Client *http.Client } +// schemePrefix matches a leading "<scheme>://" per RFC 3986 scheme syntax.
+var schemePrefix = regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9+.\-]*://`) + func (u *URI) Fetch(ctx context.Context, source string) (io.ReadCloser, error) { - parsed, err := url.Parse(source) - if err != nil { - return nil, fmt.Errorf("invalid URI %q: %w", source, err) - } - switch parsed.Scheme { - case "file": - return (&File{}).Fetch(ctx, parsed.Path) - case "http", "https": + switch { + case strings.HasPrefix(source, "http://"), strings.HasPrefix(source, "https://"): return (&HTTP{Client: u.Client}).Fetch(ctx, source) - default: + case strings.HasPrefix(source, "file://"): + parsed, err := url.Parse(source) + if err != nil { + return nil, fmt.Errorf("invalid file URI %q: %w", source, err) + } + return (&File{}).Fetch(ctx, parsed.Path) + case schemePrefix.MatchString(source): return nil, fmt.Errorf("unsupported URI scheme in %q", source) + default: + return (&File{}).Fetch(ctx, source) } } diff --git a/fetcher/uri_test.go b/fetcher/uri_test.go index 90cdb5c..ffcbce7 100644 --- a/fetcher/uri_test.go +++ b/fetcher/uri_test.go @@ -52,3 +52,40 @@ func TestURI_UnsupportedScheme(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), "unsupported URI scheme") } + +func TestURI_BarePath_Absolute(t *testing.T) { + tmp := t.TempDir() + p := filepath.Join(tmp, "data.yaml") + require.NoError(t, os.WriteFile(p, []byte("ok: true\n"), 0600)) + + f := &URI{} + rc, err := f.Fetch(context.Background(), p) + require.NoError(t, err) + defer rc.Close() //nolint:errcheck + + data, err := io.ReadAll(rc) + require.NoError(t, err) + assert.Equal(t, "ok: true\n", string(data)) +} + +func TestURI_BarePath_Relative(t *testing.T) { + tmp := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmp, "data.yaml"), []byte("ok: true\n"), 0600)) + t.Chdir(tmp) + + f := &URI{} + rc, err := f.Fetch(context.Background(), "./data.yaml") + require.NoError(t, err) + defer rc.Close() //nolint:errcheck + + data, err := io.ReadAll(rc) + require.NoError(t, err) + assert.Equal(t, "ok: 
true\n", string(data)) +} + +func TestURI_TypoScheme(t *testing.T) { + f := &URI{} + _, err := f.Fetch(context.Background(), "htps://example.com/file.yaml") + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported URI scheme") +} diff --git a/gemaraconv/markdown/lexicon_load.go b/gemaraconv/markdown/lexicon_load.go index 9ba478c..26c4f34 100644 --- a/gemaraconv/markdown/lexicon_load.go +++ b/gemaraconv/markdown/lexicon_load.go @@ -10,9 +10,9 @@ import ( "github.com/gemaraproj/go-gemara/internal/codec" ) -// resolveLexiconURL returns the https:// or file:// URI for the lexicon artifact. +// resolveLexiconURL returns the source string (URL or local path) for the lexicon artifact. // Precedence: metadata.mapping-references entry whose id matches metadata.lexicon.reference-id; -// else metadata.lexicon.remarks if it is a fetchable URL. +// else metadata.lexicon.remarks if it is a fetchable URL (must use http://, https://, or file://). func resolveLexiconURL(meta gemara.Metadata) (string, error) { if meta.Lexicon == nil { return "", fmt.Errorf("lexicon mapping is nil") @@ -37,8 +37,8 @@ func resolveLexiconURL(meta gemara.Metadata) (string, error) { return "", fmt.Errorf("no mapping-references entry with id %q for metadata.lexicon", refID) } -// loadLexiconFromURI fetches a Lexicon from a file:// or http(s):// URI -// and returns normalized entries. +// loadLexiconFromURI fetches a Lexicon from an http(s):// URL, a file:// URI, +// or a local file path, and returns normalized entries. 
func loadLexiconFromURI(ctx context.Context, uri string) ([]lexiconEntry, error) { doc, err := gemara.Load[gemara.Lexicon](ctx, &fetcher.URI{}, uri) if err != nil { diff --git a/gemaraconv/markdown/lexicon_load_test.go b/gemaraconv/markdown/lexicon_load_test.go index 993e780..af12166 100644 --- a/gemaraconv/markdown/lexicon_load_test.go +++ b/gemaraconv/markdown/lexicon_load_test.go @@ -39,7 +39,7 @@ func TestParseLexiconYAML_rejects(t *testing.T) { } func TestLoadLexiconFromURI_file(t *testing.T) { - entries, err := loadLexiconFromURI(context.Background(), lexiconFileURL(t, "lexicon_good.yaml")) + entries, err := loadLexiconFromURI(context.Background(), lexiconTestdataAbsPath(t, "lexicon_good.yaml")) require.NoError(t, err) require.Len(t, entries, 2) } diff --git a/gemaraconv/markdown/lexicon_testdata_test.go b/gemaraconv/markdown/lexicon_testdata_test.go index 7a7ff3c..703a4ca 100644 --- a/gemaraconv/markdown/lexicon_testdata_test.go +++ b/gemaraconv/markdown/lexicon_testdata_test.go @@ -22,8 +22,3 @@ func readLexiconTestdata(t *testing.T, name string) []byte { require.NoError(t, err) return fileBytes } - -func lexiconFileURL(t *testing.T, name string) string { - t.Helper() - return "file://" + filepath.ToSlash(lexiconTestdataAbsPath(t, name)) -} diff --git a/gemaraconv/markdown/render_test.go b/gemaraconv/markdown/render_test.go index 7d2fd37..5cd02ad 100644 --- a/gemaraconv/markdown/render_test.go +++ b/gemaraconv/markdown/render_test.go @@ -269,7 +269,7 @@ func TestCatalogToMarkdown_lexiconAutolinkFromFile(t *testing.T) { Id: "m", Type: gemara.ControlCatalogArtifact, Description: "d", Author: gemara.Actor{Name: "a", Type: gemara.Human}, Lexicon: &gemara.ArtifactMapping{ReferenceId: "lex"}, MappingReferences: []gemara.MappingReference{ - {Id: "lex", Title: "L", Version: "1", Url: lexiconFileURL(t, "lexicon_good.yaml")}, + {Id: "lex", Title: "L", Version: "1", Url: lexiconTestdataAbsPath(t, "lexicon_good.yaml")}, }, }, Title: "Lex", diff --git 
a/gemaraconv/markdown_test.go b/gemaraconv/markdown_test.go index aad4da3..a9495eb 100644 --- a/gemaraconv/markdown_test.go +++ b/gemaraconv/markdown_test.go @@ -15,12 +15,12 @@ import ( "github.com/stretchr/testify/require" ) -// testDataFileURL returns a file:// URI to ../test-data/<name> resolved to an absolute path. -func testDataFileURL(t *testing.T, name string) string { +// testDataFilePath returns the absolute path to ../test-data/<name>. +func testDataFilePath(t *testing.T, name string) string { t.Helper() abs, err := filepath.Abs(filepath.Join("..", "test-data", name)) require.NoError(t, err) - return "file://" + filepath.ToSlash(abs) + return abs } func loadControlCatalogFromTestData(t *testing.T, name string) *gemara.ControlCatalog { @@ -345,7 +345,7 @@ func TestCatalogToMarkdown_lexiconAutolink(t *testing.T) { GemaraVersion: "1.0", Lexicon: &gemara.ArtifactMapping{ReferenceId: "lex"}, MappingReferences: []gemara.MappingReference{ - {Id: "lex", Title: "Lex", Version: "1", Url: testDataFileURL(t, "lexicon_good.yaml")}, + {Id: "lex", Title: "Lex", Version: "1", Url: testDataFilePath(t, "lexicon_good.yaml")}, }, }, Title: "Lex test", @@ -389,7 +389,7 @@ func TestCatalogToMarkdown_lexiconAutolink_offByDefault(t *testing.T) { Author: gemara.Actor{Name: "a", Type: gemara.Human}, Lexicon: &gemara.ArtifactMapping{ReferenceId: "lex"}, MappingReferences: []gemara.MappingReference{ - {Id: "lex", Title: "L", Version: "1", Url: testDataFileURL(t, "lexicon_good.yaml")}, + {Id: "lex", Title: "L", Version: "1", Url: testDataFilePath(t, "lexicon_good.yaml")}, }, }, Title: "x",