Skip to content

Commit ab5b93c

Browse files
thisisaaronland and thisisaaronland
authored
Bug fix: Refactor multi:// scheme to use correct URI template (#4)
* snapshot: block out code to support multiple resolver+template findingaids * refactor the code to handle multi:// URIs * Increased (debug) logging --------- Co-authored-by: thisisaaronland <thisisaaronland@localhost>
1 parent b7f3429 commit ab5b93c

File tree

2 files changed

+193
-89
lines changed

2 files changed

+193
-89
lines changed

reader.go

Lines changed: 159 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@ package findingaid
22

33
import (
44
"context"
5-
"database/sql"
65
"fmt"
76
"io"
8-
_ "log"
7+
"log/slog"
98
"net/url"
109
"strings"
1110

@@ -18,15 +17,18 @@ import (
1817
// WHOSONFIRST_DATA_TEMPLATE is a URL template for the root `data` directory in Who's On First data repositories.
1918
const WHOSONFIRST_DATA_TEMPLATE string = "https://raw.githubusercontent.com/whosonfirst-data/{repo}/master/data/"
2019

20+
// findingaid is a struct defining a resolver.Resolver and *uritemplates.UriTemplate pair
21+
type findingaid struct {
22+
// A resolver.Resolver instance used to derive the Who's On First repository name for an ID.
23+
resolver resolver.Resolver
24+
// A compiled `uritemplates.UriTemplate` to use resolving Who's On First finding aid URIs.
25+
template *uritemplates.UriTemplate
26+
}
27+
2128
// FindingAidReader implements the `whosonfirst/go-reader` interface for use with Who's On First finding aids.
2229
type FindingAidReader struct {
2330
wof_reader.Reader
24-
// A SQLite `sql.DB` instance containing Who's On First finding aid data. (Optional)
25-
db *sql.DB
26-
// A compiled `uritemplates.UriTemplate` to use resolving Who's On First finding aid URIs.
27-
template *uritemplates.UriTemplate
28-
// A resolver.Resolver instance used to derive the Who's On First repository name for an ID.
29-
resolver resolver.Resolver
31+
findingaids []*findingaid
3032
}
3133

3234
func init() {
@@ -46,82 +48,65 @@ func NewFindingAidReader(ctx context.Context, uri string) (wof_reader.Reader, er
4648

4749
q := u.Query()
4850

49-
uri_template := WHOSONFIRST_DATA_TEMPLATE
50-
51-
if q.Get("template") != "" {
52-
uri_template = q.Get("template")
53-
}
51+
findingaids := make([]*findingaid, 0)
5452

55-
uri_template, err = url.QueryUnescape(uri_template)
56-
57-
if err != nil {
58-
return nil, fmt.Errorf("Failed to unescape ?template= parameter, %w", err)
59-
}
53+
logger := slog.Default()
54+
logger = logger.With("scheme", u.Host)
55+
56+
switch u.Host {
57+
case "multi":
6058

61-
t, err := uritemplates.Parse(uri_template)
59+
for _, rt_uri := range q["resolver"] {
6260

63-
if err != nil {
64-
return nil, fmt.Errorf("Failed to parse URI template, %w", err)
65-
}
61+
rt_u, err := url.Parse(rt_uri)
6662

67-
q.Del("template")
68-
u.RawQuery = q.Encode()
69-
70-
// findingaid://sqlite?dsn={DSN}
71-
// findingaid://awsdynamo/{TABLENAME}
72-
// findingaid://http(s)/{HOST}/{PATH}
63+
if err != nil {
64+
return nil, fmt.Errorf("Failed to parse resolver URI, %w", err)
65+
}
7366

74-
// Set up resolver
67+
fa_u := url.URL{}
68+
fa_u.Scheme = "findingaid"
69+
fa_u.Host = rt_u.Scheme
70+
fa_u.Path = rt_u.Host + rt_u.Path
71+
fa_u.RawQuery = rt_u.RawQuery
7572

76-
var ru *url.URL
73+
fa_uri := fa_u.String()
7774

78-
switch u.Host {
79-
case "http", "https":
75+
r, t, err := deriveResolverAndTemplate(ctx, fa_uri)
8076

81-
path := u.Path
82-
path = strings.TrimLeft(path, "/")
77+
if err != nil {
78+
return nil, fmt.Errorf("Failed to derive resolver and template from ?resolver= URI, %w", err)
79+
}
8380

84-
parts := strings.Split(path, "/")
81+
fa := &findingaid{
82+
resolver: r,
83+
template: t,
84+
}
8585

86-
ru = &url.URL{}
87-
ru.Scheme = u.Host
88-
ru.Host = parts[0]
89-
90-
if len(parts) > 1 {
91-
path = strings.Join(parts[1:], "/")
92-
ru.Path = fmt.Sprintf("/%s", path)
86+
logger.Debug("Add findingaid reader", "uri", fa_uri)
87+
findingaids = append(findingaids, fa)
9388
}
9489

95-
ru.RawQuery = u.RawQuery
96-
97-
case "multi":
98-
99-
ru = &url.URL{}
100-
ru.Scheme = u.Host
101-
ru.RawQuery = u.RawQuery
102-
10390
default:
10491

105-
path := u.Path
106-
path = strings.TrimLeft(path, "/")
107-
108-
ru = &url.URL{}
109-
ru.Scheme = u.Host
110-
ru.Host = path
111-
ru.RawQuery = u.RawQuery
112-
}
92+
r, t, err := deriveResolverAndTemplate(ctx, uri)
11393

114-
r_uri := ru.String()
115-
116-
f, err := resolver.NewResolver(ctx, r_uri)
94+
if err != nil {
95+
return nil, err
96+
}
11797

118-
if err != nil {
119-
return nil, fmt.Errorf("Failed to create resolver, %w", err)
98+
logger.Debug("Add findingaid reader", "uri", uri)
99+
100+
findingaids = []*findingaid{
101+
&findingaid{
102+
resolver: r,
103+
template: t,
104+
},
105+
}
120106
}
121107

122108
r := &FindingAidReader{
123-
resolver: f,
124-
template: t,
109+
findingaids: findingaids,
125110
}
126111

127112
return r, nil
@@ -186,33 +171,133 @@ func (r *FindingAidReader) getReaderURIAndPath(ctx context.Context, uri string)
186171

187172
// TBD: cache this?
188173

174+
logger := slog.Default()
175+
logger = logger.With("uri", uri)
176+
189177
id, uri_args, err := wof_uri.ParseURI(uri)
190178

191179
if err != nil {
180+
logger.Error("Failed to parse URI", "error", err)
192181
return "", "", fmt.Errorf("Failed to parse URI, %w", err)
193182
}
194183

195-
repo, err := r.resolver.GetRepo(ctx, id)
184+
rel_path, err := wof_uri.Id2RelPath(id, uri_args)
196185

197186
if err != nil {
198-
return "", "", fmt.Errorf("Failed to derive repo, %w", err)
187+
logger.Error("Failed to derive relative path for ID", "id", id, "error", err)
188+
return "", "", fmt.Errorf("Failed to derive path, %w", err)
199189
}
200190

201-
rel_path, err := wof_uri.Id2RelPath(id, uri_args)
191+
for idx, fa := range r.findingaids {
192+
193+
logger.Debug("Get repo", "findingaid", idx, "id", id)
194+
repo, err := fa.resolver.GetRepo(ctx, id)
195+
196+
if err != nil {
197+
198+
if err == resolver.ErrNotFound {
199+
logger.Debug("Failed to derive repo with resolver, not found", "id", id)
200+
continue
201+
}
202+
203+
logger.Error("Failed to derive repo with resolver", "id", id, "error", err)
204+
return "", "", fmt.Errorf("Failed to derive repo, %w", err)
205+
}
206+
207+
values := map[string]interface{}{
208+
"repo": repo,
209+
}
210+
211+
reader_uri, err := fa.template.Expand(values)
212+
213+
if err != nil {
214+
logger.Error("Failed to expand template for resolver", "repo", repo, "template", fa.template, "error", err)
215+
return "", "", fmt.Errorf("Failed to derive reader URI, %w", err)
216+
}
217+
218+
logger.Debug("Return reader URI for resolver", "reader_uri", reader_uri, "rel_path", rel_path)
219+
return reader_uri, rel_path, nil
220+
}
221+
222+
return "", "", fmt.Errorf("Failed to derive repo, no findingaid matches")
223+
}
224+
225+
func deriveResolverAndTemplate(ctx context.Context, uri string) (resolver.Resolver, *uritemplates.UriTemplate, error) {
226+
227+
u, err := url.Parse(uri)
202228

203229
if err != nil {
204-
return "", "", fmt.Errorf("Failed to derive path, %w", err)
230+
return nil, nil, err
205231
}
206232

207-
values := map[string]interface{}{
208-
"repo": repo,
233+
q := u.Query()
234+
235+
uri_template := WHOSONFIRST_DATA_TEMPLATE
236+
237+
if q.Get("template") != "" {
238+
uri_template = q.Get("template")
239+
}
240+
241+
uri_template, err = url.QueryUnescape(uri_template)
242+
243+
if err != nil {
244+
return nil, nil, fmt.Errorf("Failed to unescape ?template= parameter, %w", err)
245+
}
246+
247+
t, err := uritemplates.Parse(uri_template)
248+
249+
if err != nil {
250+
return nil, nil, fmt.Errorf("Failed to parse URI template, %w", err)
209251
}
210252

211-
reader_uri, err := r.template.Expand(values)
253+
q.Del("template")
254+
u.RawQuery = q.Encode()
255+
256+
// findingaid://sqlite?dsn={DSN}
257+
// findingaid://awsdynamo/{TABLENAME}
258+
// findingaid://http(s)/{HOST}/{PATH}
259+
260+
// Set up resolver
261+
262+
var ru *url.URL
263+
264+
switch u.Host {
265+
case "http", "https":
266+
267+
path := u.Path
268+
path = strings.TrimLeft(path, "/")
269+
270+
parts := strings.Split(path, "/")
271+
272+
ru = &url.URL{}
273+
ru.Scheme = u.Host
274+
ru.Host = parts[0]
275+
276+
if len(parts) > 1 {
277+
path = strings.Join(parts[1:], "/")
278+
ru.Path = fmt.Sprintf("/%s", path)
279+
}
280+
281+
ru.RawQuery = u.RawQuery
282+
283+
default:
284+
285+
path := u.Path
286+
path = strings.TrimLeft(path, "/")
287+
288+
ru = &url.URL{}
289+
ru.Scheme = u.Host
290+
ru.Host = path
291+
ru.RawQuery = u.RawQuery
292+
}
293+
294+
r_uri := ru.String()
295+
296+
r, err := resolver.NewResolver(ctx, r_uri)
212297

213298
if err != nil {
214-
return "", "", fmt.Errorf("Failed to derive reader URI, %w", err)
299+
return nil, nil, fmt.Errorf("Failed to create resolver, %w", err)
215300
}
216301

217-
return reader_uri, rel_path, nil
302+
return r, t, nil
218303
}

reader_test.go

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@ package findingaid
33
import (
44
"context"
55
"fmt"
6+
"log/slog"
7+
"net/url"
68
"os"
79
"testing"
810

911
"github.com/whosonfirst/go-reader/v2"
1012
)
1113

12-
//
13-
1414
func TestSQLiteFindingAid(t *testing.T) {
1515

1616
ctx := context.Background()
@@ -87,33 +87,52 @@ func TestHTTPFindingAid(t *testing.T) {
8787

8888
func TestMultiFindingAid(t *testing.T) {
8989

90+
slog.SetLogLoggerLevel(slog.LevelDebug)
91+
slog.Debug("Verbose logging enabled")
92+
9093
ctx := context.Background()
9194

92-
reader_uri := "findingaid://multi?resolver=https%3A%2F%2Fstatic.sfomuseum.org%2Ffindingaid%3Ftemplate%3Dhttps%3A%2F%2Fraw.githubusercontent.com%2Fsfomuseum-data%2F%7Brepo%7D%2Fmain%2Fdata%2F&resolver=https%3A%2F%2Fdata.whosonfirst.org%2Ffindingaid%3Ftemplate%3Dhttps%3A%2F%2Fraw.githubusercontent.com%2Fwhosonfirst-data%2F%7Brepo%7D%2Fmaster%2Fdata%2F"
95+
reader_q := url.Values{}
96+
reader_q.Add("resolver", "https://static.sfomuseum.org/findingaid?template=https://raw.githubusercontent.com/sfomuseum-data/{repo}/main/data/")
97+
reader_q.Add("resolver", "https://data.whosonfirst.org/findingaid?template=https://raw.githubusercontent.com/whosonfirst-data/{repo}/master/data/")
98+
99+
reader_u := url.URL{}
100+
reader_u.Scheme = "findingaid"
101+
reader_u.Host = "multi"
102+
reader_u.RawQuery = reader_q.Encode()
103+
104+
reader_uri := reader_u.String()
105+
slog.Debug("Create reader", "uri", reader_uri)
93106

94107
r, err := reader.NewReader(ctx, reader_uri)
95108

96109
if err != nil {
97110
t.Fatalf("Failed to create new reader, %v", err)
98111
}
99112

100-
uri := "85865975"
113+
tests := []string{
114+
"85865975",
115+
"1159396133",
116+
}
101117

102-
fh, err := r.Read(ctx, uri)
118+
for _, uri := range tests {
103119

104-
if err != nil {
105-
t.Fatalf("Failed to read %s, %v", uri, err)
106-
}
120+
fh, err := r.Read(ctx, uri)
107121

108-
fh.Close()
122+
if err != nil {
123+
t.Fatalf("Failed to read %s, %v", uri, err)
124+
}
109125

110-
exists, err := r.Exists(ctx, uri)
126+
fh.Close()
111127

112-
if err != nil {
113-
t.Fatalf("Failed to determine if %s exists, %v", uri, err)
114-
}
128+
exists, err := r.Exists(ctx, uri)
115129

116-
if !exists {
117-
t.Fatalf("Expected %s to exists", uri)
130+
if err != nil {
131+
t.Fatalf("Failed to determine if %s exists, %v", uri, err)
132+
}
133+
134+
if !exists {
135+
t.Fatalf("Expected %s to exists", uri)
136+
}
118137
}
119138
}

0 commit comments

Comments
 (0)