Skip to content

Commit c024f89

Browse files
committed
refactor: use doiProvider interface
1 parent d0dcce3 commit c024f89

4 files changed

Lines changed: 160 additions & 96 deletions

File tree

backend/doi/dataverse.go

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -35,48 +35,35 @@ func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint
3535
return Dataverse, endpointURL, nil
3636
}
3737

38-
// Implements Fs.List() for Dataverse installations
39-
func (f *Fs) listDataverse(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
40-
fileEntries, err := f.listDataverseDoiFiles(ctx)
41-
if err != nil {
42-
return nil, fmt.Errorf("error listing %q: %w", dir, err)
43-
}
38+
// dataverseProvider implements the doiProvider interface for Dataverse installations
39+
type dataverseProvider struct {
40+
f *Fs
41+
}
42+
43+
// CanHaveSubDirs is true when the remote can have subdirectories
44+
func (dp *dataverseProvider) CanHaveSubDirs() bool {
45+
return true
46+
}
4447

45-
fullDir := path.Join(f.root, dir)
46-
if fullDir != "" {
47-
fullDir += "/"
48+
// IsFile returns true if remote is a file
49+
func (dp *dataverseProvider) IsFile(ctx context.Context, remote string) (isFile bool, err error) {
50+
entries, err := dp.ListEntries(ctx)
51+
if err != nil {
52+
return false, err
4853
}
49-
dirPaths := map[string]bool{}
50-
for _, entry := range fileEntries {
51-
// First, filter out files not in `fullDir`
52-
if !strings.HasPrefix(entry.remote, fullDir) {
53-
continue
54-
}
55-
// Then, find entries in subfolders
56-
remotePath := entry.remote
57-
if fullDir != "" {
58-
remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
59-
}
60-
parts := strings.SplitN(remotePath, "/", 2)
61-
if len(parts) == 1 {
62-
newEntry := *entry
63-
newEntry.remote = path.Join(dir, remotePath)
64-
entries = append(entries, &newEntry)
65-
} else {
66-
dirPaths[path.Join(dir, parts[0])] = true
54+
for _, entry := range entries {
55+
if entry.remote == remote {
56+
isFile = true
57+
break
6758
}
6859
}
69-
for dirPath := range dirPaths {
70-
entry := fs.NewDir(dirPath, time.Time{})
71-
entries = append(entries, entry)
72-
}
73-
return entries, nil
60+
return isFile, nil
7461
}
7562

76-
// List the files contained in the DOI
77-
func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err error) {
63+
// ListEntries returns the full list of entries found at the remote, regardless of root
64+
func (dp *dataverseProvider) ListEntries(ctx context.Context) (entries []*Object, err error) {
7865
// Use the cache if populated
79-
cachedEntries, found := f.cache.GetMaybe("files")
66+
cachedEntries, found := dp.f.cache.GetMaybe("files")
8067
if found {
8168
parsedEntries, ok := cachedEntries.([]Object)
8269
if ok {
@@ -88,33 +75,33 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err
8875
}
8976
}
9077

91-
filesURL := f.endpoint
78+
filesURL := dp.f.endpoint
9279
var res *http.Response
9380
var result api.DataverseDatasetResponse
9481
opts := rest.Opts{
9582
Method: "GET",
9683
Path: strings.TrimLeft(filesURL.EscapedPath(), "/"),
9784
Parameters: filesURL.Query(),
9885
}
99-
err = f.pacer.Call(func() (bool, error) {
100-
res, err = f.srv.CallJSON(ctx, &opts, nil, &result)
86+
err = dp.f.pacer.Call(func() (bool, error) {
87+
res, err = dp.f.srv.CallJSON(ctx, &opts, nil, &result)
10188
return shouldRetry(ctx, res, err)
10289
})
10390
if err != nil {
10491
return nil, fmt.Errorf("readDir failed: %w", err)
10592
}
10693
modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime)
10794
if modTimeErr != nil {
108-
fs.Logf(f, "error: could not parse last update time %v", modTimeErr)
95+
fs.Logf(dp.f, "error: could not parse last update time %v", modTimeErr)
10996
modTime = timeUnset
11097
}
11198
for _, file := range result.Data.LatestVersion.Files {
11299
contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID)
113100
query := url.Values{}
114101
query.Add("format", "original")
115-
contentURL := f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
102+
contentURL := dp.f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
116103
entry := &Object{
117-
fs: f,
104+
fs: dp.f,
118105
remote: path.Join(file.DirectoryLabel, file.DataFile.Filename),
119106
contentURL: contentURL.String(),
120107
size: file.DataFile.FileSize,
@@ -134,6 +121,12 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err
134121
for _, entry := range entries {
135122
cacheEntries = append(cacheEntries, *entry)
136123
}
137-
f.cache.Put("files", cacheEntries)
124+
dp.f.cache.Put("files", cacheEntries)
138125
return entries, nil
139126
}
127+
128+
func newDataverseProvider(f *Fs) doiProvider {
129+
return &dataverseProvider{
130+
f: f,
131+
}
132+
}

backend/doi/doi.go

Lines changed: 72 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ type Fs struct {
111111
name string // name of this remote
112112
root string // the path we are working on
113113
provider Provider // the DOI provider
114+
doiProvider doiProvider // the interface used to interact with the DOI provider
114115
features *fs.Features // optional features
115116
opt Options // options for this backend
116117
ci *fs.ConfigInfo // global config
@@ -132,6 +133,16 @@ type Object struct {
132133
md5 string // MD5 hash of the object content
133134
}
134135

136+
// doiProvider is the interface used to list objects in a DOI
137+
type doiProvider interface {
138+
// CanHaveSubDirs is true when the remote can have subdirectories
139+
CanHaveSubDirs() bool
140+
// IsFile returns true if remote is a file
141+
IsFile(ctx context.Context, remote string) (isFile bool, err error)
142+
// ListEntries returns the full list of entries found at the remote, regardless of root
143+
ListEntries(ctx context.Context) (entries []*Object, err error)
144+
}
145+
135146
// Parse the input string as a DOI
136147
// Examples:
137148
// 10.1000/182 -> 10.1000/182
@@ -240,24 +251,17 @@ func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err
240251
f.provider = provider
241252
f.opt.Provider = string(provider)
242253

243-
// Determine if the root is a file
244254
switch f.provider {
245255
case Dataverse:
246-
entries, err := f.listDataverseDoiFiles(ctx)
247-
if err != nil {
248-
return false, err
249-
}
250-
for _, entry := range entries {
251-
if entry.remote == f.root {
252-
isFile = true
253-
break
254-
}
255-
}
256+
f.doiProvider = newDataverseProvider(f)
256257
case Invenio, Zenodo:
257-
isFile = f.root != ""
258+
f.doiProvider = newInvenioProvider(f)
259+
default:
260+
return false, fmt.Errorf("provider type '%s' not supported", f.provider)
258261
}
259262

260-
return isFile, nil
263+
// Determine if the root is a file
264+
return f.doiProvider.IsFile(ctx, f.root)
261265
}
262266

263267
// retryErrorCodes is a slice of error codes that we will retry
@@ -270,8 +274,8 @@ var retryErrorCodes = []int{
270274
509, // Bandwidth Limit Exceeded
271275
}
272276

273-
// shouldRetry returns a boolean as to whether this resp and err
274-
// deserve to be retried. It returns the err as a convenience
277+
// shouldRetry returns a boolean as to whether this res and err
278+
// deserve to be retried. It returns the err as a convenience.
275279
func shouldRetry(ctx context.Context, res *http.Response, err error) (bool, error) {
276280
if fserrors.ContextError(ctx, &err) {
277281
return false, err
@@ -373,16 +377,7 @@ func (f *Fs) Rmdir(ctx context.Context, dir string) error {
373377

374378
// NewObject creates a new remote http file object
375379
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
376-
var entries []*Object
377-
var err error
378-
switch f.provider {
379-
case Dataverse:
380-
entries, err = f.listDataverseDoiFiles(ctx)
381-
case Invenio, Zenodo:
382-
entries, err = f.listInvevioDoiFiles(ctx)
383-
default:
384-
err = fmt.Errorf("provider type '%s' not supported", f.provider)
385-
}
380+
entries, err := f.doiProvider.ListEntries(ctx)
386381
if err != nil {
387382
return nil, err
388383
}
@@ -406,14 +401,59 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
406401
// This should return ErrDirNotFound if the directory isn't
407402
// found.
408403
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
409-
switch f.provider {
410-
case Dataverse:
411-
return f.listDataverse(ctx, dir)
412-
case Invenio, Zenodo:
413-
return f.listInvenio(ctx, dir)
414-
default:
415-
return nil, fmt.Errorf("provider type '%s' not supported", f.provider)
404+
if f.doiProvider.CanHaveSubDirs() {
405+
fileEntries, err := f.doiProvider.ListEntries(ctx)
406+
if err != nil {
407+
return nil, fmt.Errorf("error listing %q: %w", dir, err)
408+
}
409+
410+
fullDir := path.Join(f.root, dir)
411+
if fullDir != "" {
412+
fullDir += "/"
413+
}
414+
dirPaths := map[string]bool{}
415+
for _, entry := range fileEntries {
416+
// First, filter out files not in `fullDir`
417+
if !strings.HasPrefix(entry.remote, fullDir) {
418+
continue
419+
}
420+
// Then, find entries in subfolders
421+
remotePath := entry.remote
422+
if fullDir != "" {
423+
remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
424+
}
425+
parts := strings.SplitN(remotePath, "/", 2)
426+
if len(parts) == 1 {
427+
newEntry := *entry
428+
newEntry.remote = path.Join(dir, remotePath)
429+
entries = append(entries, &newEntry)
430+
} else {
431+
dirPaths[path.Join(dir, parts[0])] = true
432+
}
433+
}
434+
for dirPath := range dirPaths {
435+
entry := fs.NewDir(dirPath, time.Time{})
436+
entries = append(entries, entry)
437+
}
438+
return entries, nil
416439
}
440+
441+
if !f.doiProvider.CanHaveSubDirs() {
442+
if dir != "" {
443+
return nil, fs.ErrorDirNotFound
444+
}
445+
446+
fileEntries, err := f.doiProvider.ListEntries(ctx)
447+
if err != nil {
448+
return nil, fmt.Errorf("error listing %q: %w", dir, err)
449+
}
450+
for _, entry := range fileEntries {
451+
entries = append(entries, entry)
452+
}
453+
return entries, nil
454+
}
455+
456+
return nil, fmt.Errorf("provider type '%s' not supported", f.provider)
417457
}
418458

419459
// Put in to the remote path with the modTime given of the given size

backend/doi/invenio.go

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -98,26 +98,25 @@ func checkInvenioAPIURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer,
9898
return url.Parse(result.Links.Self)
9999
}
100100

101-
// Implements Fs.List() for Invenio
102-
func (f *Fs) listInvenio(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
103-
if dir != "" {
104-
return nil, fs.ErrorDirNotFound
105-
}
101+
// invenioProvider implements the doiProvider interface for InvenioRDM installations
102+
type invenioProvider struct {
103+
f *Fs
104+
}
106105

107-
fileEntries, err := f.listInvevioDoiFiles(ctx)
108-
if err != nil {
109-
return nil, fmt.Errorf("error listing %q: %w", dir, err)
110-
}
111-
for _, entry := range fileEntries {
112-
entries = append(entries, entry)
113-
}
114-
return entries, nil
106+
// CanHaveSubDirs is true when the remote can have subdirectories
107+
func (ip *invenioProvider) CanHaveSubDirs() bool {
108+
return false
109+
}
110+
111+
// IsFile returns true if remote is a file
112+
func (ip *invenioProvider) IsFile(ctx context.Context, remote string) (isFile bool, err error) {
113+
return remote != "", nil
115114
}
116115

117-
// List the files contained in the DOI
118-
func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err error) {
116+
// ListEntries returns the full list of entries found at the remote, regardless of root
117+
func (ip *invenioProvider) ListEntries(ctx context.Context) (entries []*Object, err error) {
119118
// Use the cache if populated
120-
cachedEntries, found := f.cache.GetMaybe("files")
119+
cachedEntries, found := ip.f.cache.GetMaybe("files")
121120
if found {
122121
parsedEntries, ok := cachedEntries.([]Object)
123122
if ok {
@@ -129,14 +128,14 @@ func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err er
129128
}
130129
}
131130

132-
filesURL := f.endpoint.JoinPath("files")
131+
filesURL := ip.f.endpoint.JoinPath("files")
133132
var result api.InvenioFilesResponse
134133
opts := rest.Opts{
135134
Method: "GET",
136135
Path: strings.TrimLeft(filesURL.EscapedPath(), "/"),
137136
}
138-
err = f.pacer.Call(func() (bool, error) {
139-
res, err := f.srv.CallJSON(ctx, &opts, nil, &result)
137+
err = ip.f.pacer.Call(func() (bool, error) {
138+
res, err := ip.f.srv.CallJSON(ctx, &opts, nil, &result)
140139
return shouldRetry(ctx, res, err)
141140
})
142141
if err != nil {
@@ -145,11 +144,11 @@ func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err er
145144
for _, file := range result.Entries {
146145
modTime, modTimeErr := time.Parse(time.RFC3339, file.Updated)
147146
if modTimeErr != nil {
148-
fs.Logf(f, "error: could not parse last update time %v", modTimeErr)
147+
fs.Logf(ip.f, "error: could not parse last update time %v", modTimeErr)
149148
modTime = timeUnset
150149
}
151150
entry := &Object{
152-
fs: f,
151+
fs: ip.f,
153152
remote: file.Key,
154153
contentURL: file.Links.Content,
155154
size: file.Size,
@@ -164,6 +163,12 @@ func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err er
164163
for _, entry := range entries {
165164
cacheEntries = append(cacheEntries, *entry)
166165
}
167-
f.cache.Put("files", cacheEntries)
166+
ip.f.cache.Put("files", cacheEntries)
168167
return entries, nil
169168
}
169+
170+
func newInvenioProvider(f *Fs) doiProvider {
171+
return &invenioProvider{
172+
f: f,
173+
}
174+
}

docs/content/doi.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,32 @@ Properties:
105105
- "invenio"
106106
- Invenio
107107

108+
#### --doi-doi-resolver-api-url
109+
110+
The URL of the DOI resolver API to use.
111+
112+
The DOI resolver can be set for testing or for cases when the canonical DOI resolver API cannot be used.
113+
114+
Defaults to "https://doi.org/api".
115+
116+
Properties:
117+
118+
- Config: doi_resolver_api_url
119+
- Env Var: RCLONE_DOI_DOI_RESOLVER_API_URL
120+
- Type: string
121+
- Required: false
122+
123+
#### --doi-description
124+
125+
Description of the remote.
126+
127+
Properties:
128+
129+
- Config: description
130+
- Env Var: RCLONE_DOI_DESCRIPTION
131+
- Type: string
132+
- Required: false
133+
108134
## Backend commands
109135

110136
Here are the commands specific to the doi backend.

0 commit comments

Comments
 (0)