Skip to content

Commit 0d6a5ee

Browse files
committed
fix(sync): panic on s3 URI with query string
Verified locally against MinIO. Signed-off-by: Todd Baert <todd.baert@dynatrace.com>
1 parent f5de45a commit 0d6a5ee

4 files changed

Lines changed: 126 additions & 15 deletions

File tree

core/pkg/sync/blob/blob_sync.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010

1111
"github.com/open-feature/flagd/core/pkg/logger"
1212
"github.com/open-feature/flagd/core/pkg/sync"
13+
"github.com/open-feature/flagd/core/pkg/sync/internal/bloburi"
1314
"github.com/open-feature/flagd/core/pkg/sync/internal/polling"
1415
"github.com/open-feature/flagd/core/pkg/utils"
1516
"gocloud.dev/blob"
@@ -100,7 +101,7 @@ func (hs *Sync) sync(ctx context.Context, dataSync chan<- sync.DataSync, skipChe
100101
if !skipCheckingModTime {
101102
hs.lastUpdated = updated
102103
}
103-
dataSync <- sync.DataSync{FlagData: msg, Source: hs.Bucket + hs.Object}
104+
dataSync <- sync.DataSync{FlagData: msg, Source: bloburi.Join(hs.Bucket, hs.Object)}
104105
return nil
105106
}
106107

core/pkg/sync/builder/syncbuilder.go

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"fmt"
55
"os"
66
"regexp"
7-
"strings"
87

98
"github.com/open-feature/flagd/core/pkg/logger"
109
"github.com/open-feature/flagd/core/pkg/sync"
@@ -13,6 +12,7 @@ import (
1312
"github.com/open-feature/flagd/core/pkg/sync/grpc"
1413
"github.com/open-feature/flagd/core/pkg/sync/grpc/credentials"
1514
httpSync "github.com/open-feature/flagd/core/pkg/sync/http"
15+
"github.com/open-feature/flagd/core/pkg/sync/internal/bloburi"
1616
"github.com/open-feature/flagd/core/pkg/sync/internal/polling"
1717
"github.com/open-feature/flagd/core/pkg/sync/kubernetes"
1818
"go.uber.org/zap"
@@ -229,8 +229,7 @@ func (sb *SyncBuilder) newGcs(config sync.SourceConfig, logger *logger.Logger) (
229229
// Extract bucket uri and object name from the full URI:
230230
// gs://bucket/path/to/object results in gs://bucket/ as bucketUri and
231231
// path/to/object as an object name.
232-
bucketURI := regGcs.FindString(config.URI)
233-
objectName := regGcs.ReplaceAllString(config.URI, "")
232+
bucketURI, objectName := bloburi.Split(config.URI, regGcs)
234233

235234
interval, poller, err := newPoller(config)
236235
if err != nil {
@@ -265,8 +264,7 @@ func (sb *SyncBuilder) newAzblob(config sync.SourceConfig, logger *logger.Logger
265264
// Extract bucket uri and object name from the full URI:
266265
// azblob://bucket/path/to/object results in azblob://bucket/ as bucketUri and
267266
// path/to/object as an object name.
268-
bucketURI := regAzblob.FindString(config.URI)
269-
objectName := regAzblob.ReplaceAllString(config.URI, "")
267+
bucketURI, objectName := bloburi.Split(config.URI, regAzblob)
270268

271269
interval, poller, err := newPoller(config)
272270
if err != nil {
@@ -291,15 +289,9 @@ func (sb *SyncBuilder) newAzblob(config sync.SourceConfig, logger *logger.Logger
291289
func (sb *SyncBuilder) newS3(config sync.SourceConfig, logger *logger.Logger) (*blobSync.Sync, error) {
292290
// Extract bucket uri and object name from the full URI:
293291
// s3://bucket/path/to/object results in s3://bucket/ as bucketUri and
294-
// path/to/object as an object name.
295-
rawURI, query, hasQuery := strings.Cut(config.URI, "?")
296-
bucketURI := regS3.FindString(rawURI)
297-
objectName := regS3.ReplaceAllString(rawURI, "")
298-
if hasQuery && query != "" {
299-
// s3blob reads use_path_style/region/etc. from the bucket URL query string;
300-
// the bucket host must not carry a trailing slash before "?".
301-
bucketURI = strings.TrimSuffix(bucketURI, "/") + "?" + query
302-
}
292+
// path/to/object as an object name. Any query string (e.g. use_path_style,
293+
// region) is moved onto the bucket URL so gocloud's s3blob driver reads it.
294+
bucketURI, objectName := bloburi.Split(config.URI, regS3)
303295

304296
interval, poller, err := newPoller(config)
305297
if err != nil {
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Package bloburi splits and rejoins blob sync URIs (s3://, gs://, azblob://).
2+
package bloburi
3+
4+
import (
5+
"regexp"
6+
"strings"
7+
)
8+
9+
// Split parses "scheme://bucket/key?opt=1" into bucket URL ("scheme://bucket?opt=1")
10+
// and object key ("key"). The query moves to the bucket URL because gocloud blob
11+
// drivers read driver options (e.g. s3blob use_path_style, region) from there.
12+
// schemeRegex must match through the first "/" after the scheme (e.g. "^s3://.+?/").
13+
func Split(uri string, schemeRegex *regexp.Regexp) (bucket, object string) {
14+
raw, query, hasQuery := strings.Cut(uri, "?")
15+
bucket = schemeRegex.FindString(raw)
16+
object = schemeRegex.ReplaceAllString(raw, "")
17+
if hasQuery && query != "" {
18+
bucket = strings.TrimSuffix(bucket, "/") + "?" + query
19+
}
20+
return bucket, object
21+
}
22+
23+
// Join is the inverse of Split. The reconstructed URI must match what was
24+
// registered in the store (see flagd#1971).
25+
func Join(bucket, object string) string {
26+
i := strings.Index(bucket, "?")
27+
if i < 0 {
28+
return bucket + object
29+
}
30+
base := strings.TrimSuffix(bucket[:i], "/") + "/"
31+
return base + object + bucket[i:]
32+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package bloburi
2+
3+
import (
4+
"regexp"
5+
"testing"
6+
)
7+
8+
var schemeRegexes = map[string]*regexp.Regexp{
9+
"s3": regexp.MustCompile("^s3://.+?/"),
10+
"gs": regexp.MustCompile("^gs://.+?/"),
11+
"azblob": regexp.MustCompile("^azblob://.+?/"),
12+
}
13+
14+
func TestSplit(t *testing.T) {
15+
tests := map[string]struct {
16+
uri string
17+
scheme string
18+
bucket string
19+
object string
20+
}{
21+
"s3 simple": {
22+
uri: "s3://my-bucket/flags.json",
23+
scheme: "s3",
24+
bucket: "s3://my-bucket/",
25+
object: "flags.json",
26+
},
27+
"s3 with query (use_path_style)": {
28+
uri: "s3://my-bucket/example_flags.json?use_path_style=true&region=garage&endpoint=http://127.0.0.1:3900",
29+
scheme: "s3",
30+
bucket: "s3://my-bucket?use_path_style=true&region=garage&endpoint=http://127.0.0.1:3900",
31+
object: "example_flags.json",
32+
},
33+
"gs simple": {
34+
uri: "gs://my-bucket/path/to/object",
35+
scheme: "gs",
36+
bucket: "gs://my-bucket/",
37+
object: "path/to/object",
38+
},
39+
"azblob simple": {
40+
uri: "azblob://my-bucket/flags.yaml",
41+
scheme: "azblob",
42+
bucket: "azblob://my-bucket/",
43+
object: "flags.yaml",
44+
},
45+
}
46+
for name, tt := range tests {
47+
t.Run(name, func(t *testing.T) {
48+
bucket, object := Split(tt.uri, schemeRegexes[tt.scheme])
49+
if bucket != tt.bucket {
50+
t.Errorf("Split bucket = %q, want %q", bucket, tt.bucket)
51+
}
52+
if object != tt.object {
53+
t.Errorf("Split object = %q, want %q", object, tt.object)
54+
}
55+
})
56+
}
57+
}
58+
59+
// TestSplitJoinInverse ensures that Join is the inverse of Split
60+
func TestSplitJoinInverse(t *testing.T) {
61+
uris := []string{
62+
"s3://b/o",
63+
"s3://b/path/to/object",
64+
"s3://b/o?use_path_style=true",
65+
"s3://b/o?use_path_style=true&region=garage&endpoint=http://127.0.0.1:3900",
66+
"s3://b/path/to/object?a=1&b=2",
67+
"gs://b/o",
68+
"gs://b/path/to/object",
69+
"azblob://b/o",
70+
"azblob://b/flags.yaml",
71+
}
72+
for _, uri := range uris {
73+
t.Run(uri, func(t *testing.T) {
74+
for _, reg := range schemeRegexes {
75+
if !reg.MatchString(uri) {
76+
continue
77+
}
78+
bucket, object := Split(uri, reg)
79+
if got := Join(bucket, object); got != uri {
80+
t.Errorf("Join(Split(%q)) = %q, want %q (bucket=%q object=%q)",
81+
uri, got, uri, bucket, object)
82+
}
83+
}
84+
})
85+
}
86+
}

0 commit comments

Comments
 (0)