Skip to content

Commit e6a36a4

Browse files
committed
Added validation
1 parent 7edff44 commit e6a36a4

File tree

2 files changed

+264
-47
lines changed

2 files changed

+264
-47
lines changed

destination/iceberg/config.go

Lines changed: 72 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -26,55 +26,51 @@ const (
2626
// TODO: add validation for each catalog properly
2727
type Config struct {
2828
// S3-compatible Storage Configuration
29-
Region string `json:"aws_region,omitempty"`
30-
AccessKey string `json:"aws_access_key,omitempty"`
31-
SecretKey string `json:"aws_secret_key,omitempty"`
32-
SessionToken string `json:"aws_session_token,omitempty"`
33-
ProfileName string `json:"aws_profile,omitempty"`
29+
Region string `json:"aws_region" validate:"required_if=CatalogType glue"`
30+
AccessKey string `json:"aws_access_key" validate:"required_with=SecretKey"`
31+
SecretKey string `json:"aws_secret_key" validate:"required_with=AccessKey"`
32+
SessionToken string `json:"aws_session_token" validate:"omitempty"`
33+
ProfileName string `json:"aws_profile" validate:"omitempty,excluded_with=AccessKey"`
3434
NoIdentifierFields bool `json:"no_identifier_fields"` // Needed to set true for Databricks Unity Catalog as it doesn't support identifier fields
3535

3636
// S3 endpoint for custom S3-compatible services (like MinIO)
37-
S3Endpoint string `json:"s3_endpoint,omitempty"`
38-
S3UseSSL bool `json:"s3_use_ssl,omitempty"` // Use HTTPS if true
39-
S3PathStyle bool `json:"s3_path_style,omitempty"` // Use path-style instead of virtual-hosted-style https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html
37+
S3Endpoint string `json:"s3_endpoint" validate:"omitempty,url"`
38+
S3UseSSL bool `json:"s3_use_ssl"` // Use HTTPS if true
39+
S3PathStyle bool `json:"s3_path_style"` // Use path-style instead of virtual-hosted-style https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html
4040

4141
// Catalog Configuration
42-
CatalogType CatalogType `json:"catalog_type,omitempty"`
43-
CatalogName string `json:"catalog_name,omitempty"`
42+
CatalogType CatalogType `json:"catalog_type" validate:"required,oneof=glue jdbc hive rest"`
43+
CatalogName string `json:"catalog_name" validate:"required"`
4444

4545
// JDBC specific configuration
46-
JDBCUrl string `json:"jdbc_url,omitempty"`
47-
JDBCUsername string `json:"jdbc_username,omitempty"`
48-
JDBCPassword string `json:"jdbc_password,omitempty"`
46+
JDBCUrl string `json:"jdbc_url" validate:"required_if=CatalogType jdbc,url"`
47+
JDBCUsername string `json:"jdbc_username" validate:"required_if=CatalogType jdbc"`
48+
JDBCPassword string `json:"jdbc_password" validate:"required_if=CatalogType jdbc"`
4949

5050
// Hive specific configuration
51-
HiveURI string `json:"hive_uri,omitempty"`
52-
HiveClients int `json:"hive_clients,omitempty"`
53-
HiveSaslEnabled bool `json:"hive_sasl_enabled,omitempty"`
51+
HiveURI string `json:"hive_uri" validate:"required_if=CatalogType hive,url"`
52+
HiveClients int `json:"hive_clients" validate:"omitempty,min=1"`
53+
HiveSaslEnabled bool `json:"hive_sasl_enabled"`
5454

5555
// Iceberg Configuration
56-
IcebergDatabase string `json:"iceberg_db,omitempty"`
57-
IcebergS3Path string `json:"iceberg_s3_path"` // e.g. s3://bucket/path
58-
JarPath string `json:"sink_jar_path,omitempty"` // Path to the Iceberg sink JAR
59-
ServerHost string `json:"sink_rpc_server_host,omitempty"` // gRPC server host
56+
IcebergDatabase string `json:"iceberg_db" validate:"omitempty"`
57+
IcebergS3Path string `json:"iceberg_s3_path" validate:"required,startswith=s3://"` // e.g. s3://bucket/path
58+
JarPath string `json:"sink_jar_path" validate:"omitempty,file"` // Path to the Iceberg sink JAR
59+
ServerHost string `json:"sink_rpc_server_host" validate:"omitempty,hostname"` // gRPC server host
6060

6161
// Rest Catalog Configuration
62-
RestCatalogURL string `json:"rest_catalog_url,omitempty"`
62+
RestCatalogURL string `json:"rest_catalog_url" validate:"required_if=CatalogType rest,url"`
6363
RestSigningName string `json:"rest_signing_name,omitempty"`
64-
RestSigningRegion string `json:"rest_signing_region,omitempty"`
65-
RestSigningV4 bool `json:"rest_signing_v_4,omitempty"`
66-
RestToken string `json:"token,omitempty"`
67-
RestOAuthURI string `json:"oauth2_uri,omitempty"`
68-
RestAuthType string `json:"rest_auth_type,omitempty"`
69-
RestScope string `json:"scope,omitempty"`
70-
RestCredential string `json:"credential,omitempty"`
64+
RestSigningRegion string `json:"rest_signing_region" validate:"required_if=RestSigningV4 true"`
65+
RestSigningV4 bool `json:"rest_signing_v_4"`
66+
RestToken string `json:"token" validate:"required_if=RestAuthType token"`
67+
RestOAuthURI string `json:"oauth2_uri" validate:"required_if=RestAuthType oauth2,omitempty,url"`
68+
RestAuthType string `json:"rest_auth_type" validate:"omitempty,oneof=none token oauth2"`
69+
RestScope string `json:"scope" validate:"required_if=RestAuthType oauth2"`
70+
RestCredential string `json:"credential" validate:"required_if=RestAuthType oauth2"`
7171
}
7272

7373
func (c *Config) Validate() error {
74-
if c.IcebergS3Path == "" {
75-
return fmt.Errorf("s3_path is required")
76-
}
77-
7874
// Set defaults for catalog type
7975
if c.CatalogType == "" {
8076
c.CatalogType = GlueCatalog
@@ -84,44 +80,76 @@ func (c *Config) Validate() error {
8480
c.CatalogName = "olake_iceberg"
8581
}
8682

83+
if c.ServerHost == "" {
84+
c.ServerHost = "localhost"
85+
}
86+
8787
// Default to path-style access for S3-compatible services
8888
if c.S3Endpoint != "" {
8989
c.S3PathStyle = true
9090
}
9191

92+
// Set Hive defaults
93+
if c.CatalogType == HiveCatalog && c.HiveClients <= 0 {
94+
c.HiveClients = 5
95+
}
96+
9297
// Validate S3 configuration
9398
// Region can be picked up from environment or credentials file for AWS S3
9499
if c.S3Endpoint == "" && c.Region == "" {
95100
logger.Warn("aws_region not explicitly provided, will attempt to use region from environment variables or AWS config/credentials file")
96101
}
97102

98-
// Log information about credentials for all S3 configurations
99103
if c.AccessKey == "" && c.SecretKey == "" && c.ProfileName == "" {
100104
if c.S3Endpoint == "" {
101-
// AWS S3 scenario
102-
logger.Info("AWS credentials not explicitly provided, will use default credential chain (environment variables, AWS config/credentials file, or instance metadata service)")
105+
logger.Info("AWS credentials not explicitly provided, will use default credential chain")
103106
} else {
104-
// Custom S3 endpoint scenario
105-
logger.Info("S3 credentials not explicitly provided for custom endpoint. Ensure the service supports anonymous access or credentials are available through other means")
107+
logger.Info("S3 credentials not explicitly provided for custom endpoint")
106108
}
107109
}
108110

111+
// Basic validation
112+
if c.IcebergS3Path == "" {
113+
return fmt.Errorf("s3_path is required")
114+
}
115+
109116
// Validate based on catalog type
110117
switch c.CatalogType {
111118
case JDBCCatalog:
112-
if c.JDBCUrl == "" {
113-
return fmt.Errorf("jdbc_url is required when using JDBC catalog")
119+
if !strings.HasPrefix(c.JDBCUrl, "jdbc:") {
120+
return fmt.Errorf("invalid JDBC URL format: must start with 'jdbc:'")
114121
}
115122
case RestCatalog:
116-
if c.RestCatalogURL == "" {
117-
return fmt.Errorf("rest_catalog_url is required when using REST catalog")
123+
if c.RestSigningV4 && (c.RestSigningRegion == "" || c.RestSigningName == "") {
124+
return fmt.Errorf("rest_signing_name and rest_signing_region are required when rest_signing_v4 is enabled")
125+
}
126+
127+
switch c.RestAuthType {
128+
case "token":
129+
if c.RestToken == "" {
130+
return fmt.Errorf("token is required when using token authentication")
131+
}
132+
case "oauth2":
133+
if c.RestOAuthURI == "" || c.RestScope == "" || c.RestCredential == "" {
134+
return fmt.Errorf("oauth2_uri, scope, and credential are required for OAuth2 authentication")
135+
}
136+
case "", "none":
137+
default:
138+
return fmt.Errorf("unsupported REST authentication type: %s", c.RestAuthType)
118139
}
119140
case HiveCatalog:
120-
if c.HiveURI == "" {
121-
return fmt.Errorf("hive_uri is required when using Hive catalog")
141+
if !strings.HasPrefix(c.HiveURI, "thrift://") {
142+
return fmt.Errorf("invalid Hive URI format: must start with 'thrift://'")
143+
}
144+
if c.HiveClients > 20 {
145+
logger.Warn("High number of Hive clients configured (%d). This may impact performance", c.HiveClients)
122146
}
123147
case GlueCatalog:
124-
// No additional validation required for Glue catalog
148+
if c.AccessKey != "" || c.SecretKey != "" {
149+
if c.AccessKey == "" || c.SecretKey == "" {
150+
return fmt.Errorf("both aws_access_key and aws_secret_key must be provided when using explicit AWS credentials")
151+
}
152+
}
125153
default:
126154
return fmt.Errorf("unsupported catalog_type: %s", c.CatalogType)
127155
}
@@ -156,8 +184,5 @@ func (c *Config) Validate() error {
156184
}
157185
}
158186
}
159-
if c.ServerHost == "" {
160-
c.ServerHost = "localhost"
161-
}
162187
return utils.Validate(c)
163188
}

destination/iceberg/config_test.go

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
package iceberg
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestConfig_Validate(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
config Config
13+
wantErr bool
14+
errMsg string
15+
}{
16+
{
17+
name: "valid glue config",
18+
config: Config{
19+
Region: "us-west-2",
20+
CatalogType: GlueCatalog,
21+
CatalogName: "test_catalog",
22+
IcebergS3Path: "s3://bucket/path",
23+
},
24+
wantErr: false,
25+
},
26+
{
27+
name: "invalid glue config - missing region",
28+
config: Config{
29+
CatalogType: GlueCatalog,
30+
CatalogName: "test_catalog",
31+
IcebergS3Path: "s3://bucket/path",
32+
},
33+
wantErr: true,
34+
errMsg: "aws_region is required when catalog_type is glue",
35+
},
36+
{
37+
name: "valid jdbc config",
38+
config: Config{
39+
CatalogType: JDBCCatalog,
40+
CatalogName: "test_catalog",
41+
JDBCUrl: "jdbc:postgresql://localhost:5432/db",
42+
JDBCUsername: "user",
43+
JDBCPassword: "pass",
44+
IcebergS3Path: "s3://bucket/path",
45+
},
46+
wantErr: false,
47+
},
48+
{
49+
name: "invalid jdbc config - missing url",
50+
config: Config{
51+
CatalogType: JDBCCatalog,
52+
CatalogName: "test_catalog",
53+
JDBCUsername: "user",
54+
JDBCPassword: "pass",
55+
IcebergS3Path: "s3://bucket/path",
56+
},
57+
wantErr: true,
58+
errMsg: "jdbc_url is required when catalog_type is jdbc",
59+
},
60+
{
61+
name: "invalid jdbc url format",
62+
config: Config{
63+
CatalogType: JDBCCatalog,
64+
CatalogName: "test_catalog",
65+
JDBCUrl: "invalid-url",
66+
JDBCUsername: "user",
67+
JDBCPassword: "pass",
68+
IcebergS3Path: "s3://bucket/path",
69+
},
70+
wantErr: true,
71+
errMsg: "invalid JDBC URL format: must start with 'jdbc:'",
72+
},
73+
{
74+
name: "valid rest config with token auth",
75+
config: Config{
76+
CatalogType: RestCatalog,
77+
CatalogName: "test_catalog",
78+
RestCatalogURL: "https://rest-catalog.example.com",
79+
RestAuthType: "token",
80+
RestToken: "my-token",
81+
IcebergS3Path: "s3://bucket/path",
82+
},
83+
wantErr: false,
84+
},
85+
{
86+
name: "invalid rest config - missing token",
87+
config: Config{
88+
CatalogType: RestCatalog,
89+
CatalogName: "test_catalog",
90+
RestCatalogURL: "https://rest-catalog.example.com",
91+
RestAuthType: "token",
92+
IcebergS3Path: "s3://bucket/path",
93+
},
94+
wantErr: true,
95+
errMsg: "token is required when using token authentication",
96+
},
97+
{
98+
name: "valid rest config with oauth2",
99+
config: Config{
100+
CatalogType: RestCatalog,
101+
CatalogName: "test_catalog",
102+
RestCatalogURL: "https://rest-catalog.example.com",
103+
RestAuthType: "oauth2",
104+
RestOAuthURI: "https://auth.example.com",
105+
RestScope: "catalog.read",
106+
RestCredential: "client-id:secret",
107+
IcebergS3Path: "s3://bucket/path",
108+
},
109+
wantErr: false,
110+
},
111+
{
112+
name: "invalid rest config - incomplete oauth2",
113+
config: Config{
114+
CatalogType: RestCatalog,
115+
CatalogName: "test_catalog",
116+
RestCatalogURL: "https://rest-catalog.example.com",
117+
RestAuthType: "oauth2",
118+
RestOAuthURI: "https://auth.example.com",
119+
IcebergS3Path: "s3://bucket/path",
120+
},
121+
wantErr: true,
122+
errMsg: "oauth2_uri, scope, and credential are required for OAuth2 authentication",
123+
},
124+
{
125+
name: "valid hive config",
126+
config: Config{
127+
CatalogType: HiveCatalog,
128+
CatalogName: "test_catalog",
129+
HiveURI: "thrift://localhost:9083",
130+
IcebergS3Path: "s3://bucket/path",
131+
},
132+
wantErr: false,
133+
},
134+
{
135+
name: "invalid hive config - wrong uri format",
136+
config: Config{
137+
CatalogType: HiveCatalog,
138+
CatalogName: "test_catalog",
139+
HiveURI: "invalid://localhost:9083",
140+
IcebergS3Path: "s3://bucket/path",
141+
},
142+
wantErr: true,
143+
errMsg: "invalid Hive URI format: must start with 'thrift://'",
144+
},
145+
{
146+
name: "invalid hive config - missing uri",
147+
config: Config{
148+
CatalogType: HiveCatalog,
149+
CatalogName: "test_catalog",
150+
IcebergS3Path: "s3://bucket/path",
151+
},
152+
wantErr: true,
153+
errMsg: "hive_uri is required when catalog_type is hive",
154+
},
155+
{
156+
name: "invalid aws credentials - partial keys",
157+
config: Config{
158+
CatalogType: GlueCatalog,
159+
CatalogName: "test_catalog",
160+
Region: "us-west-2",
161+
AccessKey: "AKIAXXXXXXXX",
162+
IcebergS3Path: "s3://bucket/path",
163+
},
164+
wantErr: true,
165+
errMsg: "both aws_access_key and aws_secret_key must be provided when using explicit AWS credentials",
166+
},
167+
{
168+
name: "missing s3 path",
169+
config: Config{
170+
CatalogType: GlueCatalog,
171+
CatalogName: "test_catalog",
172+
Region: "us-west-2",
173+
},
174+
wantErr: true,
175+
errMsg: "s3_path is required",
176+
},
177+
}
178+
179+
for _, tt := range tests {
180+
t.Run(tt.name, func(t *testing.T) {
181+
err := tt.config.Validate()
182+
if tt.wantErr {
183+
assert.Error(t, err)
184+
if tt.errMsg != "" {
185+
assert.Contains(t, err.Error(), tt.errMsg)
186+
}
187+
} else {
188+
assert.NoError(t, err)
189+
}
190+
})
191+
}
192+
}

0 commit comments

Comments
 (0)