Skip to content

Commit 53fc831

Browse files
authored
Merge pull request #830 from datazip-inc/staging
chore: release v0.3.17
2 parents 71b2945 + 98fbd96 commit 53fc831

16 files changed

Lines changed: 264 additions & 19 deletions

File tree

.github/workflows/security-ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
check-latest: "true"
4646
go-version: "1.24.x"
4747
- name: install gosec
48-
run: curl -sfL https://raw.githubusercontent.com/securego/gosec/master/install.sh | sh -s -- -b $(go env GOPATH)/bin
48+
run: curl -sfL https://raw.githubusercontent.com/securego/gosec/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.22.11
4949
- name: Run Gosec Security Scanner
5050
run: $(go env GOPATH)/bin/gosec -exclude=G115 -severity=high -confidence=medium ./...
5151

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@
7575

7676
| Source → Destination | CDC | Relative Performance (CDC) | Full Report |
7777
|----------------------|-----------------|--------------------------------------|--------------------------------------------------------------|
78-
| Postgres → Iceberg | 41,390 RPS | 1.5× faster than Fivetran | [Full Report](https://olake.io/docs/benchmarks?tab=postgres) |
78+
| Postgres → Iceberg | 55,555 RPS | 2× faster than Fivetran | [Full Report](https://olake.io/docs/benchmarks?tab=postgres) |
7979
| MySQL → Iceberg | 51,867 RPS | 1.85× faster than Fivetran | [Full Report](https://olake.io/docs/benchmarks/?tab=mysql) |
80-
| MongoDB → Iceberg | - | - | [Full Report](https://olake.io/docs/benchmarks/?tab=mongodb) |
80+
| MongoDB → Iceberg | 10,692 RPS | - | [Full Report](https://olake.io/docs/benchmarks/?tab=mongodb) |
8181
| Oracle → Iceberg | - | - | [Full Report](https://olake.io/docs/benchmarks/?tab=oracle) |
8282

8383

constants/state_version.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,16 @@ package constants
1818
// * When a string cannot be parsed as a timestamp, it will be returned as string. Earlier it was returning epoch time (1970-01-01)
1919
// * This prevents data corruption by failing fast on invalid date strings
2020
//
21-
// - Version 2: Current Version (Introduces consistent timezone handling between MySQL Full Refresh and CDC.)
21+
// - Version 2: Introduces consistent timezone handling between MySQL Full Refresh and CDC.
2222
// * Binlog CDC now uses TimestampStringLocation to align with the connection's timezone configuration.
2323
// * This prevents discrepancies where CDC timestamps could differ from Full Refresh data.
24+
//
25+
// - Version 3: Current Version (Parses the timezone offset for MySQL correctly)
26+
// * Earlier if the session timezone or global was set in offset format, it was not parsed correctly and used to fallback to UTC.
27+
// * Now it parses the offset correctly and uses the timezone offset to set the timezone for the connection.
2428

2529
const (
26-
LatestStateVersion = 2
30+
LatestStateVersion = 3
2731
)
2832

2933
// Used as the current version of the state when the program is running

destination/iceberg/config.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@ type Config struct {
4242
CatalogType CatalogType `json:"catalog_type,omitempty"`
4343
CatalogName string `json:"catalog_name,omitempty"`
4444

45+
// Glue catalog optional overrides
46+
UseGlueAdditionalConfig bool `json:"glue_additional_config,omitempty"`
47+
GlueEndpoint string `json:"glue_endpoint,omitempty"`
48+
GlueAccessKey string `json:"glue_access_key,omitempty"`
49+
GlueSecretKey string `json:"glue_secret_key,omitempty"`
50+
GlueRegion string `json:"glue_region,omitempty"`
51+
GlueCatalogID string `json:"glue_catalog_id,omitempty"`
52+
4553
// JDBC specific configuration
4654
JDBCUrl string `json:"jdbc_url,omitempty"`
4755
JDBCUsername string `json:"jdbc_username,omitempty"`

destination/iceberg/java_client.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ func getServerConfigJSON(config *Config, partitionInfo []internal.PartitionInfo,
7575
switch config.CatalogType {
7676
case GlueCatalog:
7777
serverConfig["catalog-impl"] = "org.apache.iceberg.aws.glue.GlueCatalog"
78+
79+
// if custom glue endpoint creds are passed
80+
if config.UseGlueAdditionalConfig {
81+
addMapKeyIfNotEmpty("client.factory", "io.debezium.server.iceberg.OlakeAwsClientFactory")
82+
addMapKeyIfNotEmpty("glue.access-key-id", config.GlueAccessKey)
83+
addMapKeyIfNotEmpty("glue.secret-access-key", config.GlueSecretKey)
84+
addMapKeyIfNotEmpty("glue.endpoint", config.GlueEndpoint)
85+
addMapKeyIfNotEmpty("glue.id", config.GlueCatalogID)
86+
addMapKeyIfNotEmpty("glue.region", config.GlueRegion)
87+
}
7888
case JDBCCatalog:
7989
serverConfig["catalog-impl"] = "org.apache.iceberg.jdbc.JdbcCatalog"
8090
serverConfig["uri"] = config.JDBCUrl

destination/iceberg/local-test/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: "3"
22

33
services:
44
lakekeeper:
5-
image: &lakekeeper-image ${LAKEKEEPER__SERVER_IMAGE:-quay.io/lakekeeper/catalog:latest-main}
5+
image: &lakekeeper-image ${LAKEKEEPER__SERVER_IMAGE:-quay.io/lakekeeper/catalog:v0.11.1}
66
pull_policy: &lakekeeper-pull-policy always
77
environment: &lakekeeper-environment
88
- LAKEKEEPER__PG_ENCRYPTION_KEY=This-is-NOT-Secure!
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package io.debezium.server.iceberg;
2+
3+
import java.net.URI;
4+
import java.util.HashMap;
5+
import java.util.Map;
6+
7+
import org.apache.iceberg.aws.AwsClientFactories;
8+
import org.apache.iceberg.aws.AwsClientFactory;
9+
10+
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
11+
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
12+
import software.amazon.awssdk.regions.Region;
13+
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
14+
import software.amazon.awssdk.services.glue.GlueClient;
15+
import software.amazon.awssdk.services.glue.GlueClientBuilder;
16+
import software.amazon.awssdk.services.kms.KmsClient;
17+
import software.amazon.awssdk.services.s3.S3Client;
18+
19+
// for custom glue endpoint credentials
20+
public class OlakeAwsClientFactory implements AwsClientFactory {
21+
22+
private transient AwsClientFactory delegate;
23+
private transient Map<String, String> props;
24+
25+
@Override
26+
public void initialize(Map<String, String> properties) {
27+
Map<String, String> p = new HashMap<>();
28+
if (properties != null) {
29+
for (Map.Entry<String, String> e : properties.entrySet()) {
30+
if (e.getKey() != null && e.getValue() != null) {
31+
p.put(e.getKey(), e.getValue());
32+
}
33+
}
34+
}
35+
36+
this.props = p;
37+
this.delegate = AwsClientFactories.defaultFactory();
38+
this.delegate.initialize(this.props);
39+
}
40+
41+
@Override
42+
public S3Client s3() {
43+
return delegate.s3();
44+
}
45+
46+
@Override
47+
public GlueClient glue() {
48+
String glueAccessKey = props.get("glue.access-key-id");
49+
String glueSecretKey = props.get("glue.secret-access-key");
50+
51+
GlueClientBuilder builder = GlueClient.builder();
52+
if (!isBlank(glueAccessKey) && !isBlank(glueSecretKey)) {
53+
builder.credentialsProvider(
54+
StaticCredentialsProvider.create(
55+
AwsBasicCredentials.create(glueAccessKey, glueSecretKey)
56+
)
57+
);
58+
}
59+
60+
// prefer glue.region if set, otherwise fall back to s3.region
61+
String region = props.get("glue.region");
62+
if (isBlank(region)) {
63+
region = props.get("s3.region");
64+
}
65+
if (!isBlank(region)) {
66+
builder.region(Region.of(region));
67+
}
68+
69+
String endpoint = props.get("glue.endpoint");
70+
if (!isBlank(endpoint)) {
71+
builder.endpointOverride(URI.create(endpoint));
72+
}
73+
74+
return builder.build();
75+
}
76+
77+
@Override
78+
public KmsClient kms() {
79+
return delegate.kms();
80+
}
81+
82+
@Override
83+
public DynamoDbClient dynamo() {
84+
return delegate.dynamo();
85+
}
86+
87+
private static boolean isBlank(String s) {
88+
return s == null || s.trim().isEmpty();
89+
}
90+
}

destination/iceberg/resources/spec.json

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,64 @@
224224
"properties": {
225225
"catalog_type": {
226226
"const": "glue"
227+
},
228+
"s3_endpoint": {
229+
"type": "string",
230+
"title": "S3 Endpoint",
231+
"description": "Specifies the endpoint URL for S3 compatible services"
232+
},
233+
"glue_additional_config": {
234+
"type": "boolean",
235+
"default": false,
236+
"title": "Custom Glue Endpoint Configuration",
237+
"description": "Enable separate credentials (access key and secret key), endpoint, and region for the Glue catalog"
238+
}
239+
},
240+
"dependencies": {
241+
"glue_additional_config": {
242+
"oneOf": [
243+
{
244+
"properties": {
245+
"glue_additional_config": {
246+
"const": true
247+
},
248+
"glue_catalog_id": {
249+
"type": "string",
250+
"title": "Glue Catalog ID",
251+
"description": "AWS account ID used as the Glue Data Catalog identifier"
252+
},
253+
"glue_access_key": {
254+
"type": "string",
255+
"format": "password",
256+
"title": "Glue Access Key",
257+
"description": "Access key for authenticating Glue catalog requests, required when Glue credentials differ from S3 credentials"
258+
},
259+
"glue_secret_key": {
260+
"type": "string",
261+
"format": "password",
262+
"title": "Glue Secret Key",
263+
"description": "Secret key for authenticating Glue catalog requests, required when Glue credentials differ from S3 credentials"
264+
},
265+
"glue_endpoint": {
266+
"type": "string",
267+
"title": "Glue Endpoint",
268+
"description": "Custom endpoint URL for AWS Glue or a Glue-compatible catalog service"
269+
},
270+
"glue_region": {
271+
"type": "string",
272+
"title": "Glue Region",
273+
"description": "AWS region for the Glue catalog, if different from the S3 region. Falls back to AWS Region if not set"
274+
}
275+
}
276+
},
277+
{
278+
"properties": {
279+
"glue_additional_config": {
280+
"const": false
281+
}
282+
}
283+
}
284+
]
227285
}
228286
}
229287
}

drivers/db2/internal/backfill.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ func (d *DB2) ChunkIterator(ctx context.Context, stream types.StreamInterface, c
3838

3939
// if PK present then PK based chunking else RID based chunking
4040
pkColumns := stream.GetStream().SourceDefinedPrimaryKey.Array()
41+
sort.Strings(pkColumns)
4142

4243
var stmt string
4344
if stream.Self().StreamMetadata.ChunkColumn != "" {

drivers/mssql/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ require (
7272
github.com/klauspost/compress v1.18.0 // indirect
7373
github.com/klauspost/cpuid/v2 v2.2.10 // indirect
7474
github.com/leodido/go-urn v1.4.0 // indirect
75+
github.com/linkedin/goavro/v2 v2.15.0 // indirect
7576
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
7677
github.com/magiconair/properties v1.8.10 // indirect
7778
github.com/mattn/go-colorable v0.1.14 // indirect

0 commit comments

Comments
 (0)