diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_common_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_common_config.md
index ff3d79338c..dc37bdf9e9 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_common_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_common_config.md
@@ -11,8 +11,12 @@ import Link from '@docusaurus/Link';
 Optional. Default value 1 second. Events are emitted to Snowflake after a maximum of this duration, even if the maxBytes size has not been reached
-batching.uploadConcurrency
-Optional. Default value 3. How many batches can we send simultaneously over the network to Snowflake
+batching.uploadParallelismFactor
+Optional. Default value 2.5. Controls how many batches we can send simultaneously over the network to Snowflake. For example, if there are 4 available processors and uploadParallelismFactor is 2.5, then the loader sends up to 10 batches in parallel. Adjusting this value can cause the app to use more or less of the available CPU.
+
+cpuParallelismFactor
+Optional. Default value 0.75. Controls how the loader splits the workload into concurrent batches which can be run in parallel. For example, if there are 4 available processors and cpuParallelismFactor is 0.75, then the loader processes 3 batches concurrently. Adjusting this value can cause the app to use more or less of the available CPU.
 retries.setupErrors.delay
@@ -67,6 +71,10 @@
 monitoring.webhook.tags.*
 Optional. A map of key/value strings to be included in the payload content sent to the webhook.
+
+monitoring.webhook.heartbeat.*
+Optional. Default value 5.minutes. How often to send a heartbeat event to the webhook while the loader is healthy.
+
 monitoring.sentry.dsn
 Optional. Set to a Sentry URI to report unexpected runtime exceptions.
@@ -95,3 +103,7 @@
 output.good.jdbcQueryTimeout
 Optional. Sets the query timeout on the JDBC driver which connects to Snowflake
+
+http.client.maxConnectionsPerServer
+Optional. Default value 4. Configures the internal HTTP client used for alerts and telemetry. The maximum number of open HTTP requests to any single server at any one time.
+
diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kafka_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kafka_config.md
index 2ec32c51f9..a56663f0aa 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kafka_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kafka_config.md
@@ -23,6 +23,6 @@
 Optional. A map of key/value pairs for any standard Kafka producer configuration option.
-output.bad.maxRecordSize.*
+output.bad.maxRecordSize
 Optional. Default value 1000000. Any single failed event sent to Kafka should not exceed this size in bytes
diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kinesis_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kinesis_config.md
index 717ec3b2e8..0c2bcf9bd0 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kinesis_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_kinesis_config.md
@@ -23,8 +23,16 @@
 Optional. Default value 1000. How many events the Kinesis client may fetch in a single poll. Only used when `input.retrievalMode` is Polling.
-input.bufferSize
-Optional. Default value 1. The number of batches of events which are pre-fetched from kinesis. The default value is known to work well.
+input.workerIdentifier
+Optional. Defaults to the HOSTNAME environment variable. The name of this KCL worker used in the DynamoDB lease table.
+
+input.leaseDuration
+Optional. Default value 10 seconds. The duration of shard leases. KCL workers must periodically refresh leases in the DynamoDB table before this duration expires.
+
+input.maxLeasesToStealAtOneTimeFactor
+Optional. Default value 2.0. Controls the maximum number of shard leases to steal at one time, as a factor of the available processors. For example, if there are 4 available processors and maxLeasesToStealAtOneTimeFactor is 2.0, then the KCL may steal up to 8 leases. This allows bigger instances to acquire the shard leases they need more quickly, helping to combat latency.
 output.bad.streamName
@@ -47,6 +55,6 @@
 Optional. Default value 5242880. The maximum number of bytes we are allowed to send to Kinesis in 1 PutRecords request.
-output.bad.maxRecordSize.*
+output.bad.maxRecordSize
 Optional. Default value 1000000. Any single failed event sent to Kinesis should not exceed this size in bytes
diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_snowflake_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_snowflake_config.md
index 91ce10c3de..ff88e2846f 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_snowflake_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/snowflake-streaming-loader/configuration-reference/_snowflake_config.md
@@ -28,9 +28,9 @@
 output.good.table
-Optional. Default value `events`. Name to use for the events table
+Optional. Default value events. Name to use for the events table
 output.good.channel
-Optional. Default value `snowplow`. Name to use for the Snowflake channel. If you run multiple loaders in parallel, then each channel must be given a unique name.
+Optional. Default value snowplow. Prefix to use for the Snowflake channels. The full name will be suffixed with a number, e.g. snowplow-1. If you run multiple loaders in parallel, then each loader must be configured with a unique channel prefix.
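Taken together, the documentation changes above describe options that are set in the loader's HOCON configuration file. The snippet below is a minimal, illustrative sketch of how the new and renamed options might look, assuming the Kinesis flavour of the loader: the option names and defaults are taken from the tables in this diff, while the overall structure, the webhook endpoint, and the example values are assumptions, and required settings such as stream names and Snowflake connection details are omitted.

```hocon
{
  "batching": {
    # With 4 available processors, a factor of 2.5 allows up to 10 parallel uploads
    "uploadParallelismFactor": 2.5
  }

  # With 4 available processors, a factor of 0.75 gives 3 concurrently processed batches
  "cpuParallelismFactor": 0.75

  "input": {
    # Kinesis-specific settings (assumed deployment flavour)
    "leaseDuration": "10 seconds"
    "maxLeasesToStealAtOneTimeFactor": 2.0   # with 4 processors, steal up to 8 leases at once
  }

  "output": {
    "good": {
      # Channel prefix; the loader appends a number to form the full name, e.g. snowplow-1
      "channel": "snowplow"
    }
  }

  "monitoring": {
    "webhook": {
      "endpoint": "https://webhook.example.com"   # hypothetical endpoint
      "heartbeat": "5.minutes"                    # default value quoted from the table above
    }
  }

  "http": {
    "client": {
      "maxConnectionsPerServer": 4   # internal HTTP client used for alerts and telemetry
    }
  }
}
```

The two parallelism factors are the main new tuning knobs: both scale with the number of available processors, so larger instances get more concurrent batches and uploads without any further configuration change.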
diff --git a/src/componentVersions.js b/src/componentVersions.js
index 0f11456d0f..e7dcbff601 100644
--- a/src/componentVersions.js
+++ b/src/componentVersions.js
@@ -37,7 +37,7 @@ export const versions = {
   s3Loader: '2.2.9',
   s3Loader22x: '2.2.9',
   lakeLoader: '0.5.0',
-  snowflakeStreamingLoader: '0.2.4',
+  snowflakeStreamingLoader: '0.3.0',

   // Data Modelling
   // dbt