-
Notifications
You must be signed in to change notification settings - Fork 3.5k
[exporter/kafka] add support for partitioning kafka records #47243
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
15b9863
2f515b2
bfad54e
9210ca7
6735ac6
a622bc5
9b570f4
f4afc79
05bb9ea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| # Use this changelog template to create an entry for release notes. | ||
|
|
||
| # One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' | ||
| change_type: enhancement | ||
|
|
||
| # The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) | ||
| component: exporter/kafka | ||
|
|
||
| # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). | ||
| note: Add support for configurable Kafka record partitioners | ||
|
|
||
| # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. | ||
| issues: [46931] | ||
|
|
||
| # (Optional) One or more lines of additional information to render under the primary note. | ||
| # These lines will be padded with 2 spaces and then inserted directly into the document. | ||
| # Use pipe (|) for multiline entries. | ||
| subtext: | | ||
| Add support for RoundRobin and LeastBackup partitioning strategies, as well as custom partitioners | ||
| provided by RecordPartitionerExtension implementations. Users can implement their own partitioning logic | ||
| and plug it into the kafka exporter via the RecordPartitionerExtension interface. | ||
|
|
||
|
|
||
| # If your change doesn't affect end users or the exported elements of any package, | ||
| # you should instead start your pull request title with [chore] or use the "Skip Changelog" label. | ||
| # Optional: The change log or logs in which this entry should be included. | ||
| # e.g. '[user]' or '[user, api]' | ||
| # Include 'user' if the change is relevant to end users. | ||
| # Include 'api' if there is a change to a library API. | ||
| # Default: '[user]' | ||
| change_logs: [] |
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -19,6 +19,11 @@ import ( | |||
|
|
||||
| var _ component.Config = (*Config)(nil) | ||||
|
|
||||
// Sentinel errors returned while validating RecordPartitionerConfig.
var (
	// errRecordPartitionerUnknownType reports a Type value that does not match
	// any of the supported partitioner types.
	errRecordPartitionerUnknownType = errors.New("unknown partitioner type")
	// errRecordPartitionerExtRequired reports that the "custom" partitioner type
	// was selected without naming the extension that supplies the partitioner.
	errRecordPartitionerExtRequired = errors.New("partitioner.extension must be set when type is \"custom\"")
)
|
|
||||
// errLogsPartitionExclusive is returned by Config.Validate when both log
// partitioning options are enabled at the same time.
var errLogsPartitionExclusive = errors.New(
	"partition_logs_by_resource_attributes and partition_logs_by_trace_id cannot both be enabled",
)
|
|
@@ -29,6 +34,36 @@ var ( | |||
| errIncludeMetadataKeysNotPartitioned = errors.New("sending_queue::batch::partition::metadata_keys must include all include_metadata_keys values") | ||||
| ) | ||||
|
|
||||
| // RecordPartitionerConfig configures the strategy used to assign Kafka records to partitions. | ||||
| type RecordPartitionerConfig struct { | ||||
| // Type is the partitioning strategy. Valid values are: | ||||
| // - "sarama_compatible" (default): sticky key partitioner with Sarama-compatible FNV-1a hashing | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't seem to be implemented? Also, there's StickyPartitioner and StickyKeyPartitioner, so the name is a bit misleading. I think this simple name-based config is too simplistic. I suggest we make it more like this: exporters:
kafka:
...
record_partitioner:
# exactly one of the below is required, defaulting to sticky_key with sarama_compat hasher
least_backup:
round_robin:
sticky:
sticky_key:
hasher: sarama_compat # == SaramaCompatHasher(fnv32a)
uniform_bytes:
bytes: 123
adaptive: true
keys: false
hasher: kafka_murmur2
extension: whatever |
||||
| // - "round_robin": round-robin across all available partitions | ||||
| // - "least_backup": routes to the partition with fewest in-flight records | ||||
| // - "custom": delegates to an extension implementing RecordPartitionerExtension | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Can we simplify and make Type and Extension mutually exclusive? Then just setting Extension is sufficient. |
||||
| Type string `mapstructure:"type"` | ||||
|
|
||||
| // Extension is the component ID of an extension implementing RecordPartitionerExtension. | ||||
| // Required when Type is "custom"; must be empty for all other types. | ||||
| Extension *component.ID `mapstructure:"extension,omitempty"` | ||||
| } | ||||
|
|
||||
| func (c *RecordPartitionerConfig) Validate() error { | ||||
| switch c.Type { | ||||
| case RecordPartitionerTypeSaramaCompatible, | ||||
| RecordPartitionerTypeRoundRobin, | ||||
| RecordPartitionerTypeLeastBackup: | ||||
| case RecordPartitionerTypeCustom: | ||||
| if c.Extension == nil { | ||||
| return errRecordPartitionerExtRequired | ||||
| } | ||||
| default: | ||||
| return fmt.Errorf("%w: %q", errRecordPartitionerUnknownType, c.Type) | ||||
| } | ||||
| return nil | ||||
| } | ||||
|
|
||||
| // Config defines configuration for Kafka exporter. | ||||
| type Config struct { | ||||
| TimeoutSettings exporterhelper.TimeoutConfig `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct. | ||||
|
|
@@ -78,12 +113,21 @@ type Config struct { | |||
| // selection falls back to the Kafka client’s default strategy. Resource | ||||
| // attributes are not used for the key when this option is enabled. | ||||
| PartitionLogsByTraceID bool `mapstructure:"partition_logs_by_trace_id"` | ||||
|
|
||||
| // RecordPartitioner configures how Kafka records are assigned to partitions. | ||||
| // The default ("sarama_compatible") retains the legacy Sarama-compatible hashing | ||||
| // behavior. Set its type to "round_robin" or "least_backup" to use one of the | ||||
| // built-in franz-go partitioners, or to "custom" to delegate to a custom extension. | ||||
| RecordPartitioner RecordPartitionerConfig `mapstructure:"record_partitioner"` | ||||
| } | ||||
|
|
||||
| func (c *Config) Validate() error { | ||||
| if c.PartitionLogsByResourceAttributes && c.PartitionLogsByTraceID { | ||||
| return errLogsPartitionExclusive | ||||
| } | ||||
| if err := c.RecordPartitioner.Validate(); err != nil { | ||||
| return fmt.Errorf("record_partitioner: %w", err) | ||||
| } | ||||
| if err := validateBatchPartitionerKeys(c); err != nil { | ||||
| return err | ||||
| } | ||||
|
|
||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| // Copyright The OpenTelemetry Authors | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| package kafkaexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/kafkaexporter" | ||
|
|
||
| import ( | ||
| "fmt" | ||
|
|
||
| "github.com/twmb/franz-go/pkg/kgo" | ||
| "go.opentelemetry.io/collector/component" | ||
|
|
||
| "github.com/open-telemetry/opentelemetry-collector-contrib/internal/kafka" | ||
| ) | ||
|
|
||
// Supported values for RecordPartitionerConfig.Type.
const (
	// RecordPartitionerTypeSaramaCompatible is the default partitioner. It uses a sticky
	// key partitioner with Sarama-compatible FNV-1a hashing when a record key is set,
	// and a random sticky partition when no key is set.
	RecordPartitionerTypeSaramaCompatible = "sarama_compatible"

	// RecordPartitionerTypeRoundRobin distributes records evenly across all available
	// partitions in a round-robin fashion, regardless of the record key.
	RecordPartitionerTypeRoundRobin = "round_robin"

	// RecordPartitionerTypeLeastBackup routes each record to the partition with the fewest
	// buffered records, which can reduce produce latency under uneven load.
	RecordPartitionerTypeLeastBackup = "least_backup"

	// RecordPartitionerTypeCustom delegates partitioning to a user-provided extension
	// that implements RecordPartitionerExtension. RecordPartitionerConfig.Extension
	// must name that extension when this type is selected.
	RecordPartitionerTypeCustom = "custom"
)
|
|
||
// RecordPartitionerExtension is implemented by extensions that supply a custom Kafka record
// partitioner for use with the kafka exporter.
type RecordPartitionerExtension interface {
	component.Component

	// GetPartitioner returns the partitioner that the exporter passes to
	// kgo.RecordPartitioner when the "custom" partitioner type is configured.
	GetPartitioner() kgo.Partitioner
}
|
|
||
| func buildPartitionerOpt(cfg RecordPartitionerConfig, host component.Host) (kgo.Opt, error) { | ||
| switch cfg.Type { | ||
| case RecordPartitionerTypeSaramaCompatible: | ||
| return kgo.RecordPartitioner(kafka.NewSaramaCompatPartitioner()), nil | ||
| case RecordPartitionerTypeRoundRobin: | ||
| return kgo.RecordPartitioner(kgo.RoundRobinPartitioner()), nil | ||
| case RecordPartitionerTypeLeastBackup: | ||
| return kgo.RecordPartitioner(kgo.LeastBackupPartitioner()), nil | ||
| case RecordPartitionerTypeCustom: | ||
| if cfg.Extension == nil { | ||
| return nil, errRecordPartitionerExtRequired | ||
| } | ||
| ext, ok := host.GetExtensions()[*cfg.Extension] | ||
| if !ok { | ||
| return nil, fmt.Errorf("partitioner extension %q not found", cfg.Extension) | ||
| } | ||
| partExt, ok := ext.(RecordPartitionerExtension) | ||
| if !ok { | ||
| return nil, fmt.Errorf("extension %q does not implement RecordPartitionerExtension", cfg.Extension) | ||
| } | ||
| return kgo.RecordPartitioner(partExt.GetPartitioner()), nil | ||
| default: | ||
| return nil, fmt.Errorf("unknown partitioner type %q", cfg.Type) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to handle an empty string here? The default config sets the value.