diff --git a/automq-shell/build.gradle b/automq-shell/build.gradle index 4e8b5d9510..ca88699c76 100644 --- a/automq-shell/build.gradle +++ b/automq-shell/build.gradle @@ -65,4 +65,4 @@ jar { manifest { attributes 'Main-Class': 'com.automq.shell.AutoMQCLI' } -} \ No newline at end of file +} diff --git a/build.gradle b/build.gradle index f99c1a975e..d4cd9cd424 100644 --- a/build.gradle +++ b/build.gradle @@ -49,6 +49,7 @@ plugins { // We are going to drop JDK8 support. Hence, the spotless is upgrade to newest version and be applied only if the build env is compatible with JDK 11. // spotless 6.15.0+ has issue in runtime with JDK8 even through we define it with `apply:false`. see https://github.com/diffplug/spotless/issues/2156 for more details id 'com.diffplug.spotless' version "6.14.0" apply false + id('com.google.protobuf') version '0.9.4' } ext { @@ -165,8 +166,8 @@ allprojects { // ZooKeeper (potentially older and containing CVEs) libs.nettyHandler, libs.nettyTransportNativeEpoll, - // be explicit about the reload4j version instead of relying on the transitive versions - libs.reload4j + // be explicit about the reload4j version instead of relying on the transitive versions + libs.reload4j ) } } @@ -731,7 +732,7 @@ subprojects { jacoco { toolVersion = versions.jacoco } - + jacocoTestReport { dependsOn tasks.test sourceSets sourceSets.main @@ -755,8 +756,8 @@ subprojects { skipProjects = [ ":jmh-benchmarks", ":trogdor" ] skipConfigurations = [ "zinc" ] } - // the task `removeUnusedImports` is implemented by google-java-format, - // and unfortunately the google-java-format version used by spotless 6.14.0 can't work with JDK 21. + // the task `removeUnusedImports` is implemented by google-java-format, + // and unfortunately the google-java-format version used by spotless 6.14.0 can't work with JDK 21. 
// Hence, we apply spotless tasks only if the env is either JDK11 or JDK17 if ((JavaVersion.current().isJava11() || (JavaVersion.current() == JavaVersion.VERSION_17))) { apply plugin: 'com.diffplug.spotless' @@ -978,6 +979,10 @@ project(':core') { implementation libs.opentelemetryExporterOTLP implementation libs.opentelemetryJmx implementation libs.awsSdkAuth + implementation 'com.google.protobuf:protobuf-java:3.25.5' + implementation 'com.squareup.wire:wire-schema:4.9.1' + implementation 'com.squareup.wire:wire-runtime:4.9.1' + implementation 'com.google.api.grpc:proto-google-common-protos:2.52.0' implementation(libs.oshi) { exclude group: 'org.slf4j', module: '*' @@ -1223,7 +1228,7 @@ project(':core') { //By default gradle does not handle test dependencies between the sub-projects //This line is to include clients project test jar to dependant-testlibs from (project(':clients').testJar ) { "$buildDir/dependant-testlibs" } - // log4j-appender is not in core dependencies, + // log4j-appender is not in core dependencies, // so we add it to dependant-testlibs to avoid ClassNotFoundException in running kafka_log4j_appender.py from (project(':log4j-appender').jar ) { "$buildDir/dependant-testlibs" } duplicatesStrategy 'exclude' @@ -1235,6 +1240,39 @@ project(':core') { configProperties = checkstyleConfigProperties("import-control-core.xml") } + apply plugin: 'com.google.protobuf' + + protobuf { + protoc { + // The artifact spec for the Protobuf Compiler + artifact = 'com.google.protobuf:protoc:3.25.5' + } + + generateProtoTasks { + all().each { task -> + task.builtins { + java {} + } + } + } + } + + tasks.named('compileJava') { + dependsOn 'generateProto' + } + + tasks.named('spotlessJava') { + dependsOn tasks.named('generateProto') + } + + spotless { + java { + targetExclude( + "build/generated/source/proto/main/java/**/*.java" + ) + } + } + sourceSets { // Set java/scala source folders in the `scala` block to enable joint compilation main { @@ -1242,7 +1280,12 @@ project(':core') { srcDirs = [] } scala { - srcDirs = ["src/generated/java", "src/main/java", "src/main/scala"] + srcDirs = [ + "build/generated/source/proto/main/java", + "src/generated/java", + "src/main/java", + "src/main/scala" + ] } } test { @@ -1250,7 +1293,11 @@ project(':core') { srcDirs = [] } scala { - srcDirs = ["src/test/java", "src/test/scala"] + srcDirs = [ + "build/generated/source/proto/test/java", + "src/test/java", + "src/test/scala" + ] } } } @@ -1483,7 +1530,7 @@ project(':transaction-coordinator') { implementation project(':clients') generator project(':generator') } - + sourceSets { main { java { @@ -3685,4 +3732,4 @@ task aggregatedJavadoc(type: Javadoc, dependsOn: compileJava) { classpath = files(projectsWithJavadoc.collect { it.sourceSets.main.compileClasspath }) includes = projectsWithJavadoc.collectMany { it.javadoc.getIncludes() } excludes = projectsWithJavadoc.collectMany { it.javadoc.getExcludes() } -} +} \ No newline at end of file diff --git a/checkstyle/checkstyle.xml b/checkstyle/checkstyle.xml index 69fc6db1df..1bbe119a5b 100644 --- a/checkstyle/checkstyle.xml +++ b/checkstyle/checkstyle.xml @@ -27,6 +27,9 @@ + + + @@ -152,7 +155,7 @@ - + diff --git a/core/src/main/proto/common/v1/common.proto b/core/src/main/proto/common/v1/common.proto new file mode 100644 index 0000000000..8931b0e7d9 --- /dev/null +++ b/core/src/main/proto/common/v1/common.proto @@ -0,0 +1,81 @@ +// Copyright 2019, OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use 
this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package kafka.automq.telemetry.proto.common.v1; + +option csharp_namespace = "OpenTelemetry.Proto.Common.V1"; +option java_multiple_files = true; +option java_package = "kafka.automq.telemetry.proto.common.v1"; +option java_outer_classname = "CommonProto"; +option go_package = "go.opentelemetry.io/proto/otlp/common/v1"; + +// AnyValue is used to represent any type of attribute value. AnyValue may contain a +// primitive value such as a string or integer or it may contain an arbitrary nested +// object containing arrays, key-value lists and primitives. +message AnyValue { + // The value is one of the listed fields. It is valid for all values to be unspecified + // in which case this AnyValue is considered to be "empty". + oneof value { + string string_value = 1; + bool bool_value = 2; + int64 int_value = 3; + double double_value = 4; + ArrayValue array_value = 5; + KeyValueList kvlist_value = 6; + bytes bytes_value = 7; + } +} + +// ArrayValue is a list of AnyValue messages. We need ArrayValue as a message +// since oneof in AnyValue does not allow repeated fields. +message ArrayValue { + // Array of values. The array may be empty (contain 0 elements). + repeated AnyValue values = 1; +} + +// KeyValueList is a list of KeyValue messages. We need KeyValueList as a message +// since `oneof` in AnyValue does not allow repeated fields. Everywhere else where we need +// a list of KeyValue messages (e.g. in Span) we use `repeated KeyValue` directly to +// avoid unnecessary extra wrapping (which slows down the protocol). The 2 approaches +// are semantically equivalent. +message KeyValueList { + // A collection of key/value pairs of key-value pairs. The list may be empty (may + // contain 0 elements). + // The keys MUST be unique (it is not allowed to have more than one + // value with the same key). + repeated KeyValue values = 1; +} + +// KeyValue is a key-value pair that is used to store Span attributes, Link +// attributes, etc. +message KeyValue { + string key = 1; + AnyValue value = 2; +} + +// InstrumentationScope is a message representing the instrumentation scope information +// such as the fully qualified name and version. +message InstrumentationScope { + // An empty instrumentation scope name means the name is unknown. + string name = 1; + string version = 2; + + // Additional attributes that describe the scope. [Optional]. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated KeyValue attributes = 3; + uint32 dropped_attributes_count = 4; +} diff --git a/core/src/main/proto/metrics/v1/metrics.proto b/core/src/main/proto/metrics/v1/metrics.proto new file mode 100644 index 0000000000..aad2f17185 --- /dev/null +++ b/core/src/main/proto/metrics/v1/metrics.proto @@ -0,0 +1,712 @@ +// Copyright 2019, OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
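For context, a minimal sketch of how the Java classes generated from common.proto by `generateProto` are typically used — the builder API below is the standard protobuf-java pattern for the declared `java_package`; the helper class itself is hypothetical and not part of this patch:

import kafka.automq.telemetry.proto.common.v1.AnyValue;
import kafka.automq.telemetry.proto.common.v1.KeyValue;

public class CommonProtoExample {
    public static KeyValue hostAttribute(String host) {
        // KeyValue wraps a string key and an AnyValue, mirroring the OTLP attribute model.
        return KeyValue.newBuilder()
            .setKey("host.name")
            .setValue(AnyValue.newBuilder().setStringValue(host).build())
            .build();
    }
}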
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package kafka.automq.telemetry.proto.metrics.v1; + +import "common/v1/common.proto"; +import "resource/v1/resource.proto"; + +option csharp_namespace = "OpenTelemetry.Proto.Metrics.V1"; +option java_multiple_files = true; +option java_package = "kafka.automq.telemetry.proto.metrics.v1"; +option java_outer_classname = "MetricsProto"; +option go_package = "go.opentelemetry.io/proto/otlp/metrics/v1"; + +// MetricsData represents the metrics data that can be stored in a persistent +// storage, OR can be embedded by other protocols that transfer OTLP metrics +// data but do not implement the OTLP protocol. +// +// MetricsData +// └─── ResourceMetrics +// ├── Resource +// ├── SchemaURL +// └── ScopeMetrics +// ├── Scope +// ├── SchemaURL +// └── Metric +// ├── Name +// ├── Description +// ├── Unit +// └── data +// ├── Gauge +// ├── Sum +// ├── Histogram +// ├── ExponentialHistogram +// └── Summary +// +// The main difference between this message and collector protocol is that +// in this message there will not be any "control" or "metadata" specific to +// OTLP protocol. +// +// When new fields are added into this message, the OTLP request MUST be updated +// as well. +message MetricsData { + // An array of ResourceMetrics. + // For data coming from a single resource this array will typically contain + // one element. Intermediary nodes that receive data from multiple origins + // typically batch the data before forwarding further and in that case this + // array will contain multiple elements. + repeated ResourceMetrics resource_metrics = 1; +} + +// A collection of ScopeMetrics from a Resource. +message ResourceMetrics { + reserved 1000; + + // The resource for the metrics in this message. + // If this field is not set then no resource info is known. + kafka.automq.telemetry.proto.resource.v1.Resource resource = 1; + + // A list of metrics that originate from a resource. + repeated ScopeMetrics scope_metrics = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the resource data + // is recorded in. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to the data in the "resource" field. It does not apply + // to the data in the "scope_metrics" field which have their own schema_url field. + string schema_url = 3; +} + +// A collection of Metrics produced by an Scope. +message ScopeMetrics { + // The instrumentation scope information for the metrics in this message. + // Semantically when InstrumentationScope isn't set, it is equivalent with + // an empty instrumentation scope name (unknown). + kafka.automq.telemetry.proto.common.v1.InstrumentationScope scope = 1; + + // A list of metrics that originate from an instrumentation library. + repeated Metric metrics = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the metric data + // is recorded in. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to all metrics in the "metrics" field. 
+ string schema_url = 3; +} + +// Defines a Metric which has one or more timeseries. The following is a +// brief summary of the Metric data model. For more details, see: +// +// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md +// +// The data model and relation between entities is shown in the +// diagram below. Here, "DataPoint" is the term used to refer to any +// one of the specific data point value types, and "points" is the term used +// to refer to any one of the lists of points contained in the Metric. +// +// - Metric is composed of a metadata and data. +// - Metadata part contains a name, description, unit. +// - Data is one of the possible types (Sum, Gauge, Histogram, Summary). +// - DataPoint contains timestamps, attributes, and one of the possible value type +// fields. +// +// Metric +// +------------+ +// |name | +// |description | +// |unit | +------------------------------------+ +// |data |---> |Gauge, Sum, Histogram, Summary, ... | +// +------------+ +------------------------------------+ +// +// Data [One of Gauge, Sum, Histogram, Summary, ...] +// +-----------+ +// |... | // Metadata about the Data. +// |points |--+ +// +-----------+ | +// | +---------------------------+ +// | |DataPoint 1 | +// v |+------+------+ +------+ | +// +-----+ ||label |label |...|label | | +// | 1 |-->||value1|value2|...|valueN| | +// +-----+ |+------+------+ +------+ | +// | . | |+-----+ | +// | . | ||value| | +// | . | |+-----+ | +// | . | +---------------------------+ +// | . | . +// | . | . +// | . | . +// | . | +---------------------------+ +// | . | |DataPoint M | +// +-----+ |+------+------+ +------+ | +// | M |-->||label |label |...|label | | +// +-----+ ||value1|value2|...|valueN| | +// |+------+------+ +------+ | +// |+-----+ | +// ||value| | +// |+-----+ | +// +---------------------------+ +// +// Each distinct type of DataPoint represents the output of a specific +// aggregation function, the result of applying the DataPoint's +// associated function of to one or more measurements. +// +// All DataPoint types have three common fields: +// - Attributes includes key-value pairs associated with the data point +// - TimeUnixNano is required, set to the end time of the aggregation +// - StartTimeUnixNano is optional, but strongly encouraged for DataPoints +// having an AggregationTemporality field, as discussed below. +// +// Both TimeUnixNano and StartTimeUnixNano values are expressed as +// UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. +// +// # TimeUnixNano +// +// This field is required, having consistent interpretation across +// DataPoint types. TimeUnixNano is the moment corresponding to when +// the data point's aggregate value was captured. +// +// Data points with the 0 value for TimeUnixNano SHOULD be rejected +// by consumers. +// +// # StartTimeUnixNano +// +// StartTimeUnixNano in general allows detecting when a sequence of +// observations is unbroken. This field indicates to consumers the +// start time for points with cumulative and delta +// AggregationTemporality, and it should be included whenever possible +// to support correct rate calculation. Although it may be omitted +// when the start time is truly unknown, setting StartTimeUnixNano is +// strongly encouraged. +message Metric { + reserved 4, 6, 8; + + // name of the metric. + string name = 1; + + // description of the metric, which can be used in documentation. 
+ string description = 2; + + // unit in which the metric value is reported. Follows the format + // described by http://unitsofmeasure.org/ucum.html. + string unit = 3; + + // Data determines the aggregation type (if any) of the metric, what is the + // reported value type for the data points, as well as the relatationship to + // the time interval over which they are reported. + oneof data { + Gauge gauge = 5; + Sum sum = 7; + Histogram histogram = 9; + ExponentialHistogram exponential_histogram = 10; + Summary summary = 11; + } + + // Additional metadata attributes that describe the metric. [Optional]. + // Attributes are non-identifying. + // Consumers SHOULD NOT need to be aware of these attributes. + // These attributes MAY be used to encode information allowing + // for lossless roundtrip translation to / from another data model. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated kafka.automq.telemetry.proto.common.v1.KeyValue metadata = 12; +} + +// Gauge represents the type of a scalar metric that always exports the +// "current value" for every data point. It should be used for an "unknown" +// aggregation. +// +// A Gauge does not support different aggregation temporalities. Given the +// aggregation is unknown, points cannot be combined using the same +// aggregation, regardless of aggregation temporalities. Therefore, +// AggregationTemporality is not included. Consequently, this also means +// "StartTimeUnixNano" is ignored for all data points. +message Gauge { + repeated NumberDataPoint data_points = 1; +} + +// Sum represents the type of a scalar metric that is calculated as a sum of all +// reported measurements over a time interval. +message Sum { + repeated NumberDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; + + // If "true" means that the sum is monotonic. + bool is_monotonic = 3; +} + +// Histogram represents the type of a metric that is calculated by aggregating +// as a Histogram of all reported measurements over a time interval. +message Histogram { + repeated HistogramDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; +} + +// ExponentialHistogram represents the type of a metric that is calculated by aggregating +// as a ExponentialHistogram of all reported double measurements over a time interval. +message ExponentialHistogram { + repeated ExponentialHistogramDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; +} + +// Summary metric data are used to convey quantile summaries, +// a Prometheus (see: https://prometheus.io/docs/concepts/metric_types/#summary) +// and OpenMetrics (see: https://github.com/OpenObservability/OpenMetrics/blob/4dbf6075567ab43296eed941037c12951faafb92/protos/prometheus.proto#L45) +// data type. These data points cannot always be merged in a meaningful way. +// While they can be useful in some applications, histogram data points are +// recommended for new applications. 
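For context, the `data` oneof of Metric surfaces in the generated Java API as `Metric.DataCase`. A minimal dispatch sketch (hypothetical helper, not part of this patch), assuming the classes generated from metrics.proto are on the classpath:

import kafka.automq.telemetry.proto.metrics.v1.Metric;

public class MetricDataCaseExample {
    public static int pointCount(Metric metric) {
        // Each oneof branch carries its own repeated data_points field.
        switch (metric.getDataCase()) {
            case GAUGE:
                return metric.getGauge().getDataPointsCount();
            case SUM:
                return metric.getSum().getDataPointsCount();
            case HISTOGRAM:
                return metric.getHistogram().getDataPointsCount();
            case EXPONENTIAL_HISTOGRAM:
                return metric.getExponentialHistogram().getDataPointsCount();
            case SUMMARY:
                return metric.getSummary().getDataPointsCount();
            default:
                return 0; // DATA_NOT_SET
        }
    }
}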
+// Summary metrics do not have an aggregation temporality field. This is +// because the count and sum fields of a SummaryDataPoint are assumed to be +// cumulative values. +message Summary { + repeated SummaryDataPoint data_points = 1; +} + +// AggregationTemporality defines how a metric aggregator reports aggregated +// values. It describes how those values relate to the time interval over +// which they are aggregated. +enum AggregationTemporality { + // UNSPECIFIED is the default AggregationTemporality, it MUST not be used. + AGGREGATION_TEMPORALITY_UNSPECIFIED = 0; + + // DELTA is an AggregationTemporality for a metric aggregator which reports + // changes since last report time. Successive metrics contain aggregation of + // values from continuous and non-overlapping intervals. + // + // The values for a DELTA metric are based only on the time interval + // associated with one measurement cycle. There is no dependency on + // previous measurements like is the case for CUMULATIVE metrics. + // + // For example, consider a system measuring the number of requests that + // it receives and reports the sum of these requests every second as a + // DELTA metric: + // + // 1. The system starts receiving at time=t_0. + // 2. A request is received, the system measures 1 request. + // 3. A request is received, the system measures 1 request. + // 4. A request is received, the system measures 1 request. + // 5. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+1 with a value of 3. + // 6. A request is received, the system measures 1 request. + // 7. A request is received, the system measures 1 request. + // 8. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0+1 to + // t_0+2 with a value of 2. + AGGREGATION_TEMPORALITY_DELTA = 1; + + // CUMULATIVE is an AggregationTemporality for a metric aggregator which + // reports changes since a fixed start time. This means that current values + // of a CUMULATIVE metric depend on all previous measurements since the + // start time. Because of this, the sender is required to retain this state + // in some form. If this state is lost or invalidated, the CUMULATIVE metric + // values MUST be reset and a new fixed start time following the last + // reported measurement time sent MUST be used. + // + // For example, consider a system measuring the number of requests that + // it receives and reports the sum of these requests every second as a + // CUMULATIVE metric: + // + // 1. The system starts receiving at time=t_0. + // 2. A request is received, the system measures 1 request. + // 3. A request is received, the system measures 1 request. + // 4. A request is received, the system measures 1 request. + // 5. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+1 with a value of 3. + // 6. A request is received, the system measures 1 request. + // 7. A request is received, the system measures 1 request. + // 8. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+2 with a value of 5. + // 9. The system experiences a fault and loses state. + // 10. The system recovers and resumes receiving at time=t_1. + // 11. A request is received, the system measures 1 request. + // 12. The 1 second collection cycle ends. 
A metric is exported for the + // number of requests received over the interval of time t_1 to + // t_0+1 with a value of 1. + // + // Note: Even though, when reporting changes since last report time, using + // CUMULATIVE is valid, it is not recommended. This may cause problems for + // systems that do not use start_time to determine when the aggregation + // value was reset (e.g. Prometheus). + AGGREGATION_TEMPORALITY_CUMULATIVE = 2; +} + +// DataPointFlags is defined as a protobuf 'uint32' type and is to be used as a +// bit-field representing 32 distinct boolean flags. Each flag defined in this +// enum is a bit-mask. To test the presence of a single flag in the flags of +// a data point, for example, use an expression like: +// +// (point.flags & DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK) == DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK +// +enum DataPointFlags { + // The zero value for the enum. Should not be used for comparisons. + // Instead use bitwise "and" with the appropriate mask as shown above. + DATA_POINT_FLAGS_DO_NOT_USE = 0; + + // This DataPoint is valid but has no recorded value. This value + // SHOULD be used to reflect explicitly missing data in a series, as + // for an equivalent to the Prometheus "staleness marker". + DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK = 1; + + // Bits 2-31 are reserved for future use. +} + +// NumberDataPoint is a single data point in a timeseries that describes the +// time-varying scalar value of a metric. +message NumberDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated kafka.automq.telemetry.proto.common.v1.KeyValue attributes = 7; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // The value itself. A point is considered invalid when one of the recognized + // value fields is not present inside this oneof. + oneof value { + double as_double = 4; + sfixed64 as_int = 6; + } + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 5; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 8; +} + +// HistogramDataPoint is a single data point in a timeseries that describes the +// time-varying values of a Histogram. A Histogram contains summary statistics +// for a population of values, it may optionally contain the distribution of +// those values across a set of buckets. +// +// If the histogram contains the distribution of values, then both +// "explicit_bounds" and "bucket counts" fields must be defined. +// If the histogram does not contain the distribution of values, then both +// "explicit_bounds" and "bucket_counts" must be omitted and only "count" and +// "sum" are known. +message HistogramDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. 
The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated kafka.automq.telemetry.proto.common.v1.KeyValue attributes = 9; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // count is the number of values in the population. Must be non-negative. This + // value must be equal to the sum of the "count" fields in buckets if a + // histogram is provided. + fixed64 count = 4; + + // sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#histogram + optional double sum = 5; + + // bucket_counts is an optional field contains the count values of histogram + // for each bucket. + // + // The sum of the bucket_counts must equal the value in the count field. + // + // The number of elements in bucket_counts array must be by one greater than + // the number of elements in explicit_bounds array. + repeated fixed64 bucket_counts = 6; + + // explicit_bounds specifies buckets with explicitly defined bounds for values. + // + // The boundaries for bucket at index i are: + // + // (-infinity, explicit_bounds[i]] for i == 0 + // (explicit_bounds[i-1], explicit_bounds[i]] for 0 < i < size(explicit_bounds) + // (explicit_bounds[i-1], +infinity) for i == size(explicit_bounds) + // + // The values in the explicit_bounds array must be strictly increasing. + // + // Histogram buckets are inclusive of their upper boundary, except the last + // bucket where the boundary is at infinity. This format is intentionally + // compatible with the OpenMetrics histogram definition. + repeated double explicit_bounds = 7; + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 8; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 10; + + // min is the minimum value over (start_time, end_time]. + optional double min = 11; + + // max is the maximum value over (start_time, end_time]. + optional double max = 12; +} + +// ExponentialHistogramDataPoint is a single data point in a timeseries that describes the +// time-varying values of a ExponentialHistogram of double values. A ExponentialHistogram contains +// summary statistics for a population of values, it may optionally contain the +// distribution of those values across a set of buckets. +// +message ExponentialHistogramDataPoint { + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). 
+ // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated kafka.automq.telemetry.proto.common.v1.KeyValue attributes = 1; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // count is the number of values in the population. Must be + // non-negative. This value must be equal to the sum of the "bucket_counts" + // values in the positive and negative Buckets plus the "zero_count" field. + fixed64 count = 4; + + // sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#histogram + optional double sum = 5; + + // scale describes the resolution of the histogram. Boundaries are + // located at powers of the base, where: + // + // base = (2^(2^-scale)) + // + // The histogram bucket identified by `index`, a signed integer, + // contains values that are greater than (base^index) and + // less than or equal to (base^(index+1)). + // + // The positive and negative ranges of the histogram are expressed + // separately. Negative values are mapped by their absolute value + // into the negative range using the same scale as the positive range. + // + // scale is not restricted by the protocol, as the permissible + // values depend on the range of the data. + sint32 scale = 6; + + // zero_count is the count of values that are either exactly zero or + // within the region considered zero by the instrumentation at the + // tolerated degree of precision. This bucket stores values that + // cannot be expressed using the standard exponential formula as + // well as values that have been rounded to zero. + // + // Implementations MAY consider the zero bucket to have probability + // mass equal to (zero_count / count). + fixed64 zero_count = 7; + + // positive carries the positive range of exponential bucket counts. + Buckets positive = 8; + + // negative carries the negative range of exponential bucket counts. + Buckets negative = 9; + + // Buckets are a set of bucket counts, encoded in a contiguous array + // of counts. + message Buckets { + // Offset is the bucket index of the first entry in the bucket_counts array. + // + // Note: This uses a varint encoding as a simple form of compression. + sint32 offset = 1; + + // bucket_counts is an array of count values, where bucket_counts[i] carries + // the count of the bucket at index (offset+i). bucket_counts[i] is the count + // of values greater than base^(offset+i) and less than or equal to + // base^(offset+i+1). + // + // Note: By contrast, the explicit HistogramDataPoint uses + // fixed64. This field is expected to have many buckets, + // especially zeros, so uint64 has been selected to ensure + // varint encoding. 
+ repeated uint64 bucket_counts = 2; + } + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 10; + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 11; + + // min is the minimum value over (start_time, end_time]. + optional double min = 12; + + // max is the maximum value over (start_time, end_time]. + optional double max = 13; + + // ZeroThreshold may be optionally set to convey the width of the zero + // region. Where the zero region is defined as the closed interval + // [-ZeroThreshold, ZeroThreshold]. + // When ZeroThreshold is 0, zero count bucket stores values that cannot be + // expressed using the standard exponential formula as well as values that + // have been rounded to zero. + double zero_threshold = 14; +} + +// SummaryDataPoint is a single data point in a timeseries that describes the +// time-varying values of a Summary metric. The count and sum fields represent +// cumulative values. +message SummaryDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated kafka.automq.telemetry.proto.common.v1.KeyValue attributes = 7; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // count is the number of values in the population. Must be non-negative. + fixed64 count = 4; + + // sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#summary + double sum = 5; + + // Represents the value at a given quantile of a distribution. + // + // To record Min and Max values following conventions are used: + // - The 1.0 quantile is equivalent to the maximum value observed. + // - The 0.0 quantile is equivalent to the minimum value observed. + // + // See the following issue for more context: + // https://github.com/open-telemetry/opentelemetry-proto/issues/125 + message ValueAtQuantile { + // The quantile of a distribution. Must be in the interval + // [0.0, 1.0]. + double quantile = 1; + + // The value at the given quantile of a distribution. + // + // Quantile values must NOT be negative. + double value = 2; + } + + // (Optional) list of values at different quantiles of the distribution calculated + // from the current snapshot. The quantiles must be strictly increasing. + repeated ValueAtQuantile quantile_values = 6; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. 
+ uint32 flags = 8; +} + +// A representation of an exemplar, which is a sample input measurement. +// Exemplars also hold information about the environment when the measurement +// was recorded, for example the span and trace ID of the active span when the +// exemplar was recorded. +message Exemplar { + reserved 1; + + // The set of key/value pairs that were filtered out by the aggregator, but + // recorded alongside the original measurement. Only key/value pairs that were + // filtered out by the aggregator should be included + repeated kafka.automq.telemetry.proto.common.v1.KeyValue filtered_attributes = 7; + + // time_unix_nano is the exact time when this exemplar was recorded + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 2; + + // The value of the measurement that was recorded. An exemplar is + // considered invalid when one of the recognized value fields is not present + // inside this oneof. + oneof value { + double as_double = 3; + sfixed64 as_int = 6; + } + + // (Optional) Span ID of the exemplar trace. + // span_id may be missing if the measurement is not recorded inside a trace + // or if the trace is not sampled. + bytes span_id = 4; + + // (Optional) Trace ID of the exemplar trace. + // trace_id may be missing if the measurement is not recorded inside a trace + // or if the trace is not sampled. + bytes trace_id = 5; +} diff --git a/core/src/main/proto/resource/v1/resource.proto b/core/src/main/proto/resource/v1/resource.proto new file mode 100644 index 0000000000..6290302531 --- /dev/null +++ b/core/src/main/proto/resource/v1/resource.proto @@ -0,0 +1,37 @@ +// Copyright 2019, OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package kafka.automq.telemetry.proto.resource.v1; + +import "common/v1/common.proto"; + +option csharp_namespace = "OpenTelemetry.Proto.Resource.V1"; +option java_multiple_files = true; +option java_package = "kafka.automq.telemetry.proto.resource.v1"; +option java_outer_classname = "ResourceProto"; +option go_package = "go.opentelemetry.io/proto/otlp/resource/v1"; + +// Resource information. +message Resource { + // Set of attributes that describe the resource. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated kafka.automq.telemetry.proto.common.v1.KeyValue attributes = 1; + + // dropped_attributes_count is the number of dropped attributes. If the value is 0, then + // no attributes were dropped. 
+ uint32 dropped_attributes_count = 2; +} diff --git a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaExportURI.java b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaExportURI.java new file mode 100644 index 0000000000..20aa391414 --- /dev/null +++ b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaExportURI.java @@ -0,0 +1,188 @@ +package kafka.log.stream.s3.telemetry.exporter; + +import org.apache.kafka.common.utils.Utils; + +import com.automq.stream.utils.URIUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Kafka Export URI format: + *
+ * <pre>{@code
+ * kafka://?bootstrapServers=host1:port1;host2:port2
+ *          &topic=metrics
+ *          &securityProtocol=PLAINTEXT
+ *          &saslMechanism=PLAIN
+ *          &saslUsername=admin
+ *          &saslPassword=secret
+ * }</pre>
+ *
+ * <p>Parameter Description:</p>
+ * <ul>
+ *   <li>bootstrapServers - Kafka cluster address, required</li>
+ *   <li>topic - Kafka topic to which metrics are written, required</li>
+ *   <li>securityProtocol - Security protocol (default: PLAINTEXT)</li>
+ *   <li>saslMechanism - SASL authentication mechanism (default: PLAIN)</li>
+ *   <li>saslUsername - SASL username (required when using SASL authentication)</li>
+ *   <li>saslPassword - SASL password (required when using SASL authentication)</li>
+ * </ul>
+ */ +public final class KafkaExportURI { + private static final Logger LOGGER = LoggerFactory.getLogger(KafkaExportURI.class); + private static final String DEFAULT_SECURITY_PROTOCOL = "PLAINTEXT"; + private static final String DEFAULT_SASL_MECHANISM = "PLAIN"; + private static final String BOOTSTRAP_SERVERS_KEY = "bootstrapServers"; + private static final String TOPIC_KEY = "topic"; + private static final String SECURITY_PROTOCOL_KEY = "securityProtocol"; + private static final String SASL_MECHANISM_KEY = "saslMechanism"; + private static final String SASL_USERNAME_KEY = "saslUsername"; + private static final String SASL_PASSWORD_KEY = "saslPassword"; + + private final String bootstrapServers; + private final String topic; + private final String securityProtocol; + private final String saslMechanism; + private final String saslUsername; + private final String saslPassword; + + /** + * Constructor + * + * @param bootstrapServers Kafka cluster address, required + * @param topic Kafka topic to which metrics are written, required + * @param securityProtocol Security protocol (default: PLAINTEXT) + * @param saslMechanism SASL authentication mechanism (default: PLAIN) + * @param saslUsername SASL username (required when using SASL authentication) + * @param saslPassword SASL password (required when using SASL authentication) + */ + public KafkaExportURI( + String bootstrapServers, + String topic, + String securityProtocol, + String saslMechanism, + String saslUsername, + String saslPassword + ) { + validate(bootstrapServers, topic, securityProtocol, saslMechanism, saslUsername, saslPassword); + this.bootstrapServers = bootstrapServers; + this.topic = topic; + this.securityProtocol = securityProtocol != null ? securityProtocol : DEFAULT_SECURITY_PROTOCOL; + this.saslMechanism = saslMechanism != null ? saslMechanism : DEFAULT_SASL_MECHANISM; + this.saslUsername = saslUsername; + this.saslPassword = saslPassword; + } + + // Getters keep the same access way as a record + public String bootstrapServers() { + return bootstrapServers; + } + + public String topic() { + return topic; + } + + public String securityProtocol() { + return securityProtocol; + } + + public String saslMechanism() { + return saslMechanism; + } + + public String saslUsername() { + return saslUsername; + } + + public String saslPassword() { + return saslPassword; + } + + private static void validate(String bootstrapServers, String topic, String securityProtocol, + String saslMechanism, String saslUsername, String saslPassword) { + // Keep the original validation logic... 
+ if (Utils.isBlank(bootstrapServers)) { + throw new IllegalArgumentException("bootstrapServers must be specified"); + } + if (Utils.isBlank(topic)) { + throw new IllegalArgumentException("topic must be specified"); + } + if (securityProtocol != null && securityProtocol.startsWith("SASL_")) { + if (Utils.isBlank(saslUsername) || Utils.isBlank(saslPassword)) { + throw new IllegalArgumentException( + "saslUsername and saslPassword must be specified when using SASL security protocol"); + } + if (!"PLAIN".equals(saslMechanism) && !"SCRAM-SHA-256".equals(saslMechanism) && !"SCRAM-SHA-512".equals(saslMechanism)) { + throw new IllegalArgumentException("Invalid SASL mechanism: " + saslMechanism); + } + } + } + + // Keep the original static factory method + public static KafkaExportURI parse(String uriStr) { + try { + URI uri = new URI(uriStr); + Map> queries = URIUtils.splitQuery(uri); + + String bootstrapServers = URIUtils.getString(queries, BOOTSTRAP_SERVERS_KEY, ""); + String topic = URIUtils.getString(queries, TOPIC_KEY, ""); + String securityProtocol = URIUtils.getString(queries, SECURITY_PROTOCOL_KEY, DEFAULT_SECURITY_PROTOCOL); + String saslMechanism = URIUtils.getString(queries, SASL_MECHANISM_KEY, DEFAULT_SASL_MECHANISM); + String saslUsername = URIUtils.getString(queries, SASL_USERNAME_KEY, ""); + String saslPassword = URIUtils.getString(queries, SASL_PASSWORD_KEY, ""); + + return new KafkaExportURI( + bootstrapServers, + topic, + securityProtocol, + saslMechanism, + saslUsername, + saslPassword + ); + } catch (URISyntaxException e) { + LOGGER.error("Invalid Kafka export URI: {}", uriStr, e); + throw new IllegalArgumentException("Invalid Kafka export URI: " + uriStr); + } + } + + // Override equals/hashCode/toString to keep behavior similar to a record + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KafkaExportURI that = (KafkaExportURI) o; + return Objects.equals(bootstrapServers, that.bootstrapServers) && + Objects.equals(topic, that.topic) && + Objects.equals(securityProtocol, that.securityProtocol) && + Objects.equals(saslMechanism, that.saslMechanism) && + Objects.equals(saslUsername, that.saslUsername) && + Objects.equals(saslPassword, that.saslPassword); + } + + @Override + public int hashCode() { + return Objects.hash(bootstrapServers, topic, securityProtocol, saslMechanism, saslUsername, saslPassword); + } + + @Override + public String toString() { + return "KafkaExportURI[" + + "bootstrapServers=" + bootstrapServers + ", " + + "topic=" + topic + ", " + + "securityProtocol=" + securityProtocol + ", " + + "saslMechanism=" + saslMechanism + ", " + + "saslUsername=" + saslUsername + ", " + + "saslPassword=" + (saslPassword != null ? 
"******" : null) + + ']'; + } +} diff --git a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaExporter.java b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaExporter.java new file mode 100644 index 0000000000..f387db94ac --- /dev/null +++ b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaExporter.java @@ -0,0 +1,558 @@ +package kafka.log.stream.s3.telemetry.exporter; + +import kafka.automq.telemetry.proto.metrics.v1.ResourceMetrics; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.Metric; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.config.SaslConfigs; +import org.apache.kafka.common.serialization.ByteArraySerializer; + +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.metrics.InstrumentType; +import io.opentelemetry.sdk.metrics.data.AggregationTemporality; +import io.opentelemetry.sdk.metrics.data.DoublePointData; +import io.opentelemetry.sdk.metrics.data.HistogramPointData; +import io.opentelemetry.sdk.metrics.data.LongPointData; +import io.opentelemetry.sdk.metrics.data.MetricData; +import io.opentelemetry.sdk.metrics.data.SummaryPointData; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import io.opentelemetry.sdk.metrics.internal.data.ImmutableGaugeData; +import io.opentelemetry.sdk.metrics.internal.data.ImmutableHistogramData; +import io.opentelemetry.sdk.metrics.internal.data.ImmutableMetricData; +import io.opentelemetry.sdk.metrics.internal.data.ImmutableSumData; +import io.opentelemetry.sdk.metrics.internal.data.ImmutableSummaryData; + +/** + * Kafka Metrics Exporter implementation to export OpenTelemetry metrics to a Kafka topic. 
+ */ +public class KafkaExporter implements MetricExporter { + + private static final Logger LOGGER = LoggerFactory.getLogger(KafkaExporter.class); + + private static final int MAX_SAMPLES_PER_RECORD = 1000; + // 1MB + private static final int MAX_RECORD_SIZE = 1024 * 1024; + + // region Configuration constants + private static final Map DEFAULT_CONFIG = Map.of( + ProducerConfig.ACKS_CONFIG, "all", + ProducerConfig.RETRIES_CONFIG, 3, + ProducerConfig.LINGER_MS_CONFIG, 500, + ProducerConfig.BATCH_SIZE_CONFIG, 1_048_576, + // Explicitly set the buffer size (32MB) + ProducerConfig.BUFFER_MEMORY_CONFIG, 32_000_000, + // Maximum blocking time before sending (5 seconds) + ProducerConfig.MAX_BLOCK_MS_CONFIG, 5_000 + ); + + private final KafkaProducer producer; + private final String topic; + private final MetricsSerializer serializer; + private final ExecutorService callbackExecutor; + + /** + * Constructor required for production environments. + */ + public KafkaExporter(KafkaExportURI config) { + this.topic = validateTopic(config.topic()); + this.serializer = new ProtoMetricsSerializer(); + + Map producerConfig = new HashMap<>(DEFAULT_CONFIG); + // Key: Must forcefully specify key configurations + // Due to the requirement of kafka.automq.AutoMQConfig.S3_TELEMETRY_METRICS_EXPORTER_URI_DOC, when the broker server configuration cannot be separated by ',', ';' is used. When creating the producer, ';' should be converted to ',' according to the Kafka format specification. + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.bootstrapServers().replaceAll(";", ",")); + producerConfig.put(ProducerConfig.CLIENT_ID_CONFIG, "otel-kafka-exporter-" + UUID.randomUUID()); + configureSecurity(producerConfig, config); + + this.producer = new KafkaProducer<>( + producerConfig, + new ByteArraySerializer(), + new ByteArraySerializer() + ); + + // Use a bounded queue to prevent memory overflow + this.callbackExecutor = new ThreadPoolExecutor( + 1, 1, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(1000), + r -> new Thread(r, "kafka-exporter-callback"), + new ThreadPoolExecutor.AbortPolicy() + ); + } + + // Add a method to the KafkaExporter class: + private boolean isProducerOverloaded() { + try { + Map metrics = producer.metrics(); + if (metrics == null) { + return false; + } + + double bufferTotal = -1; + double bufferAvailable = -1; + + // Iterate through all metrics and match by name (avoid directly constructing MetricName) + for (Map.Entry entry : metrics.entrySet()) { + MetricName metricName = entry.getKey(); + Metric metric = entry.getValue(); + // Filter key metrics belonging to the producer-metrics group + if ("producer-metrics".equals(metricName.group())) { + Object value = metric.metricValue(); + switch (metricName.name()) { + case "buffer-total-bytes": + if (value instanceof Double) { + bufferTotal = (double) value; + } + break; + case "buffer-available-bytes": + if (value instanceof Double) { + bufferAvailable = (double) value; + } + break; + default: + break; + } + + } + } + + if (bufferTotal > 0 && bufferAvailable >= 0) { + double used = bufferTotal - bufferAvailable; + return used / bufferTotal > 0.9; + } + return false; + } catch (Exception e) { + LOGGER.error("Failed to check producer buffer state", e); + return false; + } + } + + /** + * Security protocol configuration (supports multiple mechanisms). 
+ */ + private void configureSecurity(Map config, KafkaExportURI exportConfig) { + String protocol = exportConfig.securityProtocol().toUpperCase(Locale.ROOT); + config.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, protocol); + + if (protocol.startsWith("SASL_")) { + String saslMechanism = exportConfig.saslMechanism(); + // Dynamically generate JAAS configuration based on the mechanism + switch (saslMechanism) { + case "PLAIN": + config.put(SaslConfigs.SASL_MECHANISM, saslMechanism); + config.put(SaslConfigs.SASL_JAAS_CONFIG, + String.format("org.apache.kafka.common.security.plain.PlainLoginModule required username=\"%s\" password=\"%s\";", + exportConfig.saslUsername(), + exportConfig.saslPassword())); + break; + default: + throw new IllegalArgumentException("Unsupported SASL mechanism: " + saslMechanism); + } + config.put(SaslConfigs.SASL_MECHANISM, saslMechanism); + } + } + + @Override + public CompletableResultCode export(@NotNull Collection metrics) { + // Use an independent ResultCode to track each batch + CompletableResultCode resultCode = new CompletableResultCode(); + if (metrics.isEmpty()) { + return resultCode.succeed(); + } + + // Check for backpressure + if (isProducerOverloaded()) { + LOGGER.warn("Producer buffer overloaded, discarding metrics to protect memory"); + // Discard data and return failure to make OpenTelemetry temporarily silent + return CompletableResultCode.ofFailure(); + } + + List> futures = new ArrayList<>(); + try { + for (MetricData metric : metrics) { + List serializedChunks = splitMetricData(metric); + for (byte[] value : serializedChunks) { + if (value == null || value.length == 0) { + LOGGER.warn("Skipping invalid metric: {}", metric.getName()); + continue; + } + if (value.length > MAX_RECORD_SIZE) { + LOGGER.warn("Metric data size exceeds 1MB, discarding: {}", metric.getName()); + continue; + } + ProducerRecord record = new ProducerRecord<>(topic, null, value); + // Add callback tracking + futures.add(producer.send(record, new LoggingCallback(metric))); + } + } + + // Asynchronously track the sending status + callbackExecutor.submit(() -> { + boolean allSuccess = true; + for (Future future : futures) { + try { + future.get(30, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + allSuccess = false; + } catch (Exception e) { + LOGGER.error("Failed to send metric batch", e); + allSuccess = false; + } + } + if (allSuccess) { + resultCode.succeed(); + } else { + resultCode.fail(); + } + }); + + return resultCode; + } catch (RejectedExecutionException e) { + LOGGER.error("Callback queue is full, metrics export rejected", e); + return CompletableResultCode.ofFailure(); + } catch (Exception e) { + LOGGER.error("Metric serialization failed", e); + return CompletableResultCode.ofFailure(); + } + } + + @Override + public CompletableResultCode flush() { + CompletableResultCode result = new CompletableResultCode(); + try { + producer.flush(); + result.succeed(); + } catch (Exception e) { + LOGGER.warn("Flush failed", e); + result.fail(); + } + return result; + } + + @Override + public CompletableResultCode shutdown() { + CompletableResultCode result = new CompletableResultCode(); + try { + // Shutdown in stages + callbackExecutor.shutdown(); + if (!callbackExecutor.awaitTermination(30, TimeUnit.SECONDS)) { + LOGGER.warn("Force shutdown callback executor"); + callbackExecutor.shutdownNow(); + } + + // Key: Must close the producer! 
+ // Allow buffered data to be sent + producer.close(Duration.ofSeconds(30)); + result.succeed(); + } catch (Exception e) { + LOGGER.error("Shutdown failed", e); + result.fail(); + } + return result; + } + + // Helper methods + @Override + public AggregationTemporality getAggregationTemporality(@NotNull InstrumentType instrumentType) { + return AggregationTemporality.CUMULATIVE; + } + + private String validateTopic(String topic) { + if (topic == null || topic.trim().isEmpty()) { + throw new IllegalArgumentException("Kafka topic must be specified"); + } + return topic; + } + + /** + * Enhanced callback (binds original metric information). + */ + private static class LoggingCallback implements Callback { + private final String metricName; + + LoggingCallback(MetricData metric) { + this.metricName = metric != null ? metric.getName() : "unknown"; + } + + @Override + public void onCompletion(RecordMetadata metadata, Exception exception) { + if (exception != null) { + // Key: Correctly handle the case where metadata may be null + String topicInfo = metadata != null ? metadata.topic() : "unknown-topic"; + LOGGER.error("Failed to send metric [{}] to {}: {}", + metricName, topicInfo, exception.getMessage()); + } else if (LOGGER.isTraceEnabled()) { + LOGGER.trace("Sent [{}] to {}-{}@{}", + metricName, metadata.topic(), metadata.partition(), metadata.offset()); + } + } + } + + private List splitMetricData(MetricData metric) { + List chunks = new ArrayList<>(); + switch (metric.getType()) { + case LONG_GAUGE: + processLongGauge(metric, chunks); + break; + case DOUBLE_GAUGE: + processDoubleGauge(metric, chunks); + break; + case LONG_SUM: + processLongSum(metric, chunks); + break; + case DOUBLE_SUM: + processDoubleSum(metric, chunks); + break; + case HISTOGRAM: + processHistogram(metric, chunks); + break; + case SUMMARY: + processSummary(metric, chunks); + break; + default: + LOGGER.warn("Unsupported metric type: {}", metric.getType()); + break; + } + return chunks; + } + + // LongGauge processor + private void processLongGauge(MetricData metric, List chunks) { + List points = new ArrayList<>(metric.getLongGaugeData().getPoints()); + int pointCount = points.size(); + + if (pointCount <= MAX_SAMPLES_PER_RECORD) { + chunks.add(serializer.serialize(metric)); + return; + } + + for (int i = 0; i < pointCount; i += MAX_SAMPLES_PER_RECORD) { + List subPoints = getSubList(points, i); + MetricData subMetric = buildLongGaugeMetric(metric, subPoints); + chunks.add(serializer.serialize(subMetric)); + } + } + + private MetricData buildLongGaugeMetric(MetricData original, List subPoints) { + return ImmutableMetricData.createLongGauge( + original.getResource(), + original.getInstrumentationScopeInfo(), + original.getName(), + original.getDescription(), + original.getUnit(), + ImmutableGaugeData.create(subPoints) + ); + } + + // DoubleGauge processor + private void processDoubleGauge(MetricData metric, List chunks) { + List points = new ArrayList<>(metric.getDoubleGaugeData().getPoints()); + int pointCount = points.size(); + + if (pointCount <= MAX_SAMPLES_PER_RECORD) { + chunks.add(serializer.serialize(metric)); + return; + } + + for (int i = 0; i < pointCount; i += MAX_SAMPLES_PER_RECORD) { + List subPoints = getSubList(points, i); + MetricData subMetric = buildDoubleGaugeMetric(metric, subPoints); + chunks.add(serializer.serialize(subMetric)); + } + } + + private MetricData buildDoubleGaugeMetric(MetricData original, List subPoints) { + return ImmutableMetricData.createDoubleGauge( + original.getResource(), + 
original.getInstrumentationScopeInfo(), + original.getName(), + original.getDescription(), + original.getUnit(), + ImmutableGaugeData.create(subPoints) + ); + } + + // LongSum processor + private void processLongSum(MetricData metric, List chunks) { + List points = new ArrayList<>(metric.getLongSumData().getPoints()); + int pointCount = points.size(); + + if (pointCount <= MAX_SAMPLES_PER_RECORD) { + chunks.add(serializer.serialize(metric)); + return; + } + + for (int i = 0; i < pointCount; i += MAX_SAMPLES_PER_RECORD) { + List subPoints = getSubList(points, i); + MetricData subMetric = buildLongSumMetric(metric, subPoints); + chunks.add(serializer.serialize(subMetric)); + } + } + + private MetricData buildLongSumMetric(MetricData original, List subPoints) { + return ImmutableMetricData.createLongSum( + original.getResource(), + original.getInstrumentationScopeInfo(), + original.getName(), + original.getDescription(), + original.getUnit(), + ImmutableSumData.create( + original.getLongSumData().isMonotonic(), + original.getLongSumData().getAggregationTemporality(), + subPoints + ) + ); + } + + // DoubleSum processor + private void processDoubleSum(MetricData metric, List chunks) { + List points = new ArrayList<>(metric.getDoubleSumData().getPoints()); + int pointCount = points.size(); + + if (pointCount <= MAX_SAMPLES_PER_RECORD) { + chunks.add(serializer.serialize(metric)); + return; + } + + for (int i = 0; i < pointCount; i += MAX_SAMPLES_PER_RECORD) { + List subPoints = getSubList(points, i); + MetricData subMetric = buildDoubleSumMetric(metric, subPoints); + chunks.add(serializer.serialize(subMetric)); + } + } + + private MetricData buildDoubleSumMetric(MetricData original, List subPoints) { + return ImmutableMetricData.createDoubleSum( + original.getResource(), + original.getInstrumentationScopeInfo(), + original.getName(), + original.getDescription(), + original.getUnit(), + ImmutableSumData.create( + original.getDoubleSumData().isMonotonic(), + original.getDoubleSumData().getAggregationTemporality(), + subPoints + ) + ); + } + + // Histogram processor + private void processHistogram(MetricData metric, List chunks) { + List points = new ArrayList<>(metric.getHistogramData().getPoints()); + int pointCount = points.size(); + + if (pointCount <= MAX_SAMPLES_PER_RECORD) { + chunks.add(serializer.serialize(metric)); + return; + } + + for (int i = 0; i < pointCount; i += MAX_SAMPLES_PER_RECORD) { + List subPoints = getSubList(points, i); + MetricData subMetric = buildHistogramMetric(metric, subPoints); + chunks.add(serializer.serialize(subMetric)); + } + } + + private MetricData buildHistogramMetric(MetricData original, List subPoints) { + return ImmutableMetricData.createDoubleHistogram( + original.getResource(), + original.getInstrumentationScopeInfo(), + original.getName(), + original.getDescription(), + original.getUnit(), + ImmutableHistogramData.create( + original.getHistogramData().getAggregationTemporality(), + subPoints + ) + ); + } + + // Summary processor + private void processSummary(MetricData metric, List chunks) { + List points = new ArrayList<>(metric.getSummaryData().getPoints()); + int pointCount = points.size(); + + if (pointCount <= MAX_SAMPLES_PER_RECORD) { + chunks.add(serializer.serialize(metric)); + return; + } + + for (int i = 0; i < pointCount; i += MAX_SAMPLES_PER_RECORD) { + List subPoints = getSubList(points, i); + MetricData subMetric = buildSummaryMetric(metric, subPoints); + chunks.add(serializer.serialize(subMetric)); + } + } + + private MetricData 
buildSummaryMetric(MetricData original, List subPoints) { + return ImmutableMetricData.createDoubleSummary( + original.getResource(), + original.getInstrumentationScopeInfo(), + original.getName(), + original.getDescription(), + original.getUnit(), + ImmutableSummaryData.create(subPoints) + ); + } + + // region Common utility methods + private List getSubList(List points, int startIndex) { + int endIndex = Math.min(startIndex + MAX_SAMPLES_PER_RECORD, points.size()); + return points.subList(startIndex, endIndex); + } + + /** + * Metric serialization interface (supports extending multiple formats). + */ + interface MetricsSerializer { + byte[] serialize(MetricData metric); + } + + /** + * Protobuf serialization implementation (compatible with OTLP format). + */ + static class ProtoMetricsSerializer implements MetricsSerializer { + + ProtoMetricsSerializer() { + } + + @Override + public byte[] serialize(MetricData metric) { + try { + // Call the internal utility class to convert MetricData to Proto ResourceMetrics + // Directly call the internal utility class to convert data + ResourceMetrics resourceMetrics = new MetricProtoConverter().convertToResourceMetrics(metric); + return resourceMetrics.toByteArray(); + } catch (Throwable e) { + LOGGER.error("Fail to serialize metric to OTLP format", e); + return new byte[0]; + } + } + } +} diff --git a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaMetricsExporter.java b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaMetricsExporter.java new file mode 100644 index 0000000000..65fbb88740 --- /dev/null +++ b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/KafkaMetricsExporter.java @@ -0,0 +1,59 @@ +package kafka.log.stream.s3.telemetry.exporter; + +import org.apache.kafka.common.utils.Utils; + +import com.automq.stream.utils.URIUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.time.Duration; +import java.util.List; +import java.util.Map; + +import io.opentelemetry.sdk.metrics.export.MetricReader; +import io.opentelemetry.sdk.metrics.export.PeriodicMetricReader; +import io.opentelemetry.sdk.metrics.export.PeriodicMetricReaderBuilder; + +public class KafkaMetricsExporter implements MetricsExporter { + + private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMetricsExporter.class); + private static final String METRICS_COLLECTION_PERIOD = "collectionPeriod"; + private int intervalSeconds = 60; + private final KafkaExportURI kafkaExportURI; + + public KafkaMetricsExporter(String kafkaExportURIStr) { + if (Utils.isBlank(kafkaExportURIStr)) { + throw new IllegalArgumentException("Kafka export URI is required"); + } + try { + URI uri = new URI(kafkaExportURIStr); + Map> queries = URIUtils.splitQuery(uri); + // default to 60 seconds + this.intervalSeconds = Integer.parseInt(URIUtils.getString(queries, METRICS_COLLECTION_PERIOD, "60")); + } catch (Exception e) { + LOGGER.error("Invalid Kafka export URI: {}", kafkaExportURIStr, e); + throw new IllegalArgumentException("Invalid Kafka export URI: " + kafkaExportURIStr, e); + } + + this.kafkaExportURI = KafkaExportURI.parse(kafkaExportURIStr); + LOGGER.info("KafkaMetricsExporter initialized with kafkaExportURI: {}, intervalMs: {}", + kafkaExportURI, intervalSeconds); + } + + public int getIntervalSeconds() { + return intervalSeconds; + } + + public KafkaExportURI getKafkaExportURI() { + return kafkaExportURI; + } + + @Override + public MetricReader asMetricReader() { + KafkaExporter kafkaExporter = new 
KafkaExporter(kafkaExportURI); + PeriodicMetricReaderBuilder builder = PeriodicMetricReader.builder(kafkaExporter); + return builder.setInterval(Duration.ofSeconds(intervalSeconds)).build(); + } +} diff --git a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricProtoConverter.java b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricProtoConverter.java new file mode 100644 index 0000000000..081243f840 --- /dev/null +++ b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricProtoConverter.java @@ -0,0 +1,377 @@ +package kafka.log.stream.s3.telemetry.exporter; + +import kafka.automq.telemetry.proto.common.v1.AnyValue; +import kafka.automq.telemetry.proto.common.v1.ArrayValue; +import kafka.automq.telemetry.proto.common.v1.InstrumentationScope; +import kafka.automq.telemetry.proto.common.v1.KeyValue; +import kafka.automq.telemetry.proto.metrics.v1.AggregationTemporality; +import kafka.automq.telemetry.proto.metrics.v1.Exemplar; +import kafka.automq.telemetry.proto.metrics.v1.ExponentialHistogram; +import kafka.automq.telemetry.proto.metrics.v1.ExponentialHistogramDataPoint; +import kafka.automq.telemetry.proto.metrics.v1.Gauge; +import kafka.automq.telemetry.proto.metrics.v1.Histogram; +import kafka.automq.telemetry.proto.metrics.v1.HistogramDataPoint; +import kafka.automq.telemetry.proto.metrics.v1.Metric; +import kafka.automq.telemetry.proto.metrics.v1.NumberDataPoint; +import kafka.automq.telemetry.proto.metrics.v1.ResourceMetrics; +import kafka.automq.telemetry.proto.metrics.v1.ScopeMetrics; +import kafka.automq.telemetry.proto.metrics.v1.Sum; +import kafka.automq.telemetry.proto.resource.v1.Resource; + +import com.google.protobuf.ByteString; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.sdk.common.InstrumentationScopeInfo; +import io.opentelemetry.sdk.metrics.data.DoubleExemplarData; +import io.opentelemetry.sdk.metrics.data.DoublePointData; +import io.opentelemetry.sdk.metrics.data.ExemplarData; +import io.opentelemetry.sdk.metrics.data.ExponentialHistogramBuckets; +import io.opentelemetry.sdk.metrics.data.ExponentialHistogramData; +import io.opentelemetry.sdk.metrics.data.ExponentialHistogramPointData; +import io.opentelemetry.sdk.metrics.data.GaugeData; +import io.opentelemetry.sdk.metrics.data.HistogramData; +import io.opentelemetry.sdk.metrics.data.HistogramPointData; +import io.opentelemetry.sdk.metrics.data.LongExemplarData; +import io.opentelemetry.sdk.metrics.data.LongPointData; +import io.opentelemetry.sdk.metrics.data.MetricData; +import io.opentelemetry.sdk.metrics.data.PointData; +import io.opentelemetry.sdk.metrics.data.SumData; + +/** + * OpenTelemetry metric data converter in OTLP Protobuf format. + * + *

+ * <p>This utility class is used to convert {@link MetricData} objects into
+ * {@link org.apache.kafka.shaded.io.opentelemetry.proto.metrics.v1.ResourceMetrics} objects
+ * that comply with the OTLP/Protobuf specification. It is suitable for scenarios where
+ * metric data needs to be directly serialized and sent.
+ *
+ * <h2>Main Features</h2>
+ * <ul>
+ *   <li>Supports all OpenTelemetry metric types (Gauge/Sum/Histogram/ExponentialHistogram)</li>
+ *   <li>Automatically handles differences between Long and Double type data points</li>
+ *   <li>Correctly maps metadata such as resources, metrics, and instrumentation scope information</li>
+ *   <li>Compatible with the OpenTelemetry Java SDK version 1.32.0 and above</li>
+ * </ul>
+ *
+ * <h2>Usage Example</h2>
+ * <pre>{@code
+ * // Convert MetricData
+ * MetricProtoConverter converter = new MetricProtoConverter();
+ * ResourceMetrics protoMetrics = converter.convertToResourceMetrics(metricData);
+ *
+ * // Serialize to a byte stream
+ * byte[] bytes = protoMetrics.toByteArray();
+ * }</pre>
+ *
+ * <h2>Version Compatibility</h2>
+ * <ul>
+ *   <li>OpenTelemetry SDK: requires version {@code >= 1.32.0} (due to the metric API refactoring)</li>
+ *   <li>Protobuf dependency: uses {@code opentelemetry-proto 1.4.0-alpha}</li>
+ * </ul>
+ *
+ * <h2>Thread Safety</h2>
+ * <p>This class has no internal state and all methods are pure functions without side effects,
+ * so it is safe to call from multiple threads.
+ *
+ * <h2>Error Handling</h2>
+ * <p>
+ * When an unsupported metric type is encountered, an {@link IllegalArgumentException} will be thrown. + * The caller needs to catch and handle it. + * + * @see OTLP Protocol Specification + * @see org.apache.kafka.shaded.io.opentelemetry.proto.metrics.v1.ResourceMetrics Protobuf data structure + */ +public class MetricProtoConverter { + + public ResourceMetrics convertSingleResourceMetrics(List metrics) { + if (metrics.isEmpty()) { + return ResourceMetrics.getDefaultInstance(); + } + + // Assume that all MetricData in the same batch belong to the same Resource + io.opentelemetry.sdk.resources.Resource resource = metrics.get(0).getResource(); + ResourceMetrics.Builder builder = ResourceMetrics.newBuilder() + .setResource(convertResource(resource)); + + // Group by InstrumentationScope + Map> grouped = metrics.stream() + .collect(Collectors.groupingBy(MetricData::getInstrumentationScopeInfo)); + + for (Map.Entry> entry : grouped.entrySet()) { + ScopeMetrics.Builder scopeMetricsBuilder = ScopeMetrics.newBuilder() + .setScope(convertInstrumentationScope(entry.getKey())); + + for (MetricData metric : entry.getValue()) { + scopeMetricsBuilder.addMetrics(convertMetric(metric)); + } + builder.addScopeMetrics(scopeMetricsBuilder.build()); + } + return builder.build(); + } + + /** + * Use the Public API: Single MetricData → ResourceMetrics + * @param metricData A single MetricData + * @return ResourceMetrics + */ + public ResourceMetrics convertToResourceMetrics(MetricData metricData) { + return ResourceMetrics.newBuilder() + .setResource(convertResource(metricData.getResource())) + .addScopeMetrics(convertScopeMetrics(metricData)) + .build(); + } + + private Resource convertResource(io.opentelemetry.sdk.resources.Resource sdkResource) { + Resource.Builder builder = Resource.newBuilder(); + sdkResource.getAttributes().forEach((key, value) -> + builder.addAttributes(KeyValue.newBuilder() + .setKey(key.getKey()) + .setValue(convertAnyValue(value)) + .build()) + ); + return builder.build(); + } + + private ScopeMetrics convertScopeMetrics(MetricData metricData) { + return ScopeMetrics.newBuilder() + .setScope(convertInstrumentationScope(metricData.getInstrumentationScopeInfo())) + .addMetrics(convertMetric(metricData)) + .build(); + } + + private InstrumentationScope convertInstrumentationScope(InstrumentationScopeInfo scope) { + return InstrumentationScope.newBuilder() + .setName(scope.getName() == null ? "" : scope.getName()) + .setVersion(scope.getVersion() == null ? 
"" : scope.getVersion()) + .build(); + } + + private Metric convertMetric(MetricData metricData) { + Metric.Builder builder = Metric.newBuilder() + .setName(metricData.getName()) + .setDescription(metricData.getDescription()) + .setUnit(metricData.getUnit()); + + switch (metricData.getType()) { + case LONG_GAUGE: + case DOUBLE_GAUGE: + builder.setGauge(convertGauge((GaugeData) metricData.getData())); + break; + case LONG_SUM: + case DOUBLE_SUM: + builder.setSum(convertSum((SumData) metricData.getData())); + break; + case HISTOGRAM: + builder.setHistogram(convertHistogram((HistogramData) metricData.getData())); + break; + case EXPONENTIAL_HISTOGRAM: + builder.setExponentialHistogram(convertExponentialHistogram((ExponentialHistogramData) metricData.getData())); + break; + default: + throw new IllegalArgumentException("Unsupported metric type: " + metricData.getType()); + } + + return builder.build(); + } + + private Sum convertSum(SumData sumData) { + Sum.Builder sumBuilder = Sum.newBuilder() + .setAggregationTemporality(convertTemporality(sumData.getAggregationTemporality())) + .setIsMonotonic(sumData.isMonotonic()); + + for (PointData point : sumData.getPoints()) { + sumBuilder.addDataPoints(convertNumberPoint(point)); + } + + return sumBuilder.build(); + } + + private Gauge convertGauge(GaugeData gaugeData) { + Gauge.Builder gaugeBuilder = Gauge.newBuilder(); + for (PointData point : gaugeData.getPoints()) { + NumberDataPoint protoPoint = convertNumberPoint(point); + gaugeBuilder.addDataPoints(protoPoint); + } + return gaugeBuilder.build(); + } + + private NumberDataPoint convertNumberPoint(PointData point) { + NumberDataPoint.Builder protoPoint = NumberDataPoint.newBuilder() + .setStartTimeUnixNano(point.getStartEpochNanos()) + .setTimeUnixNano(point.getEpochNanos()) + .addAllAttributes(convertAttributes(point.getAttributes())) + .addAllExemplars(convertExemplars(point.getExemplars(), point instanceof LongPointData)); + + // Handle values based on type (compatible with old and new versions) + if (point instanceof LongPointData) { + protoPoint.setAsInt(((LongPointData) point).getValue()); + } else if (point instanceof DoublePointData) { + protoPoint.setAsDouble(((DoublePointData) point).getValue()); + } else { + throw new IllegalArgumentException("Unsupported point type: " + point.getClass()); + } + + return protoPoint.build(); + } + + private Histogram convertHistogram(HistogramData histogramData) { + Histogram.Builder builder = Histogram.newBuilder() + .setAggregationTemporality(convertTemporality(histogramData.getAggregationTemporality())); + + for (HistogramPointData point : histogramData.getPoints()) { + builder.addDataPoints(convertHistogramPoint(point)); + } + return builder.build(); + } + + private HistogramDataPoint convertHistogramPoint(HistogramPointData point) { + HistogramDataPoint.Builder builder = HistogramDataPoint.newBuilder() + .setStartTimeUnixNano(point.getStartEpochNanos()) + .setTimeUnixNano(point.getEpochNanos()) + .setCount(point.getCount()) + .setSum(point.getSum()) + .addAllExplicitBounds(point.getBoundaries()) + .addAllBucketCounts(point.getCounts()) + .addAllAttributes(convertAttributes(point.getAttributes())) + .addAllExemplars(convertExemplars(point.getExemplars(), false)); + + if (point.hasMin()) { + builder.setMin(point.getMin()); + } + if (point.hasMax()) { + builder.setMax(point.getMax()); + } + return builder.build(); + } + + private ExponentialHistogram convertExponentialHistogram(ExponentialHistogramData histogramData) { + ExponentialHistogram.Builder 
builder = ExponentialHistogram.newBuilder() + .setAggregationTemporality(convertTemporality(histogramData.getAggregationTemporality())); + + for (ExponentialHistogramPointData point : histogramData.getPoints()) { + builder.addDataPoints(convertExponentialHistogramPoint(point)); + } + return builder.build(); + } + + private ExponentialHistogramDataPoint convertExponentialHistogramPoint(ExponentialHistogramPointData point) { + ExponentialHistogramDataPoint.Builder builder = ExponentialHistogramDataPoint.newBuilder() + .setStartTimeUnixNano(point.getStartEpochNanos()) + .setTimeUnixNano(point.getEpochNanos()) + .setScale(point.getScale()) + .setSum(point.getSum()) + .setZeroCount(point.getZeroCount()) + .setCount(point.getCount()) + .addAllAttributes(convertAttributes(point.getAttributes())) + .setPositive(convertBuckets(point.getPositiveBuckets())) + .setNegative(convertBuckets(point.getNegativeBuckets())); + + if (point.hasMin()) { + builder.setMin(point.getMin()); + } + if (point.hasMax()) { + builder.setMax(point.getMax()); + } + return builder.build(); + } + + private ExponentialHistogramDataPoint.Buckets convertBuckets(ExponentialHistogramBuckets buckets) { + return ExponentialHistogramDataPoint.Buckets.newBuilder() + .setOffset(buckets.getOffset()) + .addAllBucketCounts(buckets.getBucketCounts()) + .build(); + } + + private AggregationTemporality convertTemporality(io.opentelemetry.sdk.metrics.data.AggregationTemporality temporality) { + switch (temporality) { + case CUMULATIVE: + return AggregationTemporality.AGGREGATION_TEMPORALITY_CUMULATIVE; + case DELTA: + return AggregationTemporality.AGGREGATION_TEMPORALITY_DELTA; + default: + return AggregationTemporality.AGGREGATION_TEMPORALITY_UNSPECIFIED; + } + } + + /** + * Determine the encoding method of Exemplar.Value based on the data type (Long or Double) of the parent metric point. + * - LongPointData (LONG_GAUGE / LONG_SUM) → as_int + * - DoublePointData (other types) → as_double + */ + private List convertExemplars(List exemplars, boolean isParentLongType) { + List protoExemplars = new ArrayList<>(); + for (ExemplarData exemplar : exemplars) { + Exemplar.Builder protoExemplar = Exemplar.newBuilder() + .setTimeUnixNano(exemplar.getEpochNanos()) + .addAllFilteredAttributes(convertAttributes(exemplar.getFilteredAttributes())); + + if (isParentLongType) { + if (exemplar instanceof LongExemplarData) { + long value = ((LongExemplarData) exemplar).getValue(); + protoExemplar.setAsInt(value); + } + // The SDK ensures that the Exemplar.Value from a LongPoint has been safely converted to a double without loss of precision. 
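+                // If the exemplar is not a LongExemplarData, no value field is set; the exemplar
+                // is still emitted with only its timestamp, filtered attributes and span context.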
+ } else { + if (exemplar instanceof DoubleExemplarData) { + double value = ((DoubleExemplarData) exemplar).getValue(); + protoExemplar.setAsDouble(value); + } + } + + // Add SpanContext (if it exists) + if (exemplar.getSpanContext() != null && exemplar.getSpanContext().isValid()) { + protoExemplar.setSpanId(ByteString.copyFrom(exemplar.getSpanContext().getSpanIdBytes())); + protoExemplar.setTraceId(ByteString.copyFrom(exemplar.getSpanContext().getTraceIdBytes())); + } + + protoExemplars.add(protoExemplar.build()); + } + return protoExemplars; + } + + private List convertAttributes(Attributes attributes) { + List keyValues = new ArrayList<>(); + attributes.forEach((key, value) -> + keyValues.add(KeyValue.newBuilder() + .setKey(key.getKey()) + .setValue(convertAnyValue(value)) + .build()) + ); + return keyValues; + } + + private AnyValue convertAnyValue(Object value) { + if (value == null) { + return AnyValue.newBuilder().setStringValue("null").build(); + } + AnyValue.Builder builder = AnyValue.newBuilder(); + if (value instanceof String) { + builder.setStringValue((String) value); + } else if (value instanceof Boolean) { + builder.setBoolValue((Boolean) value); + } else if (value instanceof Long) { + builder.setIntValue((Long) value); + } else if (value instanceof Double) { + builder.setDoubleValue((Double) value); + } else if (value instanceof List) { + // Handle array types + ArrayValue.Builder arrayBuilder = ArrayValue.newBuilder(); + for (Object element : (List) value) { + arrayBuilder.addValues(convertAnyValue(element)); + } + builder.setArrayValue(arrayBuilder.build()); + } else if (value instanceof byte[]) { + // Handle byte arrays + builder.setBytesValue(ByteString.copyFrom((byte[]) value)); + } else { + // Fallback logic + builder.setStringValue(value.toString()); + } + return builder.build(); + } +} diff --git a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterType.java b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterType.java index 23861777f9..50778538a7 100644 --- a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterType.java +++ b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterType.java @@ -14,7 +14,8 @@ public enum MetricsExporterType { OTLP("otlp"), PROMETHEUS("prometheus"), - OPS("ops"); + OPS("ops"), + KAFKA("kafka"); private final String type; diff --git a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterURI.java b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterURI.java index 558d34b522..7a95d91d61 100644 --- a/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterURI.java +++ b/core/src/main/scala/kafka/log/stream/s3/telemetry/exporter/MetricsExporterURI.java @@ -47,14 +47,14 @@ public static MetricsExporter parseExporter(String clusterId, KafkaConfig kafkaC return null; } Map> queries = URIUtils.splitQuery(uri); - return parseExporter(clusterId, kafkaConfig, type, queries); + return parseExporter(clusterId, kafkaConfig, type, queries, uriStr); } catch (Exception e) { LOGGER.warn("Parse metrics exporter URI {} failed", uriStr, e); return null; } } - public static MetricsExporter parseExporter(String clusterId, KafkaConfig kafkaConfig, String type, Map> queries) { + public static MetricsExporter parseExporter(String clusterId, KafkaConfig kafkaConfig, String type, Map> queries, String uriStr) { MetricsExporterType exporterType = MetricsExporterType.fromString(type); switch (exporterType) { case OTLP: 
@@ -64,6 +64,8 @@ public static MetricsExporter parseExporter(String clusterId, KafkaConfig kafkaC
         case OPS:
             return buildOpsExporter(clusterId, kafkaConfig.nodeId(), kafkaConfig.s3ExporterReportIntervalMs(),
                 kafkaConfig.automq().opsBuckets(), kafkaConfig.automq().baseLabels());
+        case KAFKA:
+            return buildKafkaExporter(uriStr);
         default:
             return null;
     }
@@ -105,10 +107,14 @@ public static MetricsExporter buildPrometheusExporter(Map>
     }
 
     public static MetricsExporter buildOpsExporter(String clusterId, int nodeId, int intervalMs, List opsBuckets,
-        List> baseLabels) {
+        List> baseLabels) {
         return new OpsMetricsExporter(clusterId, nodeId, intervalMs, opsBuckets, baseLabels);
     }
 
+    public static MetricsExporter buildKafkaExporter(String uriStr) {
+        return new KafkaMetricsExporter(uriStr);
+    }
+
     public List metricsExporters() {
         return metricsExporters;
     }
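+    // Illustrative note (assumption, not part of the original patch): with the new KAFKA type,
+    // an exporter URI of the form "kafka://...?collectionPeriod=30" (remaining query parameters
+    // are defined by KafkaExportURI.parse) is routed through parseExporter() ->
+    // buildKafkaExporter(uriStr) -> KafkaMetricsExporter, which wraps KafkaExporter in a
+    // PeriodicMetricReader using the parsed collection interval.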