Skip to content

Commit 3c7cba0

Browse files
committed
fix spanner metrics regression
Recent changes mean client-side Spanner metrics are no longer exposed on the Prometheus scrape endpoint. The SDK has deprecated OpenCensus support, which was wired into the Prometheus endpoint, in favour of OpenTelemetry. This was reflected in a recent SpiceDB change, which made the OpenTelemetry approach the default, and as a consequence all Prometheus metrics disappeared. This commit adds the Prometheus exporter as an OpenTelemetry MetricsResolver, to add some of the metrics back. The naming convention has changed, and I'm not aware of a way around that. Also, client-side latency metrics are not supported via the OpenTelemetry integration (see https://cloud.google.com/spanner/docs/capture-custom-metrics-opentelemetry#capture-client-round-trip-latency); instead, they now have to be manually instrumented.
1 parent c8eb9db commit 3c7cba0

File tree

3 files changed

+51
-15
lines changed

3 files changed

+51
-15
lines changed

go.mod

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ require (
124124

125125
require (
126126
github.com/caio/go-tdigest/v4 v4.0.1
127+
go.opentelemetry.io/otel/exporters/prometheus v0.57.0
128+
go.opentelemetry.io/otel/sdk/metric v1.35.0
127129
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738
128130
)
129131

@@ -390,7 +392,6 @@ require (
390392
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect
391393
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 // indirect
392394
go.opentelemetry.io/otel/metric v1.35.0 // indirect
393-
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
394395
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
395396
go.uber.org/automaxprocs v1.6.0 // indirect
396397
go.uber.org/multierr v1.11.0 // indirect

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -2526,6 +2526,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffA
25262526
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ=
25272527
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 h1:FyjCyI9jVEfqhUh2MoSkmolPjfh5fp2hnV0b0irxH4Q=
25282528
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0/go.mod h1:hYwym2nDEeZfG/motx0p7L7J1N1vyzIThemQsb4g2qY=
2529+
go.opentelemetry.io/otel/exporters/prometheus v0.57.0 h1:AHh/lAP1BHrY5gBwk8ncc25FXWm/gmmY3BX258z5nuk=
2530+
go.opentelemetry.io/otel/exporters/prometheus v0.57.0/go.mod h1:QpFWz1QxqevfjwzYdbMb4Y1NnlJvqSGwyuU0B4iuc9c=
25292531
go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8=
25302532
go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM=
25312533
go.opentelemetry.io/otel/metric v1.22.0/go.mod h1:evJGjVpZv0mQ5QBRJoBF64yMuOf4xCWdXjK8pzFvliY=

internal/datastore/spanner/spanner.go

+47-14
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ import (
2121
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
2222
"go.opentelemetry.io/otel"
2323
"go.opentelemetry.io/otel/attribute"
24+
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
25+
"go.opentelemetry.io/otel/sdk/metric"
26+
otelres "go.opentelemetry.io/otel/sdk/resource"
27+
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
2428
"go.opentelemetry.io/otel/trace"
2529
"google.golang.org/api/option"
2630
"google.golang.org/grpc"
@@ -98,6 +102,7 @@ type spannerDatastore struct {
98102

99103
tableSizesStatsTable string
100104
filterMaximumIDCount uint16
105+
meterProvider *metric.MeterProvider
101106
}
102107

103108
// NewSpannerDatastore returns a datastore backed by cloud spanner
@@ -122,9 +127,23 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (
122127
log.Info().Str("spanner-emulator-host", os.Getenv("SPANNER_EMULATOR_HOST")).Msg("running against spanner emulator")
123128
}
124129

130+
var meterProvider *metric.MeterProvider
125131
if config.datastoreMetricsOption == DatastoreMetricsOptionOpenTelemetry {
126132
log.Info().Msg("enabling OpenTelemetry metrics for Spanner datastore")
127133
spanner.EnableOpenTelemetryMetrics()
134+
135+
res, err := otelres.Merge(otelres.Default(),
136+
otelres.NewWithAttributes(semconv.SchemaURL,
137+
semconv.ServiceName("spicedb"),
138+
))
139+
if err != nil {
140+
return nil, fmt.Errorf("failed to create otel metrics resource: %w", err)
141+
}
142+
143+
meterProvider, err = getMeterProviderWithPromExporter(res)
144+
if err != nil {
145+
return nil, fmt.Errorf("failed to enable Spanner prometheus metrics: %w", err)
146+
}
128147
}
129148

130149
if config.datastoreMetricsOption == DatastoreMetricsOptionLegacyPrometheus {
@@ -133,23 +152,21 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (
133152
if err != nil {
134153
return nil, fmt.Errorf("failed to enable spanner session metrics: %w", err)
135154
}
155+
136156
err = spanner.EnableGfeLatencyAndHeaderMissingCountViews() // nolint: staticcheck
137157
if err != nil {
138158
return nil, fmt.Errorf("failed to enable spanner GFE metrics: %w", err)
139159
}
140-
}
141160

142-
// Register Spanner client gRPC metrics (include round-trip latency, received/sent bytes...)
143-
if err := view.Register(ocgrpc.DefaultClientViews...); err != nil {
144-
return nil, fmt.Errorf("failed to enable gRPC metrics for Spanner client: %w", err)
145-
}
161+
// Register Spanner client gRPC metrics (include round-trip latency, received/sent bytes...)
162+
if err := view.Register(ocgrpc.DefaultClientViews...); err != nil {
163+
return nil, fmt.Errorf("failed to enable gRPC metrics for Spanner client: %w", err)
164+
}
146165

147-
_, err = ocprom.NewExporter(ocprom.Options{
148-
Namespace: "spicedb",
149-
Registerer: prometheus.DefaultRegisterer,
150-
})
151-
if err != nil {
152-
return nil, fmt.Errorf("failed to enable spanner GFE latency stats: %w", err)
166+
_, err = ocprom.NewExporter(ocprom.Options{
167+
Namespace: "spicedb",
168+
Registerer: prometheus.DefaultRegisterer,
169+
})
153170
}
154171

155172
cfg := spanner.DefaultSessionPoolConfig
@@ -175,8 +192,9 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (
175192
context.Background(),
176193
database,
177194
spanner.ClientConfig{
178-
SessionPoolConfig: cfg,
179-
DisableNativeMetrics: config.datastoreMetricsOption != DatastoreMetricsOptionNative,
195+
SessionPoolConfig: cfg,
196+
DisableNativeMetrics: config.datastoreMetricsOption != DatastoreMetricsOptionNative,
197+
OpenTelemetryMeterProvider: meterProvider,
180198
},
181199
spannerOpts...,
182200
)
@@ -243,6 +261,7 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (
243261
tableSizesStatsTable: tableSizesStatsTable,
244262
filterMaximumIDCount: config.filterMaximumIDCount,
245263
schema: *schema,
264+
meterProvider: meterProvider,
246265
}
247266
// Optimized revision and revision checking use a stale read for the
248267
// current timestamp.
@@ -253,6 +272,20 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (
253272
return ds, nil
254273
}
255274

275+
func getMeterProviderWithPromExporter(res *otelres.Resource) (*metric.MeterProvider, error) {
276+
exporter, err := otelprom.New()
277+
if err != nil {
278+
return nil, err
279+
}
280+
281+
meterProvider := metric.NewMeterProvider(
282+
metric.WithResource(res),
283+
metric.WithReader(exporter),
284+
)
285+
286+
return meterProvider, nil
287+
}
288+
256289
type traceableRTX struct {
257290
delegate readTX
258291
}
@@ -410,7 +443,7 @@ func (sd *spannerDatastore) OfflineFeatures() (*datastore.Features, error) {
410443

411444
func (sd *spannerDatastore) Close() error {
412445
sd.client.Close()
413-
return nil
446+
return sd.meterProvider.ForceFlush(context.TODO())
414447
}
415448

416449
func statementFromSQL(sql string, args []any) spanner.Statement {

0 commit comments

Comments
 (0)