6 changes: 3 additions & 3 deletions cloud/observability/promql-to-scrape/cmd/genconfig/main.go
@@ -45,9 +45,9 @@ func main() {
 	if err != nil {
 		log.Fatalf("Failed to pull metric names: %s", err)
 	}
-	fmt.Println(counters)
-	fmt.Println(gauges)
-	fmt.Println(histograms)
+	fmt.Println("counters: ", counters, "\n")
+	fmt.Println("gauges: ", gauges, "\n")
+	fmt.Println("histograms: ", histograms, "\n")
 
 	conf := internal.Config{}
 
40 changes: 20 additions & 20 deletions cloud/observability/promql-to-scrape/examples/config.yaml
@@ -1,43 +1,43 @@
 metrics:
-  - metric_name: temporal_cloud_v0_frontend_service_error_count:rate1m
+  - metric_name: temporal_cloud_v0_frontend_service_error_count
     query: rate(temporal_cloud_v0_frontend_service_error_count[1m])
   - metric_name: temporal_cloud_v0_frontend_service_pending_requests
     query: temporal_cloud_v0_frontend_service_pending_requests
-  - metric_name: temporal_cloud_v0_frontend_service_request_count:rate1m
+  - metric_name: temporal_cloud_v0_frontend_service_request_count
     query: rate(temporal_cloud_v0_frontend_service_request_count[1m])
-  - metric_name: temporal_cloud_v0_poll_success_count:rate1m
+  - metric_name: temporal_cloud_v0_poll_success_count
     query: rate(temporal_cloud_v0_poll_success_count[1m])
-  - metric_name: temporal_cloud_v0_poll_success_sync_count:rate1m
+  - metric_name: temporal_cloud_v0_poll_success_sync_count
     query: rate(temporal_cloud_v0_poll_success_sync_count[1m])
-  - metric_name: temporal_cloud_v0_poll_timeout_count:rate1m
+  - metric_name: temporal_cloud_v0_poll_timeout_count
     query: rate(temporal_cloud_v0_poll_timeout_count[1m])
-  - metric_name: temporal_cloud_v0_resource_exhausted_error_count:rate1m
+  - metric_name: temporal_cloud_v0_resource_exhausted_error_count
     query: rate(temporal_cloud_v0_resource_exhausted_error_count[1m])
-  - metric_name: temporal_cloud_v0_schedule_action_success_count:rate1m
+  - metric_name: temporal_cloud_v0_schedule_action_success_count
     query: rate(temporal_cloud_v0_schedule_action_success_count[1m])
-  - metric_name: temporal_cloud_v0_schedule_buffer_overruns_count:rate1m
+  - metric_name: temporal_cloud_v0_schedule_buffer_overruns_count
     query: rate(temporal_cloud_v0_schedule_buffer_overruns_count[1m])
-  - metric_name: temporal_cloud_v0_schedule_missed_catchup_window_count:rate1m
+  - metric_name: temporal_cloud_v0_schedule_missed_catchup_window_count
     query: rate(temporal_cloud_v0_schedule_missed_catchup_window_count[1m])
-  - metric_name: temporal_cloud_v0_service_latency_bucket:histogram_quantile_p99_1m
+  - metric_name: temporal_cloud_v0_service_latency_bucket
     query: histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket[1m])) by (le, operation, temporal_namespace))
-  - metric_name: temporal_cloud_v0_service_latency_count:rate1m
+  - metric_name: temporal_cloud_v0_service_latency_count
     query: rate(temporal_cloud_v0_service_latency_count[1m])
-  - metric_name: temporal_cloud_v0_service_latency_sum:rate1m
+  - metric_name: temporal_cloud_v0_service_latency_sum
     query: rate(temporal_cloud_v0_service_latency_sum[1m])
-  - metric_name: temporal_cloud_v0_state_transition_count:rate1m
+  - metric_name: temporal_cloud_v0_state_transition_count
     query: rate(temporal_cloud_v0_state_transition_count[1m])
-  - metric_name: temporal_cloud_v0_total_action_count:rate1m
+  - metric_name: temporal_cloud_v0_total_action_count
     query: rate(temporal_cloud_v0_total_action_count[1m])
-  - metric_name: temporal_cloud_v0_workflow_cancel_count:rate1m
+  - metric_name: temporal_cloud_v0_workflow_cancel_count
     query: rate(temporal_cloud_v0_workflow_cancel_count[1m])
-  - metric_name: temporal_cloud_v0_workflow_continued_as_new_count:rate1m
+  - metric_name: temporal_cloud_v0_workflow_continued_as_new_count
     query: rate(temporal_cloud_v0_workflow_continued_as_new_count[1m])
-  - metric_name: temporal_cloud_v0_workflow_failed_count:rate1m
+  - metric_name: temporal_cloud_v0_workflow_failed_count
     query: rate(temporal_cloud_v0_workflow_failed_count[1m])
-  - metric_name: temporal_cloud_v0_workflow_success_count:rate1m
+  - metric_name: temporal_cloud_v0_workflow_success_count
     query: rate(temporal_cloud_v0_workflow_success_count[1m])
-  - metric_name: temporal_cloud_v0_workflow_terminate_count:rate1m
+  - metric_name: temporal_cloud_v0_workflow_terminate_count
     query: rate(temporal_cloud_v0_workflow_terminate_count[1m])
-  - metric_name: temporal_cloud_v0_workflow_timeout_count:rate1m
+  - metric_name: temporal_cloud_v0_workflow_timeout_count
     query: rate(temporal_cloud_v0_workflow_timeout_count[1m])
5 changes: 3 additions & 2 deletions cloud/observability/promql-to-scrape/internal/client.go
@@ -71,9 +71,10 @@ func (c *APIClient) ListMetrics(metricPrefix string) ([]string, []string, []stri
 		if !strings.HasPrefix(string(v), metricPrefix) {
 			continue
 		}
-		if strings.HasSuffix(string(v), "_bucket") {
+		t := getMetricType(string(v))
+		if t == metricTypeHistogram {
 			histograms = append(histograms, string(v))
-		} else if strings.HasSuffix(string(v), "_count") || strings.HasSuffix(string(v), "_sum") {
+		} else if t == metricTypeCounter {
 			counts = append(counts, string(v))
 		} else {
 			gauges = append(gauges, string(v))
18 changes: 18 additions & 0 deletions cloud/observability/promql-to-scrape/internal/metric.go
@@ -0,0 +1,18 @@
+package internal
+
+import "strings"
+
+const (
+	metricTypeHistogram = "histogram"
+	metricTypeCounter   = "count"
+	metricTypeGauge     = "gauge"
+)
+
+func getMetricType(v string) string {
+	if strings.HasSuffix(v, "_bucket") {
+		return metricTypeHistogram
+	} else if strings.HasSuffix(v, "_count") || strings.HasSuffix(v, "_sum") {
+		return metricTypeCounter
+	}
+	return metricTypeGauge
+}
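
For reference, a minimal sketch of the suffix rules above, written as a hypothetical test that is not part of this PR. It also shows why the `:rate1m` suffixes were dropped from config.yaml earlier in this diff: a trailing `:rate1m` hides the `_count` suffix, demoting a counter to the gauge fallback.

```go
package internal

import "testing"

// Hypothetical test (not in this PR): pins down the suffix-based
// classification implemented by getMetricType.
func TestGetMetricType(t *testing.T) {
	cases := map[string]string{
		// "_bucket" is checked first, so histogram series win.
		"temporal_cloud_v0_service_latency_bucket": metricTypeHistogram,
		// "_count" and "_sum" both map to the counter type.
		"temporal_cloud_v0_workflow_success_count": metricTypeCounter,
		"temporal_cloud_v0_service_latency_sum":    metricTypeCounter,
		// Anything else falls through to gauge, including names that
		// still carry a recording-rule style ":rate1m" suffix.
		"temporal_cloud_v0_workflow_success_count:rate1m":      metricTypeGauge,
		"temporal_cloud_v0_frontend_service_pending_requests": metricTypeGauge,
	}
	for name, want := range cases {
		if got := getMetricType(name); got != want {
			t.Errorf("getMetricType(%q) = %q, want %q", name, got, want)
		}
	}
}
```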
@@ -38,7 +38,7 @@ func SamplesToString(queriedMetrics map[string][]*model.Sample) string {
sb.WriteString("# TYPE ")
sb.WriteString(nameWithoutSuffix)
sb.WriteByte(' ')
sb.WriteString("gauge")
sb.WriteString(getMetricType(metricName))
sb.WriteByte('\n')

for _, s := range samples {
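
With this change the `# TYPE` line reflects the detected type instead of always reading `gauge`. Illustrative output lines, assuming nameWithoutSuffix trims the `_bucket` suffix from histogram names:

```text
# TYPE temporal_cloud_v0_service_latency histogram
# TYPE temporal_cloud_v0_frontend_service_pending_requests gauge
```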
9 changes: 7 additions & 2 deletions cloud/observability/promql-to-scrape/internal/server.go
@@ -46,7 +46,11 @@ func (s *PromToScrapeServer) metricsHandler(w http.ResponseWriter, r *http.Reque
 	s.RLock()
 	defer s.RUnlock()
 	if time.Since(s.lastSuccessfulTime) < 5*time.Minute {
-		fmt.Fprint(w, s.data)
+		_, err := fmt.Fprint(w, s.data)
+		if err != nil {
+			w.WriteHeader(http.StatusInternalServerError)
+			slog.Error("can't serve metrics", "error", err)
+		}
 	} else {
 		w.WriteHeader(http.StatusInternalServerError)
 		slog.Error("can't serve metrics", "error", "metrics queried are stale (more than 5 minutes old)")
@@ -71,7 +75,7 @@ func (s *PromToScrapeServer) run() string {
 //
 // keep the objects returned from the query, or convert them into something a bit more ergonomic
 // and create ConstMetrics with the prometheus client. I happened to have the code lying around for working
-// with model.Sample, but the CosntMetrics route is probably more idiomatic and safe.
+// with model.Sample, but the ConstMetrics route is probably more idiomatic and safe.
 func (s *PromToScrapeServer) queryMetrics() {
 	start := time.Now()
 	queriedMetrics, err := QueryMetrics(s.conf, s.client)
@@ -88,5 +92,6 @@ func (s *PromToScrapeServer) queryMetrics() {

 // Start runs the embedded http.Server.
 func (s *PromToScrapeServer) Start() error {
+	slog.Info("listening on", "addr", s.server.Addr)
 	return s.server.ListenAndServe()
 }
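
As an aside on the ConstMetrics route mentioned in the comment above, here is a minimal sketch of that alternative, assuming a queried map shaped like the one QueryMetrics returns. This is illustrative client_golang usage, not code from this PR.

```go
package internal

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
)

// sampleCollector is a hypothetical prometheus.Collector that would replace
// SamplesToString by re-exposing queried model.Samples as ConstMetrics.
type sampleCollector struct {
	queried map[string][]*model.Sample
}

// Sending no descriptors makes this an "unchecked" collector, which suits
// metrics whose label sets are only known at query time.
func (c *sampleCollector) Describe(chan<- *prometheus.Desc) {}

func (c *sampleCollector) Collect(ch chan<- prometheus.Metric) {
	for name, samples := range c.queried {
		for _, s := range samples {
			// Copy the sample's labels, minus __name__, into parallel slices.
			keys := make([]string, 0, len(s.Metric))
			vals := make([]string, 0, len(s.Metric))
			for k, v := range s.Metric {
				if k == model.MetricNameLabel {
					continue
				}
				keys = append(keys, string(k))
				vals = append(vals, string(v))
			}
			desc := prometheus.NewDesc(name, "queried via promql-to-scrape", keys, nil)
			// Everything is exposed as a gauge here for simplicity; the type
			// detection added in metric.go could pick CounterValue instead.
			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(s.Value), vals...)
		}
	}
}
```

Registered on a prometheus.Registry and served via promhttp.HandlerFor, this would hand the exposition-format details over to the client library instead of building the text by hand.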