|
| 1 | +# ============================================================================= |
| 2 | +# OpenTelemetry Collector Configuration for FHEVM Coprocessor |
| 3 | +# ============================================================================= |
| 4 | +# Architecture: |
| 5 | +# Services --OTLP gRPC--> [otlp receiver] --traces--> [spanmetrics connector] --> [prometheus exporter] |
| 6 | +# --traces--> [otlp/jaeger exporter] --> Jaeger |
| 7 | +# |
| 8 | +# Spanmetrics dimensions (explicit allowlist): |
| 9 | +# - service.name (default) — 6 known services |
| 10 | +# - span.name (default) — ~45 distinct span names |
| 11 | +# - status.code (default) — UNSET / OK / ERROR (exported as STATUS_CODE_UNSET / STATUS_CODE_OK / STATUS_CODE_ERROR) |
| 12 | +# - operation — FHE operation enum name (~30 values) |
| 13 | +# - ct_type — ciphertext type (~15 values) |
| 14 | +# - operation_pattern_id — per-cone DFG hash (bounded by distinct contract patterns) |
| 15 | +# - transaction_pattern_id — whole-tx DFG hash (bounded by distinct contract patterns) |
| 16 | +# |
| 17 | +# Excluded from defaults: |
| 18 | +# - span.kind — all spans are INTERNAL (single value = useless) |
| 19 | +# |
| 20 | +# High-cardinality attributes NOT listed as dimensions are automatically |
| 21 | +# excluded from spanmetrics output (txn_id, count, compressed_size, etc.) |
| 22 | +# ============================================================================= |
| 23 | + |
| 24 | +receivers: |
| 25 | + otlp: |
| 26 | + protocols: |
| 27 | + grpc: |
| 28 | + endpoint: "0.0.0.0:4317" |
| 29 | + |
| 30 | +connectors: |
| 31 | + spanmetrics: |
| 32 | + namespace: coprocessor.span |
| 33 | + |
| 34 | + histogram: |
| 35 | + unit: ms |
| 36 | + explicit: |
| 37 | + buckets: |
| 38 | + - 1ms |
| 39 | + - 5ms |
| 40 | + - 10ms |
| 41 | + - 25ms |
| 42 | + - 50ms |
| 43 | + - 100ms |
| 44 | + - 250ms |
| 45 | + - 500ms |
| 46 | + - 1s |
| 47 | + - 2.5s |
| 48 | + - 5s |
| 49 | + - 10s |
| 50 | + - 30s |
| 51 | + - 60s |
| 52 | + |
| 53 | + # Extra span attributes promoted to metric labels, on top of the connector defaults (service.name, span.name, span.kind, status.code) |
| 54 | + dimensions: |
| 55 | + - name: operation # FHE op name on fhe_operation / compress_ciphertext spans |
| 56 | + - name: ct_type # Ciphertext type on compress/squash/upload spans |
| 57 | + - name: operation_pattern_id # per-cone DFG fingerprint on execute_transaction / fhe_operation spans |
| 58 | + - name: transaction_pattern_id # whole-tx DFG fingerprint on execute_transaction spans |
| 59 | + |
| 60 | + # Drop the default span.kind label: every coprocessor span is INTERNAL, so it would only ever carry a single value |
| 61 | + exclude_dimensions: |
| 62 | + - span.kind |
| 63 | + |
| 64 | + # Attach trace exemplars to data points for the metric→trace pivot in Grafana (the prometheus exporter must have enable_open_metrics set) |
| 65 | + exemplars: |
| 66 | + enabled: true |
| 67 | + max_per_data_point: 5 |
| 68 | + |
| 69 | + # Emit aggregated metrics every 15s; stop exporting a series once it has received no new spans for 5m |
| 70 | + metrics_flush_interval: 15s |
| 71 | + metrics_expiration: 5m |
| 72 | + |
| 73 | +exporters: |
| 74 | + otlp/jaeger: |
| 75 | + endpoint: "jaeger:4317" |
| 76 | + tls: |
| 77 | + insecure: true |
| 78 | + |
| 79 | + prometheus: |
| 80 | + endpoint: "0.0.0.0:8889" |
| 81 | + enable_open_metrics: true # Serve the OpenMetrics exposition format; required for exemplars to be exposed |
| 82 | + |
| 83 | +processors: |
| 84 | + batch: |
| 85 | + send_batch_size: 1024 |
| 86 | + timeout: 5s |
| 87 | + |
| 88 | +service: |
| 89 | + telemetry: |
| 90 | + logs: |
| 91 | + level: info |
| 92 | + metrics: |
| 93 | + address: "0.0.0.0:8888" # Collector's own internal metrics (span-derived metrics are served separately on :8889). NOTE(review): newer collector releases replace `address` with `readers` — confirm against the deployed version |
| 94 | + |
| 95 | + pipelines: |
| 96 | + traces: |
| 97 | + receivers: [otlp] |
| 98 | + processors: [batch] |
| 99 | + exporters: [spanmetrics, otlp/jaeger] |
| 100 | + |
| 101 | + metrics/spanmetrics: |
| 102 | + receivers: [spanmetrics] |
| 103 | + processors: [batch] |
| 104 | + exporters: [prometheus] |
| 105 | + |
| 106 | +# ============================================================================= |
| 107 | +# Sample PromQL Queries for RED Dashboards |
| 108 | +# ============================================================================= |
| 109 | +# Metric names generated (namespace "coprocessor.span" → underscores in Prometheus): |
| 110 | +# coprocessor_span_calls_total — span call counter |
| 111 | +# coprocessor_span_duration_milliseconds_bucket — span duration histogram buckets (with matching _sum and _count series) |
| 112 | +# |
| 113 | +# --- Rate (Request Rate) --- |
| 114 | +# |
| 115 | +# Total span call rate by service and span name: |
| 116 | +# sum(rate(coprocessor_span_calls_total[5m])) by (service_name, span_name) |
| 117 | +# |
| 118 | +# FHE operation rate by operation type: |
| 119 | +# sum(rate(coprocessor_span_calls_total{span_name="fhe_operation"}[5m])) by (operation) |
| 120 | +# |
| 121 | +# --- Error Rate --- |
| 122 | +# |
| 123 | +# Error rate by service: |
| 124 | +# sum(rate(coprocessor_span_calls_total{status_code="STATUS_CODE_ERROR"}[5m])) by (service_name) |
| 125 | +# / sum(rate(coprocessor_span_calls_total[5m])) by (service_name) |
| 126 | +# |
| 127 | +# Error rate by span name: |
| 128 | +# sum(rate(coprocessor_span_calls_total{status_code="STATUS_CODE_ERROR"}[5m])) by (span_name) |
| 129 | +# / sum(rate(coprocessor_span_calls_total[5m])) by (span_name) |
| 130 | +# |
| 131 | +# --- Duration (Latency Percentiles) --- |
| 132 | +# |
| 133 | +# p50 latency by service: |
| 134 | +# histogram_quantile(0.5, sum(rate(coprocessor_span_duration_milliseconds_bucket[5m])) by (le, service_name)) |
| 135 | +# |
| 136 | +# p95 latency by span name: |
| 137 | +# histogram_quantile(0.95, sum(rate(coprocessor_span_duration_milliseconds_bucket[5m])) by (le, span_name)) |
| 138 | +# |
| 139 | +# p99 latency for FHE operations by operation type: |
| 140 | +# histogram_quantile(0.99, sum(rate(coprocessor_span_duration_milliseconds_bucket{span_name="fhe_operation"}[5m])) by (le, operation)) |
| 141 | +# |
| 142 | +# p95 latency by DFG operation pattern: |
| 143 | +# histogram_quantile(0.95, sum(rate(coprocessor_span_duration_milliseconds_bucket{span_name="execute_transaction"}[5m])) by (le, operation_pattern_id)) |
| 144 | +# |
| 145 | +# p95 latency by DFG transaction pattern: |
| 146 | +# histogram_quantile(0.95, sum(rate(coprocessor_span_duration_milliseconds_bucket{span_name="execute_transaction"}[5m])) by (le, transaction_pattern_id)) |
| 147 | +# ============================================================================= |
0 commit comments