|
| 1 | +package host |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "time" |
| 7 | + |
| 8 | + "go.opentelemetry.io/otel/attribute" |
| 9 | + "go.opentelemetry.io/otel/metric" |
| 10 | + |
| 11 | + "github.com/smartcontractkit/chainlink-common/pkg/beholder" |
| 12 | +) |
| 13 | + |
// Values of the "phase" attribute on the execution duration histogram. At any
// instant an execution's wall-clock time is attributed to exactly one phase.
const (
	// phaseWasm covers time spent executing guest wasm code inside callWasm.
	phaseWasm = "wasm"
	// phaseWaiting covers time spent suspended, waiting for pending
	// capability responses.
	phaseWaiting = "waiting"
	// phaseParked covers everything else between (re)starts: store
	// setup/teardown, (de)serialisation and scheduling overhead.
	phaseParked = "parked"
)
| 25 | + |
| 26 | +// moduleMetrics holds the beholder instruments used to observe wasm module |
| 27 | +// executions. Instrument names are shared process-wide, so multiple modules can |
| 28 | +// safely construct their own moduleMetrics; the meter returns the same |
| 29 | +// underlying instrument for a given name. |
| 30 | +type moduleMetrics struct { |
| 31 | + activeExecutions metric.Int64UpDownCounter |
| 32 | + suspendedExecutions metric.Int64UpDownCounter |
| 33 | + suspensionsPerExec metric.Int64Histogram |
| 34 | + executionDurationMs metric.Int64Histogram |
| 35 | + memoryBytes metric.Int64Histogram |
| 36 | +} |
| 37 | + |
| 38 | +func newModuleMetrics() (*moduleMetrics, error) { |
| 39 | + meter := beholder.GetMeter() |
| 40 | + |
| 41 | + activeExecutions, err := meter.Int64UpDownCounter("platform_wasm_host_active_executions", |
| 42 | + metric.WithDescription("Number of wasm module executions currently running"), |
| 43 | + metric.WithUnit("{execution}"), |
| 44 | + ) |
| 45 | + if err != nil { |
| 46 | + return nil, fmt.Errorf("failed to create active_executions counter: %w", err) |
| 47 | + } |
| 48 | + |
| 49 | + suspendedExecutions, err := meter.Int64UpDownCounter("platform_wasm_host_suspended_executions", |
| 50 | + metric.WithDescription("Number of wasm module executions currently suspended waiting for capability responses"), |
| 51 | + metric.WithUnit("{execution}"), |
| 52 | + ) |
| 53 | + if err != nil { |
| 54 | + return nil, fmt.Errorf("failed to create suspended_executions counter: %w", err) |
| 55 | + } |
| 56 | + |
| 57 | + suspensionsPerExec, err := meter.Int64Histogram("platform_wasm_host_suspensions_per_execution", |
| 58 | + metric.WithDescription("Number of times an execution suspended to await capability responses before completing"), |
| 59 | + metric.WithUnit("{suspension}"), |
| 60 | + metric.WithExplicitBucketBoundaries(0, 1, 2, 3, 5, 10, 20, 50, 100), |
| 61 | + ) |
| 62 | + if err != nil { |
| 63 | + return nil, fmt.Errorf("failed to create suspensions_per_execution histogram: %w", err) |
| 64 | + } |
| 65 | + |
| 66 | + executionDurationMs, err := meter.Int64Histogram("platform_wasm_host_execution_duration_ms", |
| 67 | + metric.WithDescription("Wall-clock time spent in an execution, broken down by phase (wasm, waiting, parked)"), |
| 68 | + metric.WithUnit("ms"), |
| 69 | + metric.WithExplicitBucketBoundaries(1, 5, 10, 50, 100, 250, 500, 1_000, 2_000, 5_000, 10_000, 30_000, 60_000, 120_000, 300_000, 600_000), |
| 70 | + ) |
| 71 | + if err != nil { |
| 72 | + return nil, fmt.Errorf("failed to create execution_duration_ms histogram: %w", err) |
| 73 | + } |
| 74 | + |
| 75 | + memoryBytes, err := meter.Int64Histogram("platform_wasm_host_memory_bytes", |
| 76 | + metric.WithDescription("Peak linear memory in bytes used by the wasm module across an execution"), |
| 77 | + metric.WithUnit("By"), |
| 78 | + metric.WithExplicitBucketBoundaries(1<<20, 4<<20, 16<<20, 32<<20, 64<<20, 128<<20, 256<<20, 512<<20, 1<<30), |
| 79 | + ) |
| 80 | + if err != nil { |
| 81 | + return nil, fmt.Errorf("failed to create memory_bytes histogram: %w", err) |
| 82 | + } |
| 83 | + |
| 84 | + return &moduleMetrics{ |
| 85 | + activeExecutions: activeExecutions, |
| 86 | + suspendedExecutions: suspendedExecutions, |
| 87 | + suspensionsPerExec: suspensionsPerExec, |
| 88 | + executionDurationMs: executionDurationMs, |
| 89 | + memoryBytes: memoryBytes, |
| 90 | + }, nil |
| 91 | +} |
| 92 | + |
| 93 | +// suspensionEnabledAttr tags a metric with whether the execution has |
| 94 | +// suspend/resume-on-await enabled, so the two populations can be distinguished. |
| 95 | +func suspensionEnabledAttr(suspensionEnabled bool) attribute.KeyValue { |
| 96 | + return attribute.Bool("suspension_enabled", suspensionEnabled) |
| 97 | +} |
| 98 | + |
| 99 | +// IncActiveExecutions marks an execution as started (a). |
| 100 | +func (m *moduleMetrics) IncActiveExecutions(ctx context.Context, suspensionEnabled bool) { |
| 101 | + m.activeExecutions.Add(ctx, 1, metric.WithAttributes(suspensionEnabledAttr(suspensionEnabled))) |
| 102 | +} |
| 103 | + |
| 104 | +// DecActiveExecutions marks an execution as finished (a). |
| 105 | +func (m *moduleMetrics) DecActiveExecutions(ctx context.Context, suspensionEnabled bool) { |
| 106 | + m.activeExecutions.Add(ctx, -1, metric.WithAttributes(suspensionEnabledAttr(suspensionEnabled))) |
| 107 | +} |
| 108 | + |
| 109 | +// IncSuspendedExecutions marks an execution as suspended, waiting for capability |
| 110 | +// responses (b). |
| 111 | +func (m *moduleMetrics) IncSuspendedExecutions(ctx context.Context, suspensionEnabled bool) { |
| 112 | + m.suspendedExecutions.Add(ctx, 1, metric.WithAttributes(suspensionEnabledAttr(suspensionEnabled))) |
| 113 | +} |
| 114 | + |
| 115 | +// DecSuspendedExecutions marks a suspended execution as resumed (b). |
| 116 | +func (m *moduleMetrics) DecSuspendedExecutions(ctx context.Context, suspensionEnabled bool) { |
| 117 | + m.suspendedExecutions.Add(ctx, -1, metric.WithAttributes(suspensionEnabledAttr(suspensionEnabled))) |
| 118 | +} |
| 119 | + |
| 120 | +// RecordSuspensions records how many times an execution suspended before |
| 121 | +// completing (c). |
| 122 | +func (m *moduleMetrics) RecordSuspensions(ctx context.Context, suspensionEnabled bool, suspensions int64) { |
| 123 | + m.suspensionsPerExec.Record(ctx, suspensions, metric.WithAttributes(suspensionEnabledAttr(suspensionEnabled))) |
| 124 | +} |
| 125 | + |
| 126 | +// RecordExecutionPhase records the wall-clock time spent in a single phase of an |
| 127 | +// execution (d). phase is one of phaseWasm, phaseWaiting or phaseParked. |
| 128 | +func (m *moduleMetrics) RecordExecutionPhase(ctx context.Context, suspensionEnabled bool, phase string, d time.Duration) { |
| 129 | + m.executionDurationMs.Record(ctx, d.Milliseconds(), |
| 130 | + metric.WithAttributes(suspensionEnabledAttr(suspensionEnabled), attribute.String("phase", phase)), |
| 131 | + ) |
| 132 | +} |
| 133 | + |
| 134 | +// RecordMemory records the peak linear memory used by an execution (e). Note |
| 135 | +// that the CPU-time counterpart of (e) is measured as wall-clock time spent in |
| 136 | +// wasm - the phaseWasm bucket of the execution duration histogram (d). |
| 137 | +func (m *moduleMetrics) RecordMemory(ctx context.Context, suspensionEnabled bool, memoryBytes int64) { |
| 138 | + m.memoryBytes.Record(ctx, memoryBytes, metric.WithAttributes(suspensionEnabledAttr(suspensionEnabled))) |
| 139 | +} |
0 commit comments