Skip to content

Commit 5a05027

Browse files
[OCC] add metrics for scheduler (#431)
## Describe your changes and provide context
- **retries** represents the number of tx attempts beyond the first attempt
- **max_incarnation** is the highest incarnation seen in a given block

## Testing performed to validate your change
- lower environment
1 parent 6b43421 commit 5a05027

File tree

1 file changed

+26
-2
lines changed

1 file changed

+26
-2
lines changed

tasks/scheduler.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/cosmos/cosmos-sdk/store/multiversion"
1111
store "github.com/cosmos/cosmos-sdk/store/types"
12+
"github.com/cosmos/cosmos-sdk/telemetry"
1213
sdk "github.com/cosmos/cosmos-sdk/types"
1314
"github.com/cosmos/cosmos-sdk/types/occ"
1415
"github.com/cosmos/cosmos-sdk/utils/tracing"
@@ -78,6 +79,7 @@ type scheduler struct {
7879
allTasks []*deliverTxTask
7980
executeCh chan func()
8081
validateCh chan func()
82+
metrics *schedulerMetrics
8183
}
8284

8385
// NewScheduler creates a new scheduler
@@ -86,6 +88,7 @@ func NewScheduler(workers int, tracingInfo *tracing.Info, deliverTxFunc func(ctx
8688
workers: workers,
8789
deliverTx: deliverTxFunc,
8890
tracingInfo: tracingInfo,
91+
metrics: &schedulerMetrics{},
8992
}
9093
}
9194

@@ -152,11 +155,16 @@ func toTasks(reqs []*sdk.DeliverTxEntry) []*deliverTxTask {
152155
return res
153156
}
154157

155-
func collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx {
158+
// collectResponses returns the responses of the given tasks, in the same
// order as tasks, and records the highest task Incarnation it saw in
// s.metrics.maxIncarnation.
//
// Every task's Response pointer is dereferenced, so each task must already
// hold a non-nil Response when this is called.
func (s *scheduler) collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx {
156159
res := make([]types.ResponseDeliverTx, 0, len(tasks))
160+
var maxIncarnation int
157161
for _, t := range tasks {
162+
if t.Incarnation > maxIncarnation {
163+
maxIncarnation = t.Incarnation
164+
}
158165
res = append(res, *t.Response)
159166
}
167+
// overwrite (not accumulate) so the value reflects only this set of tasks
s.metrics.maxIncarnation = maxIncarnation
160168
return res
161169
}
162170

@@ -202,6 +210,19 @@ func (s *scheduler) PrefillEstimates(reqs []*sdk.DeliverTxEntry) {
202210
}
203211
}
204212

213+
// schedulerMetrics contains the values the scheduler gathers while processing
// a set of transactions; emitMetrics reports them through telemetry.
214+
type schedulerMetrics struct {
215+
// maxIncarnation is the highest Incarnation among the tasks passed to
// collectResponses, which assigns this field
216+
maxIncarnation int
217+
// retries is the number of tx attempts beyond the first attempt;
// ProcessAll increases it by len(toExecute)
218+
retries int
219+
}
220+
221+
// emitMetrics reports the scheduler's collected metrics through telemetry:
// s.metrics.retries is passed to IncrCounter under the keys
// "scheduler", "retries", and s.metrics.maxIncarnation is passed to SetGauge
// under the keys "scheduler", "max_incarnation". Both values are converted
// to float32 before being reported. ProcessAll invokes it via defer.
func (s *scheduler) emitMetrics() {
222+
telemetry.IncrCounter(float32(s.metrics.retries), "scheduler", "retries")
223+
telemetry.SetGauge(float32(s.metrics.maxIncarnation), "scheduler", "max_incarnation")
224+
}
225+
205226
func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) {
206227
// initialize multi-version stores if they haven't been initialized yet
207228
s.tryInitMultiVersionStore(ctx)
@@ -211,6 +232,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t
211232
s.allTasks = tasks
212233
s.executeCh = make(chan func(), len(tasks))
213234
s.validateCh = make(chan func(), len(tasks))
235+
defer s.emitMetrics()
214236

215237
// default to number of tasks if workers is negative or 0 by this point
216238
workers := s.workers
@@ -245,11 +267,13 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t
245267
if err != nil {
246268
return nil, err
247269
}
270+
// these are retries which apply to metrics
271+
s.metrics.retries += len(toExecute)
248272
}
249273
for _, mv := range s.multiVersionStores {
250274
mv.WriteLatestToStore()
251275
}
252-
return collectResponses(tasks), nil
276+
return s.collectResponses(tasks), nil
253277
}
254278

255279
func (s *scheduler) shouldRerun(task *deliverTxTask) bool {

0 commit comments

Comments
 (0)