Skip to content

Commit cc23405

Browse files
gregns1, bbrks, and torcolvin
authored
CBG-4465: [3.2.3 backport] Add Database Init and Online fatal error stats (#7381)
Co-authored-by: Ben Brooks <ben.brooks@couchbase.com>
Co-authored-by: Tor Colvin <tor.colvin@couchbase.com>
1 parent be0133d commit cc23405

3 files changed

Lines changed: 23 additions & 0 deletions

File tree

base/stats.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ const (
8888
StatAddedVersion3dot2dot0 = "3.2.0"
8989
StatAddedVersion3dot2dot1 = "3.2.1"
9090
StatAddedVersion3dot2dot2 = "3.2.2"
91+
StatAddedVersion3dot2dot3 = "3.2.3"
9192
StatAddedVersion3dot3dot0 = "3.3.0"
9293

9394
StatDeprecatedVersionNotDeprecated = ""
@@ -662,6 +663,11 @@ type DatabaseStats struct {
662663
// prior to Hydrogen release. These are not exported as part of prometheus and only exposed through expvars
663664
CacheFeedMapStats *ExpVarMapWrapper `json:"cache_feed"`
664665
ImportFeedMapStats *ExpVarMapWrapper `json:"import_feed"`
666+
667+
// The total number of errors that occurred that prevented the database from being initialized.
668+
TotalInitFatalErrors *SgwIntStat `json:"total_init_fatal_errors"`
669+
// The total number of errors that occurred that prevented the database from being brought online.
670+
TotalOnlineFatalErrors *SgwIntStat `json:"total_online_fatal_errors"`
665671
}
666672

667673
// This wrapper ensures that an expvar.Map type can be marshalled into JSON. The expvar.Map has no method to go direct to
@@ -1782,6 +1788,14 @@ func (d *DbStats) initDatabaseStats() error {
17821788
if err != nil {
17831789
return err
17841790
}
1791+
resUtil.TotalInitFatalErrors, err = NewIntStat(SubsystemDatabaseKey, "total_init_fatal_errors", StatUnitNoUnits, TotalInitFatalErrorsDesc, StatAddedVersion3dot2dot3, StatDeprecatedVersionNotDeprecated, StatStabilityVolatile, labelKeys, labelVals, prometheus.CounterValue, 0)
1792+
if err != nil {
1793+
return err
1794+
}
1795+
resUtil.TotalOnlineFatalErrors, err = NewIntStat(SubsystemDatabaseKey, "total_online_fatal_errors", StatUnitNoUnits, TotalOnlineFatalErrorsDesc, StatAddedVersion3dot2dot3, StatDeprecatedVersionNotDeprecated, StatStabilityVolatile, labelKeys, labelVals, prometheus.CounterValue, 0)
1796+
if err != nil {
1797+
return err
1798+
}
17851799
resUtil.ImportFeedMapStats = &ExpVarMapWrapper{new(expvar.Map).Init()}
17861800

17871801
resUtil.CacheFeedMapStats = &ExpVarMapWrapper{new(expvar.Map).Init()}
@@ -1830,6 +1844,8 @@ func (d *DbStats) unregisterDatabaseStats() {
18301844
prometheus.Unregister(d.DatabaseStats.NumPublicRestRequests)
18311845
prometheus.Unregister(d.DatabaseStats.TotalSyncTime)
18321846
prometheus.Unregister(d.DatabaseStats.PublicRestBytesRead)
1847+
prometheus.Unregister(d.DatabaseStats.TotalInitFatalErrors)
1848+
prometheus.Unregister(d.DatabaseStats.TotalOnlineFatalErrors)
18331849
}
18341850

18351851
func (d *DbStats) CollectionStat(scopeName, collectionName string) (*CollectionStats, error) {

base/stats_descriptions.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -316,6 +316,9 @@ const (
316316

317317
NumIdleKvOpsDesc = "The total number of idle kv operations."
318318
NumIdleQueryOpsDesc = "The total number of idle query operations."
319+
320+
TotalInitFatalErrorsDesc = "The total number of errors that occurred that prevented the database from being initialized."
321+
TotalOnlineFatalErrorsDesc = "The total number of errors that occurred that prevented the database from being brought online."
319322
)
320323

321324
// Delta Sync stats descriptions

rest/server_context.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -964,6 +964,7 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config
964964
if dbInitDoneChan != nil {
965965
initError := <-dbInitDoneChan
966966
if initError != nil {
967+
dbcontext.DbStats.DatabaseStats.TotalInitFatalErrors.Add(1)
967968
// report error in building/creating indexes
968969
dbcontext.DatabaseStartupError = db.NewDatabaseError(db.DatabaseInitializationIndexError)
969970
atomic.StoreUint32(&dbcontext.State, db.DBOffline)
@@ -973,6 +974,7 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config
973974
}
974975
base.InfofCtx(ctx, base.KeyAll, "Database init completed, starting online processes")
975976
if err := dbcontext.StartOnlineProcesses(ctx); err != nil {
977+
dbcontext.DbStats.DatabaseStats.TotalOnlineFatalErrors.Add(1)
976978
atomic.StoreUint32(&dbcontext.State, db.DBOffline)
977979
_ = dbcontext.EventMgr.RaiseDBStateChangeEvent(ctx, dbName, "offline", dbLoadedStateChangeMsg, &sc.Config.API.AdminInterface)
978980
return nil, err
@@ -999,6 +1001,7 @@ func (sc *ServerContext) asyncDatabaseOnline(nonCancelCtx base.NonCancellableCon
9991001
if doneChan != nil {
10001002
initError := <-doneChan
10011003
if initError != nil {
1004+
dbc.DbStats.DatabaseStats.TotalInitFatalErrors.Add(1)
10021005
base.WarnfCtx(ctx, "Async database init returned error: %v", initError)
10031006
dbc.DatabaseStartupError = db.NewDatabaseError(db.DatabaseInitializationIndexError)
10041007
atomic.CompareAndSwapUint32(&dbc.State, db.DBStarting, db.DBOffline)
@@ -1017,6 +1020,7 @@ func (sc *ServerContext) asyncDatabaseOnline(nonCancelCtx base.NonCancellableCon
10171020
base.InfofCtx(ctx, base.KeyAll, "Async database initialization complete, starting online processes...")
10181021
err := dbc.StartOnlineProcesses(ctx)
10191022
if err != nil {
1023+
dbc.DbStats.DatabaseStats.TotalOnlineFatalErrors.Add(1)
10201024
base.ErrorfCtx(ctx, "Error starting online processes after async initialization: %v", err)
10211025
atomic.CompareAndSwapUint32(&dbc.State, db.DBStarting, db.DBOffline)
10221026
return

0 commit comments

Comments
 (0)