Skip to content

Commit 9f90dc6

Browse files
feat(oauth): wire metrics and health status for token refresh (smart-mcp-proxy#23)
Complete the OAuth token refresh reliability implementation by: - Adding RefreshMetricsRecorder interface and wiring MetricsManager - Recording oauth_refresh_total and oauth_refresh_duration_seconds metrics - Wiring RefreshManager state into health calculation across all endpoints This satisfies FR-010 (health status surfacing) and FR-011 (Prometheus metrics) from spec 023. Tasks completed: T014, T022, T032 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 0985334 commit 9f90dc6

4 files changed

Lines changed: 105 additions & 13 deletions

File tree

internal/oauth/refresh_manager.go

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,16 @@ type RefreshEventEmitter interface {
9494
EmitOAuthRefreshFailed(serverName string, errorMsg string)
9595
}
9696

97+
// RefreshMetricsRecorder defines metrics recording methods for OAuth refresh operations.
98+
// This interface decouples RefreshManager from the concrete MetricsManager.
99+
type RefreshMetricsRecorder interface {
100+
// RecordOAuthRefresh records an OAuth token refresh attempt.
101+
// Result should be one of: "success", "failed_network", "failed_invalid_grant", "failed_other".
102+
RecordOAuthRefresh(server, result string)
103+
// RecordOAuthRefreshDuration records the duration of an OAuth token refresh attempt.
104+
RecordOAuthRefreshDuration(server, result string, duration time.Duration)
105+
}
106+
97107
// RefreshManagerConfig holds configuration for the RefreshManager.
98108
type RefreshManagerConfig struct {
99109
Threshold float64 // Percentage of lifetime at which to refresh (default: 0.8)
@@ -102,18 +112,19 @@ type RefreshManagerConfig struct {
102112

103113
// RefreshManager coordinates proactive OAuth token refresh across all servers.
104114
type RefreshManager struct {
105-
storage RefreshTokenStore
106-
coordinator *OAuthFlowCoordinator
107-
runtime RefreshRuntimeOperations
108-
eventEmitter RefreshEventEmitter
109-
schedules map[string]*RefreshSchedule
110-
threshold float64
111-
maxRetries int
112-
mu sync.RWMutex
113-
logger *zap.Logger
114-
ctx context.Context
115-
cancel context.CancelFunc
116-
started bool
115+
storage RefreshTokenStore
116+
coordinator *OAuthFlowCoordinator
117+
runtime RefreshRuntimeOperations
118+
eventEmitter RefreshEventEmitter
119+
metricsRecorder RefreshMetricsRecorder
120+
schedules map[string]*RefreshSchedule
121+
threshold float64
122+
maxRetries int
123+
mu sync.RWMutex
124+
logger *zap.Logger
125+
ctx context.Context
126+
cancel context.CancelFunc
127+
started bool
117128
}
118129

119130
// NewRefreshManager creates a new RefreshManager instance.
@@ -160,6 +171,12 @@ func (m *RefreshManager) SetEventEmitter(emitter RefreshEventEmitter) {
160171
m.eventEmitter = emitter
161172
}
162173

174+
// SetMetricsRecorder sets the metrics recorder for Prometheus metrics.
175+
// This enables FR-011: OAuth refresh metrics emission.
176+
func (m *RefreshManager) SetMetricsRecorder(recorder RefreshMetricsRecorder) {
177+
m.metricsRecorder = recorder
178+
}
179+
163180
// Start initializes the refresh manager and loads existing tokens.
164181
// For non-expired tokens, it schedules proactive refresh at 80% lifetime.
165182
// For expired tokens with valid refresh tokens, it attempts immediate refresh.
@@ -323,6 +340,13 @@ func (m *RefreshManager) executeImmediateRefresh(serverName string) {
323340
// Log the result
324341
LogActualTokenRefreshResult(m.logger, serverName, refreshErr == nil, duration, refreshErr)
325342

343+
// Record metrics (T014: Emit metrics on refresh attempt)
344+
if m.metricsRecorder != nil {
345+
result := classifyRefreshError(refreshErr)
346+
m.metricsRecorder.RecordOAuthRefresh(serverName, result)
347+
m.metricsRecorder.RecordOAuthRefreshDuration(serverName, result, duration)
348+
}
349+
326350
if refreshErr != nil {
327351
m.handleRefreshFailure(serverName, refreshErr)
328352
} else {
@@ -532,13 +556,22 @@ func (m *RefreshManager) executeRefresh(serverName string) {
532556
m.logger.Info("Executing proactive token refresh",
533557
zap.String("server", serverName))
534558

535-
// Attempt refresh
559+
// Attempt refresh with timing for metrics (T022: Emit refresh duration metric)
560+
startTime := time.Now()
536561
var refreshErr error
537562
if m.runtime != nil {
538563
refreshErr = m.runtime.RefreshOAuthToken(serverName)
539564
} else {
540565
refreshErr = ErrRefreshFailed
541566
}
567+
duration := time.Since(startTime)
568+
569+
// Record metrics (T022: Emit refresh duration metric on each attempt)
570+
if m.metricsRecorder != nil {
571+
result := classifyRefreshError(refreshErr)
572+
m.metricsRecorder.RecordOAuthRefresh(serverName, result)
573+
m.metricsRecorder.RecordOAuthRefreshDuration(serverName, result, duration)
574+
}
542575

543576
if refreshErr != nil {
544577
m.handleRefreshFailure(serverName, refreshErr)

internal/runtime/runtime.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,6 +1473,20 @@ func (r *Runtime) GetManagementService() interface{} {
14731473
return r.managementService
14741474
}
14751475

1476+
// SetRefreshMetricsRecorder sets the metrics recorder for OAuth token refresh operations.
1477+
// This enables FR-011: OAuth refresh metrics emission.
1478+
func (r *Runtime) SetRefreshMetricsRecorder(recorder oauth.RefreshMetricsRecorder) {
1479+
if r.refreshManager != nil {
1480+
r.refreshManager.SetMetricsRecorder(recorder)
1481+
}
1482+
}
1483+
1484+
// RefreshManager returns the OAuth refresh manager for health status integration.
1485+
// Returns nil if refresh manager hasn't been initialized.
1486+
func (r *Runtime) RefreshManager() *oauth.RefreshManager {
1487+
return r.refreshManager
1488+
}
1489+
14761490
// EmitServersChanged implements the EventEmitter interface for the management service.
14771491
// This delegates to the runtime's internal event emission mechanism.
14781492
func (r *Runtime) EmitServersChanged(reason string, extra map[string]any) {
@@ -1699,6 +1713,16 @@ func (r *Runtime) GetAllServers() ([]map[string]interface{}, error) {
16991713
healthInput.TokenExpiresAt = &tokenExpiresAt
17001714
}
17011715

1716+
// T032: Wire refresh state into health calculation (Spec 023)
1717+
if r.refreshManager != nil {
1718+
if refreshState := r.refreshManager.GetRefreshState(serverStatus.Name); refreshState != nil {
1719+
healthInput.RefreshState = health.RefreshState(refreshState.State)
1720+
healthInput.RefreshRetryCount = refreshState.RetryCount
1721+
healthInput.RefreshLastError = refreshState.LastError
1722+
healthInput.RefreshNextAttempt = refreshState.NextAttempt
1723+
}
1724+
}
1725+
17021726
healthStatus := health.CalculateHealth(healthInput, healthConfig)
17031727
serverMap["health"] = healthStatus
17041728

internal/server/mcp.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,6 +1802,19 @@ func (p *MCPProxyServer) handleListUpstreams(_ context.Context) (*mcp.CallToolRe
18021802
MissingSecret: health.ExtractMissingSecret(lastError),
18031803
OAuthConfigErr: health.ExtractOAuthConfigError(lastError),
18041804
}
1805+
1806+
// T032: Wire refresh state into health calculation (Spec 023)
1807+
if p.mainServer != nil && p.mainServer.runtime != nil {
1808+
if refreshMgr := p.mainServer.runtime.RefreshManager(); refreshMgr != nil {
1809+
if refreshState := refreshMgr.GetRefreshState(server.Name); refreshState != nil {
1810+
healthInput.RefreshState = health.RefreshState(refreshState.State)
1811+
healthInput.RefreshRetryCount = refreshState.RetryCount
1812+
healthInput.RefreshLastError = refreshState.LastError
1813+
healthInput.RefreshNextAttempt = refreshState.NextAttempt
1814+
}
1815+
}
1816+
}
1817+
18051818
serverMap["health"] = health.CalculateHealth(healthInput, health.DefaultHealthConfig())
18061819

18071820
// Add Docker isolation information

internal/server/server.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"mcpproxy-go/internal/httpapi"
2525
"mcpproxy-go/internal/logs"
2626
"mcpproxy-go/internal/management"
27+
"mcpproxy-go/internal/observability"
2728
"mcpproxy-go/internal/runtime"
2829
"mcpproxy-go/internal/secret"
2930
"mcpproxy-go/internal/storage"
@@ -79,6 +80,17 @@ func NewServerWithConfigPath(cfg *config.Config, configPath string, logger *zap.
7980
// This must happen before StartBackgroundInitialization is called
8081
rt.SetVersion(httpapi.GetBuildVersion())
8182

83+
// Initialize observability manager for metrics (FR-011: OAuth refresh metrics)
84+
obsConfig := observability.DefaultConfig("mcpproxy", httpapi.GetBuildVersion())
85+
obsManager, err := observability.NewManager(logger.Sugar(), &obsConfig)
86+
if err != nil {
87+
logger.Warn("Failed to create observability manager, metrics will be disabled", zap.Error(err))
88+
} else if obsManager.Metrics() != nil {
89+
// Wire up metrics recorder to RefreshManager for OAuth refresh metrics
90+
rt.SetRefreshMetricsRecorder(obsManager.Metrics())
91+
logger.Info("OAuth refresh metrics enabled")
92+
}
93+
8294
// Initialize management service and set it on runtime
8395
secretResolver := secret.NewResolver()
8496
mgmtService := management.NewService(
@@ -618,6 +630,16 @@ func (s *Server) GetAllServers() ([]map[string]interface{}, error) {
618630
healthInput.OAuthRequired = true
619631
}
620632

633+
// T032: Wire refresh state into health calculation (Spec 023)
634+
if refreshMgr := s.runtime.RefreshManager(); refreshMgr != nil {
635+
if refreshState := refreshMgr.GetRefreshState(serverStatus.Name); refreshState != nil {
636+
healthInput.RefreshState = health.RefreshState(refreshState.State)
637+
healthInput.RefreshRetryCount = refreshState.RetryCount
638+
healthInput.RefreshLastError = refreshState.LastError
639+
healthInput.RefreshNextAttempt = refreshState.NextAttempt
640+
}
641+
}
642+
621643
healthStatus := health.CalculateHealth(healthInput, health.DefaultHealthConfig())
622644

623645
result = append(result, map[string]interface{}{

0 commit comments

Comments
 (0)