Skip to content

Commit 60adfe2

Browse files
feat: make access check timeout configurable via ACCESS_CHECK_TIMEOUT_SECONDS
Allow operators to tune the access validation timeout (default 15s) via the ACCESS_CHECK_TIMEOUT_SECONDS environment variable on the deployment. This provides a safety valve for environments with slower external models, where the default may be insufficient.

Signed-off-by: Wen Liang <liangwen12year@gmail.com>
1 parent e97ed10 commit 60adfe2

4 files changed

Lines changed: 36 additions & 16 deletions

File tree

maas-api/cmd/main.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ func registerHandlers(ctx context.Context, log *logger.Logger, router *gin.Engin
148148

149149
subscriptionSelector := subscription.NewSelector(log, cluster.MaaSSubscriptionLister)
150150

151-
modelManager, err := models.NewManager(log)
151+
modelManager, err := models.NewManager(log, cfg.AccessCheckTimeoutSeconds)
152152
if err != nil {
153153
log.Fatal("Failed to create model manager", "error", err)
154154
}

maas-api/internal/config/config.go

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,12 @@ type Config struct {
4646
// Default: 30 days. Minimum: 1 day.
4747
APIKeyMaxExpirationDays int
4848

49+
// AccessCheckTimeoutSeconds bounds the total duration of model access validation.
50+
// This limits the staleness window between when access is checked and when the
51+
// response reaches the client. Models whose probes don't complete within this
52+
// window are excluded (fail-closed). Default: 15 seconds. Minimum: 1 second.
53+
AccessCheckTimeoutSeconds int
54+
4955
// Deprecated flag (backward compatibility with pre-TLS version)
5056
deprecatedHTTPPort string
5157
}
@@ -56,6 +62,7 @@ func Load() *Config {
5662
gatewayName := env.GetString("GATEWAY_NAME", constant.DefaultGatewayName)
5763
secure, _ := env.GetBool("SECURE", false)
5864
maxExpirationDays, _ := env.GetInt("API_KEY_MAX_EXPIRATION_DAYS", constant.DefaultAPIKeyMaxExpirationDays)
65+
accessCheckTimeoutSeconds, _ := env.GetInt("ACCESS_CHECK_TIMEOUT_SECONDS", 15)
5966

6067
c := &Config{
6168
Name: env.GetString("INSTANCE_NAME", gatewayName),
@@ -69,6 +76,7 @@ func Load() *Config {
6976
DebugMode: debugMode,
7077
DBConnectionURL: "", // Loaded from K8s secret via LoadDatabaseURL()
7178
APIKeyMaxExpirationDays: maxExpirationDays,
79+
AccessCheckTimeoutSeconds: accessCheckTimeoutSeconds,
7280
// Deprecated env var (backward compatibility with pre-TLS version)
7381
deprecatedHTTPPort: env.GetString("PORT", ""),
7482
}
@@ -141,6 +149,10 @@ func (c *Config) Validate() error {
141149
return errors.New("API_KEY_MAX_EXPIRATION_DAYS must be at least 1")
142150
}
143151

152+
if c.AccessCheckTimeoutSeconds < 1 {
153+
return errors.New("ACCESS_CHECK_TIMEOUT_SECONDS must be at least 1")
154+
}
155+
144156
return nil
145157
}
146158

maas-api/internal/handlers/models_test.go

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -320,7 +320,7 @@ func TestListingModels(t *testing.T) {
320320
}
321321
router, _ := fixtures.SetupTestServer(t, config)
322322

323-
modelMgr, errMgr := models.NewManager(testLogger)
323+
modelMgr, errMgr := models.NewManager(testLogger, 15)
324324
require.NoError(t, errMgr)
325325

326326
// Set up test fixtures
@@ -435,7 +435,7 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) {
435435
}
436436
router, _ := fixtures.SetupTestServer(t, config)
437437

438-
modelMgr, errMgr := models.NewManager(testLogger)
438+
modelMgr, errMgr := models.NewManager(testLogger, 15)
439439
require.NoError(t, errMgr)
440440

441441
_, cleanup := fixtures.StubTokenProviderAPIs(t)
@@ -657,7 +657,7 @@ func TestListModels_ReturnAllModels(t *testing.T) {
657657
},
658658
}
659659

660-
modelMgr, err := models.NewManager(testLogger)
660+
modelMgr, err := models.NewManager(testLogger, 15)
661661
require.NoError(t, err)
662662

663663
subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister)
@@ -839,7 +839,7 @@ func TestListModels_DeduplicationBySubscription(t *testing.T) {
839839
},
840840
}
841841

842-
modelMgr, err := models.NewManager(testLogger)
842+
modelMgr, err := models.NewManager(testLogger, 15)
843843
require.NoError(t, err)
844844

845845
subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister)
@@ -950,7 +950,7 @@ func TestListModels_DifferentModelRefsWithSameModelID(t *testing.T) {
950950
},
951951
}
952952

953-
modelMgr, err := models.NewManager(testLogger)
953+
modelMgr, err := models.NewManager(testLogger, 15)
954954
require.NoError(t, err)
955955

956956
subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister)
@@ -1050,7 +1050,7 @@ func TestListModels_DifferentModelRefsWithSameURLAndModelID(t *testing.T) {
10501050
},
10511051
}
10521052

1053-
modelMgr, err := models.NewManager(testLogger)
1053+
modelMgr, err := models.NewManager(testLogger, 15)
10541054
require.NoError(t, err)
10551055

10561056
subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister)
@@ -1149,7 +1149,7 @@ func TestListModels_DifferentModelRefsWithSameModelIDAndDifferentSubscriptions(t
11491149
},
11501150
}
11511151

1152-
modelMgr, err := models.NewManager(testLogger)
1152+
modelMgr, err := models.NewManager(testLogger, 15)
11531153
require.NoError(t, err)
11541154

11551155
subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister)

maas-api/internal/models/discovery.go

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -37,27 +37,35 @@ const (
3737
httpIdleConnTimeout = 90 * time.Second
3838
maxDiscoveryConcurrency = 10
3939

40-
// accessCheckTimeout bounds the total duration of FilterModelsByAccess.
40+
// defaultAccessCheckTimeout bounds the total duration of FilterModelsByAccess.
4141
// This limits the staleness window between when access is checked and when
4242
// the response reaches the client. Models whose probes don't complete within
4343
// this window are excluded (fail-closed).
44-
accessCheckTimeout = 15 * time.Second
44+
defaultAccessCheckTimeout = 15 * time.Second
4545
)
4646

4747
// Manager runs access validation (probe model endpoints) for models listed from MaaSModelRef.
4848
type Manager struct {
49-
logger *logger.Logger
50-
httpClient *http.Client
49+
logger *logger.Logger
50+
httpClient *http.Client
51+
accessCheckTimeout time.Duration
5152
}
5253

5354
// NewManager creates a Manager for filtering models by access. The client uses InsecureSkipVerify
5455
// for cluster-internal probes; auth is enforced by the gateway/model server.
55-
func NewManager(log *logger.Logger) (*Manager, error) {
56+
// accessCheckTimeoutSeconds controls the total duration bound for access validation;
57+
// if <= 0, the default of 15 seconds is used.
58+
func NewManager(log *logger.Logger, accessCheckTimeoutSeconds int) (*Manager, error) {
5659
if log == nil {
5760
return nil, errors.New("log is required")
5861
}
62+
timeout := defaultAccessCheckTimeout
63+
if accessCheckTimeoutSeconds > 0 {
64+
timeout = time.Duration(accessCheckTimeoutSeconds) * time.Second
65+
}
5966
return &Manager{
60-
logger: log,
67+
logger: log,
68+
accessCheckTimeout: timeout,
6169
httpClient: &http.Client{
6270
Timeout: httpClientTimeout,
6371
Transport: &http.Transport{
@@ -88,7 +96,7 @@ func (m *Manager) FilterModelsByAccess(ctx context.Context, models []Model, auth
8896
}
8997

9098
// Bound the total access-check duration to limit the staleness window.
91-
ctx, cancel := context.WithTimeout(ctx, accessCheckTimeout)
99+
ctx, cancel := context.WithTimeout(ctx, m.accessCheckTimeout)
92100
defer cancel()
93101

94102
m.logger.Debug("FilterModelsByAccess: validating access for models", "count", len(models), "subscriptionHeaderProvided", subscriptionHeader != "")
@@ -228,7 +236,7 @@ func (m *Manager) fetchModelsWithRetry(ctx context.Context, authHeader string, s
228236
return lastResult != authRetry, nil
229237
}); err != nil {
230238
if errors.Is(err, context.DeadlineExceeded) || ctx.Err() == context.DeadlineExceeded {
231-
m.logger.Debug("Access validation failed: context deadline exceeded", "service", meta.ServiceName, "endpoint", meta.Endpoint, "timeout", accessCheckTimeout)
239+
m.logger.Debug("Access validation failed: context deadline exceeded", "service", meta.ServiceName, "endpoint", meta.Endpoint, "timeout", m.accessCheckTimeout)
232240
} else {
233241
m.logger.Debug("Access validation failed: model fetch backoff exhausted", "service", meta.ServiceName, "endpoint", meta.Endpoint, "error", err)
234242
}

0 commit comments

Comments
 (0)