-
Notifications
You must be signed in to change notification settings - Fork 60
chore: add SAR Admin cache #786
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3a1a1f6
192c530
580f9df
75b4a49
cb63f60
8b3c6c1
5694f11
8b4443f
4959fb7
a27b033
67695fd
45bd2cb
0ccc5eb
e42ad0a
2a2a1e4
f439e9f
c099a19
6f2c694
db1a715
e6be668
010360a
7969fb2
925364c
f0509d1
9723ba3
f554f27
487897a
8c1e001
4bb4753
c0537af
fddd44c
35579fa
f829dc7
d3e6368
527a703
5c4b78e
9f8fa45
3ea597b
1147a3b
0d8c6b5
c18a43e
9d9a764
6af9da2
cac09c5
03a8e73
f27c5a8
7361ec8
79e4466
4723d8f
6d65fe7
6fb985c
eb97650
fdac5f1
7113161
9939a59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,162 @@ | ||
| package auth | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "slices" | ||
| "strings" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "github.com/prometheus/client_golang/prometheus" | ||
| "k8s.io/utils/clock" | ||
|
|
||
| "github.com/opendatahub-io/models-as-a-service/maas-api/internal/token" | ||
| ) | ||
|
|
||
| type adminChecker interface { | ||
| IsAdmin(ctx context.Context, user *token.UserContext) (bool, error) | ||
| } | ||
|
|
||
| type cacheEntry struct { | ||
| isAdmin bool | ||
| expiresAt time.Time | ||
| } | ||
|
|
||
| type CachedAdminChecker struct { | ||
| delegate adminChecker | ||
| ttl time.Duration | ||
| negativeTTL time.Duration | ||
| maxSize int | ||
| clock clock.Clock | ||
|
|
||
| mu sync.RWMutex | ||
| cache map[string]cacheEntry | ||
|
yu-teo marked this conversation as resolved.
|
||
|
|
||
| hits prometheus.Counter | ||
| misses prometheus.Counter | ||
| } | ||
|
|
||
| func NewCachedAdminChecker(delegate adminChecker, ttl time.Duration, negativeTTL time.Duration, maxSize int, reg prometheus.Registerer, clk clock.Clock) *CachedAdminChecker { | ||
| if delegate == nil { | ||
| panic("delegate cannot be nil for CachedAdminChecker") | ||
| } | ||
| if ttl <= 0 { | ||
| panic("ttl must be positive for CachedAdminChecker") | ||
| } | ||
| if negativeTTL <= 0 { | ||
| panic("negativeTTL must be positive for CachedAdminChecker") | ||
| } | ||
| if maxSize <= 0 { | ||
| panic("maxSize must be positive for CachedAdminChecker") | ||
| } | ||
| if reg == nil { | ||
| reg = prometheus.DefaultRegisterer | ||
| } | ||
| if clk == nil { | ||
| clk = clock.RealClock{} | ||
| } | ||
|
|
||
| return &CachedAdminChecker{ | ||
| delegate: delegate, | ||
| ttl: ttl, | ||
| negativeTTL: negativeTTL, | ||
| maxSize: maxSize, | ||
| clock: clk, | ||
| cache: make(map[string]cacheEntry), | ||
| hits: registerOrReuseCounter(reg, prometheus.NewCounter(prometheus.CounterOpts{ | ||
| Name: "sar_cache_hits_total", | ||
| Help: "Total number of SAR admin check cache hits.", | ||
| })), | ||
| misses: registerOrReuseCounter(reg, prometheus.NewCounter(prometheus.CounterOpts{ | ||
| Name: "sar_cache_misses_total", | ||
| Help: "Total number of SAR admin check cache misses.", | ||
| })), | ||
| } | ||
| } | ||
|
|
||
| func (c *CachedAdminChecker) IsAdmin(ctx context.Context, user *token.UserContext) (bool, error) { | ||
| if c == nil || user == nil || user.Username == "" { | ||
| return false, nil | ||
| } | ||
| if ctx.Err() != nil { | ||
| return false, ctx.Err() | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| key := cacheKey(user) | ||
| now := c.clock.Now() | ||
|
|
||
| c.mu.RLock() | ||
| entry, ok := c.cache[key] | ||
| c.mu.RUnlock() | ||
|
|
||
| if ok && now.Before(entry.expiresAt) { | ||
| c.hits.Inc() | ||
| return entry.isAdmin, nil | ||
| } | ||
|
|
||
| result, err := c.delegate.IsAdmin(ctx, user) | ||
|
|
||
| if err != nil || ctx.Err() != nil { | ||
| c.misses.Inc() | ||
| if err != nil { | ||
| return false, err | ||
| } | ||
| return false, ctx.Err() | ||
| } | ||
|
|
||
| ttl := c.ttl | ||
| if !result { | ||
| ttl = c.negativeTTL | ||
| } | ||
|
|
||
| c.mu.Lock() | ||
| c.evictExpiredLocked(now) | ||
| if len(c.cache) < c.maxSize { | ||
| c.cache[key] = cacheEntry{ | ||
| isAdmin: result, | ||
| expiresAt: now.Add(ttl), | ||
| } | ||
| } | ||
| c.mu.Unlock() | ||
|
Comment on lines
+89
to
+121
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Description: Verify x/sync is available and whether duplicate-miss suppression already exists elsewhere.
rg -nP 'golang\.org/x/sync|singleflight' -C3
rg -nP 'delegate\.IsAdmin\s*\(|type CachedAdminChecker' -C3Repository: opendatahub-io/models-as-a-service Length of output: 5962 🏁 Script executed: cat -n maas-api/internal/auth/cached_admin_checker.goRepository: opendatahub-io/models-as-a-service Length of output: 3420 Use singleflight to suppress duplicate cache misses for the same key. The RLock is released at line 75 before the delegate call at line 82. Concurrent requests for the same key with cache misses or expired entries will all call Remediation sketchAdd type CachedAdminChecker struct {
delegate adminChecker
ttl time.Duration
now func() time.Time
+ inflight singleflight.Group
mu sync.RWMutex
cache map[string]cacheEntry- result := c.delegate.IsAdmin(ctx, user)
+ value, _, _ := c.inflight.Do(key, func() (any, error) {
+ // Re-check after waiting for any earlier caller.
+ now := c.now()
+ c.mu.RLock()
+ entry, ok := c.cache[key]
+ c.mu.RUnlock()
+ if ok && now.Before(entry.expiresAt) {
+ c.hits.Inc()
+ return entry.isAdmin, nil
+ }
+
+ result := c.delegate.IsAdmin(ctx, user)
+ c.mu.Lock()
+ c.cache[key] = cacheEntry{isAdmin: result, expiresAt: now.Add(c.ttl)}
+ c.evictExpiredLocked(now)
+ c.mu.Unlock()
+ c.misses.Inc()
+ return result, nil
+ })
+ return value.(bool)
-
- c.mu.Lock()
- c.cache[key] = cacheEntry{
- isAdmin: result,
- expiresAt: now.Add(c.ttl),
- }
- c.evictExpiredLocked(now)
- c.mu.Unlock()
-
- c.misses.Inc()
- return result🤖 Prompt for AI Agents
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This finding doesn't need a fix. The current behavior (two concurrent misses for the same user both make SAR calls) was a deliberate design choice, not an oversight.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| c.misses.Inc() | ||
| return result, nil | ||
| } | ||
|
|
||
| //nolint:ireturn,nonamedreturns // Prometheus counters are inherently interface-typed; named returns clarify which is which. | ||
| func (c *CachedAdminChecker) Metrics() (hits, misses prometheus.Counter) { | ||
| return c.hits, c.misses | ||
| } | ||
|
|
||
| func (c *CachedAdminChecker) evictExpiredLocked(now time.Time) { | ||
| for k, v := range c.cache { | ||
| if now.After(v.expiresAt) { | ||
| delete(c.cache, k) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| //nolint:ireturn // prometheus.Counter is the canonical type for counters. | ||
| func registerOrReuseCounter(reg prometheus.Registerer, c prometheus.Counter) prometheus.Counter { | ||
| err := reg.Register(c) | ||
| if err == nil { | ||
| return c | ||
| } | ||
| var are prometheus.AlreadyRegisteredError | ||
| if errors.As(err, &are) { | ||
| existing, ok := are.ExistingCollector.(prometheus.Counter) | ||
| if !ok { | ||
| panic("existing collector is not a Counter") | ||
| } | ||
| return existing | ||
| } | ||
| panic(err) | ||
| } | ||
|
|
||
| func cacheKey(user *token.UserContext) string { | ||
| sorted := make([]string, len(user.Groups)) | ||
| copy(sorted, user.Groups) | ||
| slices.Sort(sorted) | ||
| return user.Username + "\x00" + strings.Join(sorted, "\x00") | ||
|
Comment on lines
+157
to
+161
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make the authorization cache key unambiguous. CWE-20 / CWE-863. NUL-delimited concatenation can collide if token-derived usernames or groups contain Remediation sketch import (
"context"
+ "strconv"
"slices"
"strings"
"sync"
"time"
@@
func cacheKey(user *token.UserContext) string {
sorted := make([]string, len(user.Groups))
copy(sorted, user.Groups)
slices.Sort(sorted)
- return user.Username + "\x00" + strings.Join(sorted, "\x00")
+
+ var b strings.Builder
+ writePart := func(value string) {
+ b.WriteString(strconv.Itoa(len(value)))
+ b.WriteByte(':')
+ b.WriteString(value)
+ b.WriteByte('|')
+ }
+ writePart(user.Username)
+ for _, group := range sorted {
+ writePart(group)
+ }
+ return b.String()
}As per coding guidelines, “REVIEW PRIORITIES: 1. Security vulnerabilities.” 🤖 Prompt for AI Agents
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This finding doesn't need a fix. Kubernetes usernames and group names cannot contain NUL bytes — they are validated by the API server and follow DNS/RBAC naming conventions (alphanumeric, -, ., @, :). A NUL separator is unambiguous for this domain.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
One nuance worth noting for the record: groups, like users, are represented by a string, and that string has no format requirements, other than that the prefix That said, if the identity providers in this deployment are known to produce well-constrained strings (OpenShift OAuth, etc.), the NUL separator is indeed safe in practice, and the complexity trade-off of length-prefixed encoding is not justified. I'll drop the finding.
|
||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gate
/metrics; it is currently public on the API router.CWE-200. Any caller that can reach this API can scrape process/runtime metrics and SAR cache hit/miss activity. Exploit scenario: a tenant or external client polls
/metricsto infer admin-check traffic patterns and service internals. Put this route on an internal-only listener or require a Prometheus scrape credential.Remediation sketch
As per coding guidelines, “REVIEW PRIORITIES: 1. Security vulnerabilities.”
🤖 Prompt for AI Agents
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This finding doesn't need a fix. The /metrics endpoint is placed alongside /health on the main router, but in production this service runs inside a Kubernetes cluster where access is controlled at the network level via NetworkPolicy — the same pattern already used for the /internal/v1/* routes (lines 178-182). Those internal routes have no auth middleware either; the comment on line 177 says // Internal routes (no auth required - called by Authorino / CronJob).
Adding a scrape token would complicate the Prometheus scrape config for no real security gain — the network boundary is the access control layer. This is the standard pattern in Kubernetes-native services.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@yu-teo, understood — NetworkPolicy as the enforcement boundary for internal/metrics endpoints is a standard Kubernetes-native pattern, and the existing/internal/v1/*routes already establish that precedent in this codebase. I'll note this for future reviews.( (
\ ( \
\ \
\ \\
\ '.\
\ \
\ \
.-' \
/ \ ~ curious about your setup ~
`--.______\