Skip to content

Commit 56291b5

Browse files
committed
kubeapiserver auditloganalyzer: spot handler panics in audit log
Don't let any useragent cause too many panics in apiserver
1 parent e440b10 commit 56291b5

File tree

3 files changed

+168
-2
lines changed

3 files changed

+168
-2
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
package auditloganalyzer
2+
3+
import (
4+
"fmt"
5+
"sort"
6+
"strings"
7+
"sync"
8+
"time"
9+
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
"k8s.io/apimachinery/pkg/types"
12+
"k8s.io/apimachinery/pkg/util/sets"
13+
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
14+
)
15+
16+
type panicEvent struct {
17+
auditID types.UID
18+
timestamp time.Time
19+
}
20+
21+
func (p panicEvent) String() string {
22+
return fmt.Sprintf("auditID %s at %s", p.auditID, p.timestamp.String())
23+
}
24+
25+
type PanicEventByTimestamp []panicEvent
26+
27+
func (n PanicEventByTimestamp) Len() int {
28+
return len(n)
29+
}
30+
func (n PanicEventByTimestamp) Swap(i, j int) {
31+
n[i], n[j] = n[j], n[i]
32+
}
33+
func (n PanicEventByTimestamp) Less(i, j int) bool {
34+
diff := n[i].timestamp.Compare(n[j].timestamp)
35+
switch {
36+
case diff < 0:
37+
return true
38+
case diff > 0:
39+
return false
40+
}
41+
42+
return strings.Compare(string(n[i].auditID), string(n[j].auditID)) < 0
43+
}
44+
45+
type panicEventsForEndpoint struct {
46+
panicEvents map[string]sets.Set[panicEvent]
47+
}
48+
49+
func NewPanicEventsForEndpoint() panicEventsForEndpoint {
50+
return panicEventsForEndpoint{
51+
panicEvents: make(map[string]sets.Set[panicEvent]),
52+
}
53+
}
54+
55+
func (p panicEventsForEndpoint) Insert(endpoint string, pe panicEvent) {
56+
events, ok := p.panicEvents[endpoint]
57+
if !ok {
58+
events = sets.New[panicEvent]()
59+
}
60+
events.Insert(pe)
61+
p.panicEvents[endpoint] = events
62+
}
63+
64+
func (p panicEventsForEndpoint) String() string {
65+
result := ""
66+
for endpoint, events := range p.panicEvents {
67+
sortedEvents := events.UnsortedList()
68+
sort.Sort(PanicEventByTimestamp(sortedEvents))
69+
eventsAsStrings := []string{}
70+
for _, event := range sortedEvents {
71+
eventsAsStrings = append(eventsAsStrings, event.String())
72+
}
73+
eventString := fmt.Sprintf(" %s", strings.Join(eventsAsStrings, "\n "))
74+
result = fmt.Sprintf("%s\nFound %d panics for endpoint %q:\n%s", result, len(events), endpoint, eventString)
75+
}
76+
return result
77+
}
78+
79+
func (p panicEventsForEndpoint) Len() int {
80+
sum := 0
81+
for _, endpoints := range p.panicEvents {
82+
sum += endpoints.Len()
83+
}
84+
return sum
85+
}
86+
87+
type panicEventsForUserAgent struct {
88+
panicEvents map[string]panicEventsForEndpoint
89+
}
90+
91+
func NewPanicEventsForUserAgent() panicEventsForUserAgent {
92+
return panicEventsForUserAgent{
93+
panicEvents: make(map[string]panicEventsForEndpoint),
94+
}
95+
}
96+
97+
func (p panicEventsForUserAgent) Insert(useragent string, endpoint string, pe panicEvent) {
98+
events, ok := p.panicEvents[useragent]
99+
if !ok {
100+
events = NewPanicEventsForEndpoint()
101+
}
102+
events.Insert(endpoint, pe)
103+
p.panicEvents[useragent] = events
104+
}
105+
106+
type apiserverPaniced struct {
107+
lock sync.Mutex
108+
panicEventsPerUserAgent panicEventsForUserAgent
109+
}
110+
111+
func CheckForApiserverPaniced() *apiserverPaniced {
112+
return &apiserverPaniced{
113+
panicEventsPerUserAgent: NewPanicEventsForUserAgent(),
114+
}
115+
}
116+
117+
func (s *apiserverPaniced) HandleAuditLogEvent(auditEvent *auditv1.Event, beginning, end *metav1.MicroTime) {
118+
if beginning != nil && auditEvent.RequestReceivedTimestamp.Before(beginning) || end != nil && end.Before(&auditEvent.RequestReceivedTimestamp) {
119+
return
120+
}
121+
122+
if auditEvent.ResponseStatus == nil {
123+
return
124+
}
125+
if auditEvent.ResponseStatus.Code != 500 {
126+
return
127+
}
128+
129+
s.lock.Lock()
130+
defer s.lock.Unlock()
131+
132+
pe := panicEvent{
133+
auditID: auditEvent.AuditID,
134+
timestamp: auditEvent.RequestReceivedTimestamp.Time,
135+
}
136+
s.panicEventsPerUserAgent.Insert(auditEvent.UserAgent, auditEvent.RequestURI, pe)
137+
}

pkg/monitortests/kubeapiserver/auditloganalyzer/handle_excessive_applies.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
package auditloganalyzer
22

33
import (
4+
"strings"
5+
"sync"
6+
47
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
58
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
69
"k8s.io/apiserver/pkg/authentication/serviceaccount"
7-
"strings"
8-
"sync"
910
)
1011

1112
type excessiveApplies struct {

pkg/monitortests/kubeapiserver/auditloganalyzer/monitortest.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@ type auditLogAnalyzer struct {
1919

2020
summarizer *summarizer
2121
excessiveApplyChecker *excessiveApplies
22+
apiserverPaniced *apiserverPaniced
2223
}
2324

2425
func NewAuditLogAnalyzer() monitortestframework.MonitorTest {
2526
return &auditLogAnalyzer{
2627
summarizer: NewAuditLogSummarizer(),
2728
excessiveApplyChecker: CheckForExcessiveApplies(),
29+
apiserverPaniced: CheckForApiserverPaniced(),
2830
}
2931
}
3032

@@ -42,6 +44,7 @@ func (w *auditLogAnalyzer) CollectData(ctx context.Context, storageDir string, b
4244
auditLogHandlers := []AuditEventHandler{
4345
w.summarizer,
4446
w.excessiveApplyChecker,
47+
w.apiserverPaniced,
4548
}
4649
err = GetKubeAuditLogSummary(ctx, kubeClient, &beginning, &end, auditLogHandlers)
4750

@@ -122,6 +125,31 @@ func (w *auditLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Con
122125

123126
}
124127

128+
for userAgent, userAgentPanics := range w.apiserverPaniced.panicEventsPerUserAgent.panicEvents {
129+
testName := fmt.Sprintf("user %s must not produce too many apiserver handler panics", userAgent)
130+
131+
failures := []string{}
132+
if userAgentPanics.Len() > 5 {
133+
failures := append(failures, userAgentPanics.String())
134+
ret = append(ret,
135+
&junitapi.JUnitTestCase{
136+
Name: testName,
137+
FailureOutput: &junitapi.FailureOutput{
138+
Message: strings.Join(failures, "\n"),
139+
Output: "details in audit log",
140+
},
141+
},
142+
)
143+
} else {
144+
ret = append(ret,
145+
&junitapi.JUnitTestCase{
146+
Name: testName,
147+
},
148+
)
149+
}
150+
151+
}
152+
125153
return ret, nil
126154
}
127155

0 commit comments

Comments
 (0)