Skip to content

Commit d1a2c36

Browse files
authored
Merge pull request #255 from klihub/fixes/release-0.8/refresh-sigsegv-with-nil-watcher
[release/0.8] don't crash in cache refresh/update with nil fsnotify watcher.
2 parents e665440 + 693c957 commit d1a2c36

File tree

2 files changed

+245
-0
lines changed

2 files changed

+245
-0
lines changed

pkg/cdi/cache.go

+8
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,14 @@ func (w *watch) update(dirErrors map[string]error, removed ...string) bool {
564564
update bool
565565
)
566566

567+
// If we failed to create an fsnotify.Watcher we have a nil watcher here
568+
// (but with autoRefresh left on). One known case when this can happen is
569+
// if we have too many open files. In that case we always return true and
570+
// force a refresh.
571+
if w.watcher == nil {
572+
return true
573+
}
574+
567575
for dir, ok = range w.tracked {
568576
if ok {
569577
continue

pkg/cdi/cache_linux_test.go

+237
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/*
2+
Copyright © The CDI Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package cdi
18+
19+
import (
20+
"fmt"
21+
"os"
22+
"path/filepath"
23+
"strconv"
24+
"strings"
25+
"syscall"
26+
"testing"
27+
28+
oci "github.com/opencontainers/runtime-spec/specs-go"
29+
"github.com/stretchr/testify/require"
30+
)
31+
32+
func TestTooManyOpenFiles(t *testing.T) {
33+
em, err := triggerEmfile()
34+
require.NoError(t, err)
35+
require.NotNil(t, em)
36+
defer func() {
37+
require.NoError(t, em.undo())
38+
}()
39+
40+
_, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0)
41+
require.Equal(t, syscall.EMFILE, err)
42+
43+
cache := newCache(
44+
WithAutoRefresh(true),
45+
)
46+
require.NotNil(t, cache)
47+
48+
// try to trigger original crash with a nil fsnotify.Watcher
49+
_, _ = cache.InjectDevices(&oci.Spec{}, "vendor1.com/device=dev1")
50+
}
51+
52+
func TestRecoveryAfterTooManyOpenFiles(t *testing.T) {
53+
var (
54+
etcDir = map[string]string{
55+
"vendor1.yaml": `
56+
cdiVersion: "0.3.0"
57+
kind: "vendor1.com/device"
58+
containerEdits:
59+
env:
60+
- VENDOR1_SPEC_VAR1=VAL1
61+
devices:
62+
- name: "dev1"
63+
containerEdits:
64+
env:
65+
- "VENDOR1_VAR1=VAL1"
66+
deviceNodes:
67+
- path: "/dev/vendor1-dev1"
68+
type: b
69+
major: 10
70+
minor: 1
71+
`,
72+
}
73+
74+
devices = []string{
75+
"vendor1.com/device=dev1",
76+
}
77+
78+
ociSpec = &oci.Spec{}
79+
80+
resultingSpec = &oci.Spec{
81+
Process: &oci.Process{
82+
Env: []string{
83+
"VENDOR1_SPEC_VAR1=VAL1",
84+
"VENDOR1_VAR1=VAL1",
85+
},
86+
},
87+
Linux: &oci.Linux{
88+
Devices: []oci.LinuxDevice{
89+
{
90+
Path: "/dev/vendor1-dev1",
91+
Type: "b",
92+
Major: 10,
93+
Minor: 1,
94+
},
95+
},
96+
Resources: &oci.LinuxResources{
97+
Devices: []oci.LinuxDeviceCgroup{
98+
{
99+
Allow: true,
100+
Type: "b",
101+
Major: int64ptr(10),
102+
Minor: int64ptr(1),
103+
Access: "rwm",
104+
},
105+
},
106+
},
107+
},
108+
}
109+
)
110+
111+
dir, err := createSpecDirs(t, etcDir, nil)
112+
require.NoError(t, err, "failed to create test directory")
113+
114+
// trigger EMFILE for fd creation: exhaust our file descriptor table
115+
em, err := triggerEmfile()
116+
require.NoError(t, err)
117+
require.NotNil(t, em)
118+
defer func() {
119+
require.NoError(t, em.undo())
120+
}()
121+
122+
_, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0)
123+
require.Equal(t, syscall.EMFILE, err)
124+
125+
cache := newCache(
126+
WithSpecDirs(
127+
filepath.Join(dir, "etc"),
128+
),
129+
WithAutoRefresh(true),
130+
)
131+
require.NotNil(t, cache)
132+
133+
// try to trigger original crash with a nil fsnotify.Watcher
134+
_, _ = cache.InjectDevices(&oci.Spec{}, devices...)
135+
136+
// undo EMFILE for fd creation
137+
require.NoError(t, em.undo())
138+
139+
// verify that injection works again
140+
unresolved, err := cache.InjectDevices(ociSpec, devices...)
141+
require.NoError(t, err)
142+
require.Nil(t, unresolved)
143+
require.Equal(t, resultingSpec, ociSpec)
144+
}
145+
146+
type emfile struct {
147+
limit syscall.Rlimit
148+
fds []int
149+
undone bool
150+
}
151+
152+
// getFdTableSize reads the process' FD table size.
153+
func getFdTableSize() (uint64, error) {
154+
status, err := os.ReadFile("/proc/self/status")
155+
if err != nil {
156+
return 0, err
157+
}
158+
159+
const fdSizeTag = "FDSize:"
160+
161+
for _, line := range strings.Split(string(status), "\n") {
162+
if strings.HasPrefix(line, fdSizeTag) {
163+
value := strings.TrimSpace(strings.TrimPrefix(line, fdSizeTag))
164+
size, err := strconv.ParseUint(value, 10, 64)
165+
if err != nil {
166+
return 0, err
167+
}
168+
return size, nil
169+
}
170+
}
171+
172+
return 0, fmt.Errorf("tag %s not found in /proc/self/status", fdSizeTag)
173+
}
174+
175+
// triggerEmfile exhausts the file descriptors of the process and triggers
176+
// a failure with an EMFILE for any syscall that needs to create a new fd.
177+
// On success, the returned emfile's undo() method can be used to undo the
178+
// exhausted table and restore everything to a working state.
179+
func triggerEmfile() (*emfile, error) {
180+
// We exhaust our file descriptors by
181+
// - checking the size of our current fd table
182+
// - setting our soft RLIMIT_NOFILE limit to the table size
183+
// - ensuring the fd table is full by creating new fd's
184+
//
185+
// We also save our original RLIMIT_NOFILE limit and any fd's we
186+
// might need to create, so we can eventually restore everything
187+
// to its original state.
188+
189+
fdsize, err := getFdTableSize()
190+
if err != nil {
191+
return nil, err
192+
}
193+
194+
em := &emfile{}
195+
196+
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &em.limit); err != nil {
197+
return nil, err
198+
}
199+
200+
limit := em.limit
201+
limit.Cur = fdsize
202+
203+
if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &limit); err != nil {
204+
return nil, err
205+
}
206+
207+
for i := uint64(0); i < fdsize; i++ {
208+
fd, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0)
209+
if err != nil {
210+
return em, nil
211+
}
212+
em.fds = append(em.fds, fd)
213+
}
214+
215+
return nil, fmt.Errorf("failed to trigger EMFILE")
216+
}
217+
218+
// undo restores the process' state to its pre-EMFILE condition.
219+
func (em *emfile) undo() error {
220+
if em == nil || em.undone {
221+
return nil
222+
}
223+
224+
// we restore the process' state to pre-EMFILE condition by
225+
// - restoring our saved RLIMIT_NOFILE
226+
// - closing any extra file descriptors we might have created
227+
228+
if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &em.limit); err != nil {
229+
return err
230+
}
231+
for _, fd := range em.fds {
232+
syscall.Close(fd)
233+
}
234+
em.undone = true
235+
236+
return nil
237+
}

0 commit comments

Comments
 (0)