-
Notifications
You must be signed in to change notification settings - Fork 123
Open
Description
hi.
I'm wondering whether the current CSI approach relies too much on end-user pod customization.
Moreover it requires pods to run in privileged modes which may not be suitable in most k8s environments.
What would you think of a new approach where the peer pod VM would embed a new systemd agent service responsible for mounting the volumes in the containers, according to the pod's spec?
i've managed to do a proof of concept which works this way:
the cloud-api-adaptor generates a volumes.json spec file passed to the VM during cloudinit.
// Collect the pod's volumes and serialize them into the volumes.json spec
// that is passed to the VM via cloud-init.
// NOTE(review): both errors below are discarded with `_` — failures from
// GetVolumesForPod and json.MarshalIndent should be handled before this ships.
podVolumes, _ := s.volumeService.GetVolumesForPod(ctx, podNamespace, podName)
volumesConfig := volumes.VolumesConfig{
Volumes: make([]volumes.VolumeConfig, len(podVolumes)),
}
// Copy each discovered volume into the wire format consumed by the agent.
for i, vol := range podVolumes {
volumesConfig.Volumes[i] = volumes.VolumeConfig{
VolumeID: vol.VolumeID,
VolumeName: vol.VolumeName,
FSType: vol.FSType,
MountPath: vol.TargetPath,
StagingTargetPath: vol.StagingTargetPath,
MountOptions: vol.MountOptions,
ReadOnly: vol.ReadOnly,
// fsGroup presumably comes from the pod's securityContext — confirm in caller.
FSGroup: fsGroup,
}
}
volumesJSON, _ := json.MarshalIndent(volumesConfig, "", " ")
// Deliver the spec into the VM at a fixed path read by peerpod-volume-agent.
cloudConfig.WriteFiles = append(cloudConfig.WriteFiles, cloudinit.WriteFile{
Path: "/run/peerpod/volumes.json",
Content: string(volumesJSON),
})
which can then be consumed by the new peerpod-volume-agent
src/cloud-api-adaptor/cmd/peerpod-volume-agent/
1
2 // SPDX-License-Identifier: Apache-2.0
3
4 package main
5
6 import (
7 "encoding/json"
8 "fmt"
9 "log"
10 "os"
11 "os/exec"
12 "path/filepath"
13 "strings"
14 "time"
15
16 "github.com/confidential-containers/cloud-api-adaptor/src/cloud-api-adaptor/pkg/paths"
17 "github.com/confidential-containers/cloud-api-adaptor/src/cloud-api-adaptor/pkg/volumes"
18 )
19
// logger writes all agent output to stdout with a fixed prefix so the
// entries are easy to filter in the systemd journal.
var logger = log.New(os.Stdout, "[peerpod-volume-agent] ", log.LstdFlags|log.Lmsgprefix)

const (
	// Maximum time to wait for a device to appear
	deviceWaitTimeout = 5 * time.Minute
	// Poll interval when waiting for devices
	devicePollInterval = 2 * time.Second
)
28
29 func main() {
30 logger.Println("Starting peerpod-volume-agent")
31
32 // Check if volumes config exists
33 if _, err := os.Stat(paths.VolumesConfigPath); os.IsNotExist(err) {
34 logger.Printf("No volume config found at %s, nothing to do", paths.VolumesConfigPath)
35 signalReady()
36 return
37 }
38
39 // Load volume configuration
40 config, err := loadVolumeConfig(paths.VolumesConfigPath)
41 if err != nil {
42 logger.Fatalf("Failed to load volume config: %v", err)
43 }
44
45 if len(config.Volumes) == 0 {
46 logger.Println("No volumes configured, nothing to do")
47 signalReady()
48 return
49 }
50
51 logger.Printf("Processing %d volume(s)", len(config.Volumes))
52
53 // Ensure mount directory exists
54 if err := os.MkdirAll(paths.VolumesMountDir, 0755); err != nil {
55 logger.Fatalf("Failed to create mount directory %s: %v", paths.VolumesMountDir, err)
56 }
57
58 // Process each volume
59 for i, vol := range config.Volumes {
60 logger.Printf("Processing volume %d/%d: %s", i+1, len(config.Volumes), vol.VolumeID)
61
62 if err := processVolume(vol); err != nil {
63 logger.Fatalf("Failed to process volume %s: %v", vol.VolumeID, err)
64 }
65 }
66
67 logger.Println("All volumes processed successfully")
68 signalReady()
69 }
70
71 func loadVolumeConfig(configPath string) (*volumes.VolumesConfig, error) {
72 data, err := os.ReadFile(configPath)
73 if err != nil {
74 return nil, fmt.Errorf("failed to read config file: %w", err)
75 }
76
77 var config volumes.VolumesConfig
78 if err := json.Unmarshal(data, &config); err != nil {
79 return nil, fmt.Errorf("failed to parse config: %w", err)
80 }
81
82 return &config, nil
83 }
84
85 func processVolume(vol volumes.VolumeConfig) error {
86 // Wait for device to appear
87 devicePath, err := waitForDevice(vol.VolumeID, vol.DevicePath)
88 if err != nil {
89 return fmt.Errorf("failed waiting for device: %w", err)
90 }
91 logger.Printf("Found device %s for volume %s", devicePath, vol.VolumeID)
92
93 // Ensure filesystem exists
94 if err := ensureFilesystem(devicePath, vol.FSType); err != nil {
95 return fmt.Errorf("failed to ensure filesystem: %w", err)
96 }
97
98 // Mount to staging path
99 stagingPath := filepath.Join(paths.VolumesMountDir, vol.VolumeID)
100 if err := mountVolume(devicePath, stagingPath, vol.FSType, vol.MountOptions, vol.ReadOnly); err != nil {
101 return fmt.Errorf("failed to mount volume: %w", err)
102 }
103 logger.Printf("Mounted volume %s at %s", vol.VolumeID, stagingPath)
104
105 // Set permissions on the mount point to allow container access
106 // This mimics kubelet behavior with fsGroup
107 if !vol.ReadOnly {
108 if vol.FSGroup != nil {
109 // fsGroup is set - chown to root:<fsGroup> and chmod 0775
110 // This allows processes running as that group to read/write
111 gid := int(*vol.FSGroup)
112 if err := os.Chown(stagingPath, 0, gid); err != nil {
113 logger.Printf("Warning: failed to chown mount point to gid %d: %v", gid, err)
114 } else {
115 logger.Printf("Set ownership root:%d on %s", gid, stagingPath)
116 }
117 if err := os.Chmod(stagingPath, 0775); err != nil {
118 logger.Printf("Warning: failed to chmod mount point: %v", err)
119 } else {
120 logger.Printf("Set permissions 0775 on %s", stagingPath)
121 }
122 } else {
123 // No fsGroup - set world-writable as fallback
124 if err := os.Chmod(stagingPath, 0777); err != nil {
125 logger.Printf("Warning: failed to chmod mount point: %v", err)
126 } else {
127 logger.Printf("Set permissions 0777 on %s (no fsGroup)", stagingPath)
128 }
129 }
130 }
131
132 // Create bind mount at the expected kubelet path (what kata-agent expects)
133 if vol.MountPath != "" {
134 if err := bindMount(stagingPath, vol.MountPath, vol.ReadOnly); err != nil {
135 return fmt.Errorf("failed to create bind mount at %s: %w", vol.MountPath, err)
136 }
137 logger.Printf("Created bind mount from %s to %s", stagingPath, vol.MountPath)
138 }
139
140 return nil
141 }
142
143 // waitForDevice waits for a block device to appear by volume ID or explicit path
144 func waitForDevice(volumeID, explicitPath string) (string, error) {
145 deadline := time.Now().Add(deviceWaitTimeout)
146
147 for time.Now().Before(deadline) {
148 // If explicit path is provided, check it first
149 if explicitPath != "" {
150 if _, err := os.Stat(explicitPath); err == nil {
151 return explicitPath, nil
152 }
153 }
154
155 // Try to find device by volume ID in /dev/disk/by-id/
156 // OpenStack/Cinder uses virtio-<volumeID[:20]> format
157 devicePath, err := findDeviceByVolumeID(volumeID)
158 if err == nil && devicePath != "" {
159 return devicePath, nil
160 }
161
162 logger.Printf("Waiting for device for volume %s...", volumeID)
163 time.Sleep(devicePollInterval)
164 }
165
166 return "", fmt.Errorf("timeout waiting for device for volume %s", volumeID)
167 }
168
169 // findDeviceByVolumeID searches for a device by volume ID in /dev/disk/by-id/
// findDeviceByVolumeID searches /dev/disk/by-id for a symlink matching the
// given volume ID and returns the resolved device path. It returns ("", nil)
// when no matching device exists yet, so callers can keep polling.
func findDeviceByVolumeID(volumeID string) (string, error) {
	byIDPath := "/dev/disk/by-id"

	entries, err := os.ReadDir(byIDPath)
	if err != nil {
		if os.IsNotExist(err) {
			return "", nil // Directory doesn't exist yet
		}
		return "", err
	}

	// OpenStack/Cinder typically uses the first 20 chars of volume ID
	// Format: virtio-<volumeID[:20]>
	shortID := volumeID
	if len(shortID) > 20 {
		shortID = shortID[:20]
	}

	// Also check without dashes (some hypervisors strip them)
	shortIDNoDash := strings.ReplaceAll(shortID, "-", "")

	for _, entry := range entries {
		name := entry.Name()

		// Check for virtio-<volumeID> pattern
		if strings.HasPrefix(name, "virtio-") {
			suffix := strings.TrimPrefix(name, "virtio-")
			if strings.HasPrefix(suffix, shortID) || strings.HasPrefix(suffix, shortIDNoDash) {
				linkPath := filepath.Join(byIDPath, name)
				// Resolve the symlink to get actual device path
				realPath, err := filepath.EvalSymlinks(linkPath)
				if err != nil {
					continue
				}
				return realPath, nil
			}
		}

		// Check for scsi- pattern (some cloud providers use this).
		// Match both the dashed and dash-less ID forms, mirroring the
		// virtio branch above (the original only checked the dashed form).
		if strings.HasPrefix(name, "scsi-") &&
			(strings.Contains(name, shortID) || strings.Contains(name, shortIDNoDash)) {
			linkPath := filepath.Join(byIDPath, name)
			realPath, err := filepath.EvalSymlinks(linkPath)
			if err != nil {
				continue
			}
			return realPath, nil
		}
	}

	return "", nil
}
221
222 // ensureFilesystem checks if a filesystem exists on the device and creates one if needed
223 func ensureFilesystem(devicePath, fsType string) error {
224 if fsType == "" {
225 fsType = "ext4"
226 }
227
228 // Check if device already has a filesystem using blkid
229 cmd := exec.Command("blkid", "-o", "value", "-s", "TYPE", devicePath)
230 output, err := cmd.Output()
231 if err == nil && len(output) > 0 {
232 existingFS := strings.TrimSpace(string(output))
233 logger.Printf("Device %s already has filesystem: %s", devicePath, existingFS)
234 return nil
235 }
236
237 // No filesystem found, create one
238 logger.Printf("Creating %s filesystem on %s", fsType, devicePath)
239
240 var mkfsCmd *exec.Cmd
241 switch fsType {
242 case "ext4":
243 mkfsCmd = exec.Command("mkfs.ext4", "-F", devicePath)
244 case "xfs":
245 mkfsCmd = exec.Command("mkfs.xfs", "-f", devicePath)
246 case "ext3":
247 mkfsCmd = exec.Command("mkfs.ext3", "-F", devicePath)
248 default:
249 return fmt.Errorf("unsupported filesystem type: %s", fsType)
250 }
251
252 mkfsCmd.Stdout = os.Stdout
253 mkfsCmd.Stderr = os.Stderr
254
255 if err := mkfsCmd.Run(); err != nil {
256 return fmt.Errorf("mkfs failed: %w", err)
257 }
258
259 logger.Printf("Created %s filesystem on %s", fsType, devicePath)
260 return nil
261 }
262
263 // mountVolume mounts a device to the specified path
264 func mountVolume(devicePath, mountPath, fsType string, options []string, readOnly bool) error {
265 // Create mount point directory
266 if err := os.MkdirAll(mountPath, 0755); err != nil {
267 return fmt.Errorf("failed to create mount point: %w", err)
268 }
269
270 // Check if already mounted
271 if isMounted(mountPath) {
272 logger.Printf("Path %s is already mounted", mountPath)
273 return nil
274 }
275
276 // Build mount options
277 mountOpts := []string{}
278 if readOnly {
279 mountOpts = append(mountOpts, "ro")
280 }
281 mountOpts = append(mountOpts, options...)
282
283 // Build mount command
284 args := []string{"-t", fsType}
285 if len(mountOpts) > 0 {
286 args = append(args, "-o", strings.Join(mountOpts, ","))
287 }
288 args = append(args, devicePath, mountPath)
289
290 cmd := exec.Command("mount", args...)
291 cmd.Stdout = os.Stdout
292 cmd.Stderr = os.Stderr
293
294 if err := cmd.Run(); err != nil {
295 return fmt.Errorf("mount failed: %w", err)
296 }
297
298 return nil
299 }
300
301 // isMounted checks if a path is a mount point
// isMounted reports whether path is currently a mount point, delegating to
// the mountpoint(1) utility (exit status 0 means "is a mount point").
func isMounted(path string) bool {
	return exec.Command("mountpoint", "-q", path).Run() == nil
}
306
307 // bindMount creates a bind mount from source to target
308 func bindMount(source, target string, readOnly bool) error {
309 // Create target directory (and all parent directories)
310 if err := os.MkdirAll(target, 0755); err != nil {
311 return fmt.Errorf("failed to create target directory: %w", err)
312 }
313
314 // Check if already mounted
315 if isMounted(target) {
316 logger.Printf("Path %s is already mounted", target)
317 return nil
318 }
319
320 // Build mount options
321 opts := "bind"
322 if readOnly {
323 opts = "bind,ro"
324 }
325
326 cmd := exec.Command("mount", "--bind", source, target)
327 cmd.Stdout = os.Stdout
328 cmd.Stderr = os.Stderr
329
330 if err := cmd.Run(); err != nil {
331 return fmt.Errorf("bind mount failed: %w", err)
332 }
333
334 // If read-only, remount with ro option
335 if readOnly {
336 cmd = exec.Command("mount", "-o", "remount,ro,bind", target)
337 cmd.Stdout = os.Stdout
338 cmd.Stderr = os.Stderr
339 if err := cmd.Run(); err != nil {
340 return fmt.Errorf("failed to remount read-only: %w", err)
341 }
342 }
343
344 _ = opts // suppress unused variable warning
345 return nil
346 }
347
348 // signalReady creates the ready file to signal that all volumes are mounted
349 func signalReady() {
350 // Create parent directory if needed
351 if err := os.MkdirAll(filepath.Dir(paths.VolumesReadyPath), 0755); err != nil {
352 logger.Printf("Warning: failed to create ready file directory: %v", err)
353 return
354 }
355
356 if err := os.WriteFile(paths.VolumesReadyPath, []byte("ready\n"), 0644); err != nil {
357 logger.Printf("Warning: failed to write ready file: %v", err)
358 return
359 }
360
361 logger.Printf("Signaled ready at %s", paths.VolumesReadyPath)
362 } src/cloud-api-adaptor/podvm/files/etc/systemd/system/multi-user.target.wants/peerpod-volume-agent.service
src/cloud-api-adaptor/podvm/files/etc/systemd/system/peerpod-volume-agent.service
[Unit]
Description=Peer Pod Volume Agent
Documentation=https://github.com/confidential-containers/cloud-api-adaptor
After=network.target cloud-init.service process-user-data.service
Before=kata-agent.service
ConditionPathExists=/run/peerpod/volumes.json
[Service]
Type=oneshot
ExecStart=/usr/local/bin/peerpod-volume-agent
RemainAfterExit=yes
TimeoutStartSec=300
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels