Skip to content

Commit 43c9cca

Browse files
sambucolevski
andcommitted
feat: Code fixes, cleanup and improvements
* Refactor & cleanups (#78) * Propagate context more thoroughly (#79) * Tests after introduction of DriverConfig (#80) * the error handling was creating issues ignored previously (#81) * Use node tmp folder for the mounts recovery state (#82) * Wait for the deamon to be ready (#83) --------- Co-authored-by: Tasko Olevski <olevski90@gmail.com>
1 parent 2a46ac7 commit 43c9cca

11 files changed

Lines changed: 435 additions & 337 deletions

File tree

.devcontainer/rclone/install.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ chown -R "${USERNAME}:golang" /go
2121
chmod -R g+r+w /go
2222

2323
# Make sure the default folders exists
24-
mkdir -p /var/lib/kubelet/plugins/csi-rclone/
24+
mkdir -p /run/csi-rclone

cmd/csi-rclone-plugin/main.go

Lines changed: 25 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,35 @@ package main
22

33
import (
44
"context"
5+
"errors"
56
"flag"
67
"fmt"
78
"os"
89
"os/signal"
9-
"syscall"
1010
"time"
1111

12+
"github.com/SwissDataScienceCenter/csi-rclone/pkg/common"
1213
"github.com/SwissDataScienceCenter/csi-rclone/pkg/metrics"
1314
"github.com/SwissDataScienceCenter/csi-rclone/pkg/rclone"
1415
"github.com/spf13/cobra"
16+
"github.com/spf13/pflag"
1517
"k8s.io/klog"
16-
mountUtils "k8s.io/mount-utils"
1718
)
1819

19-
var (
20-
endpoint string
21-
nodeID string
22-
cacheDir string
23-
cacheSize string
24-
meters []metrics.Observable
25-
)
20+
func exitOnError(err error) {
21+
// ParseFlags uses errors to return some status information, ignore it here.
22+
if err != nil && !errors.Is(err, pflag.ErrHelp) {
23+
klog.Error(err.Error())
24+
os.Exit(1)
25+
}
26+
}
2627

2728
func init() {
28-
flag.Set("logtostderr", "true")
29+
exitOnError(flag.Set("logtostderr", "true"))
2930
}
3031

3132
func main() {
33+
var meters []metrics.Observable
3234
metricsServerConfig := metrics.ServerConfig{
3335
Host: "localhost",
3436
Port: 9090,
@@ -37,123 +39,49 @@ func main() {
3739
ShutdownTimeout: 5 * time.Second,
3840
Enabled: false,
3941
}
42+
nodeServerConfig := rclone.NodeServerConfig{}
43+
controllerServerConfig := rclone.ControllerServerConfig{}
4044

4145
root := &cobra.Command{
4246
Use: "rclone",
4347
Short: "CSI based rclone driver",
4448
}
49+
// Allow flags to be defined in subcommands, they will be reported at the Execute() step, with the help printed
50+
// before exiting.
51+
root.FParseErrWhitelist.UnknownFlags = true
52+
4553
metricsServerConfig.CommandLineParameters(root)
4654

4755
runCmd := &cobra.Command{
4856
Use: "run",
4957
Short: "Start the CSI driver.",
5058
}
51-
root.AddCommand(runCmd)
59+
exitOnError(nodeServerConfig.CommandLineParameters(runCmd, &meters))
60+
exitOnError(controllerServerConfig.CommandLineParameters(runCmd, &meters))
5261

53-
runNode := &cobra.Command{
54-
Use: "node",
55-
Short: "Start the CSI driver node service - expected to run in a daemonset on every node.",
56-
Run: func(cmd *cobra.Command, args []string) {
57-
handleNode()
58-
},
59-
}
60-
runNode.PersistentFlags().StringVar(&nodeID, "nodeid", "", "node id")
61-
runNode.MarkPersistentFlagRequired("nodeid")
62-
runNode.PersistentFlags().StringVar(&endpoint, "endpoint", "", "CSI endpoint")
63-
runNode.MarkPersistentFlagRequired("endpoint")
64-
runNode.PersistentFlags().StringVar(&cacheDir, "cachedir", "", "cache dir")
65-
runNode.PersistentFlags().StringVar(&cacheSize, "cachesize", "", "cache size")
66-
runCmd.AddCommand(runNode)
67-
runController := &cobra.Command{
68-
Use: "controller",
69-
Short: "Start the CSI driver controller.",
70-
Run: func(cmd *cobra.Command, args []string) {
71-
handleController()
72-
},
73-
}
74-
runController.PersistentFlags().StringVar(&nodeID, "nodeid", "", "node id")
75-
runController.MarkPersistentFlagRequired("nodeid")
76-
runController.PersistentFlags().StringVar(&endpoint, "endpoint", "", "CSI endpoint")
77-
runController.MarkPersistentFlagRequired("endpoint")
78-
runCmd.AddCommand(runController)
62+
root.AddCommand(runCmd)
7963

8064
versionCmd := &cobra.Command{
8165
Use: "version",
8266
Short: "Prints information about this version of csi rclone plugin",
8367
Run: func(cmd *cobra.Command, args []string) {
84-
fmt.Printf("csi-rclone plugin Version: %s", rclone.DriverVersion)
68+
fmt.Printf("csi-rclone plugin Version: %s\n", rclone.DriverVersion)
8569
},
8670
}
8771
root.AddCommand(versionCmd)
8872

89-
root.ParseFlags(os.Args[1:])
73+
exitOnError(root.ParseFlags(os.Args[1:]))
9074

9175
if metricsServerConfig.Enabled {
9276
// Gracefully exit the metrics background servers
93-
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
77+
ctx, stop := signal.NotifyContext(context.Background(), common.InterruptSignals...)
9478
defer stop()
9579

9680
metricsServer := metricsServerConfig.NewServer(ctx, &meters)
9781
go metricsServer.ListenAndServe()
9882
}
9983

100-
if err := root.Execute(); err != nil {
101-
fmt.Fprintf(os.Stderr, "%s", err.Error())
102-
os.Exit(1)
103-
}
84+
exitOnError(root.Execute())
10485

10586
os.Exit(0)
10687
}
107-
108-
func handleNode() {
109-
err := unmountOldVols()
110-
if err != nil {
111-
klog.Warningf("There was an error when trying to unmount old volumes: %v", err)
112-
}
113-
d := rclone.NewDriver(nodeID, endpoint)
114-
ns, err := rclone.NewNodeServer(d.CSIDriver, cacheDir, cacheSize)
115-
if err != nil {
116-
panic(err)
117-
}
118-
meters = append(meters, ns.Metrics()...)
119-
d.WithNodeServer(ns)
120-
err = d.Run()
121-
if err != nil {
122-
panic(err)
123-
}
124-
}
125-
126-
func handleController() {
127-
d := rclone.NewDriver(nodeID, endpoint)
128-
cs := rclone.NewControllerServer(d.CSIDriver)
129-
meters = append(meters, cs.Metrics()...)
130-
d.WithControllerServer(cs)
131-
err := d.Run()
132-
if err != nil {
133-
panic(err)
134-
}
135-
}
136-
137-
// unmountOldVols is used to unmount volumes after a restart on a node
138-
func unmountOldVols() error {
139-
const mountType = "fuse.rclone"
140-
const unmountTimeout = time.Second * 5
141-
klog.Info("Checking for existing mounts")
142-
mounter := mountUtils.Mounter{}
143-
mounts, err := mounter.List()
144-
if err != nil {
145-
return err
146-
}
147-
for _, mount := range mounts {
148-
if mount.Type != mountType {
149-
continue
150-
}
151-
err := mounter.UnmountWithForce(mount.Path, unmountTimeout)
152-
if err != nil {
153-
klog.Warningf("Failed to unmount %s because of %v.", mount.Path, err)
154-
continue
155-
}
156-
klog.Infof("Sucessfully unmounted %s", mount.Path)
157-
}
158-
return nil
159-
}

deploy/csi-rclone/templates/csi-controller-rclone.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ spec:
5454
image: {{ .Values.csiControllerRclone.csiProvisioner.image.repository }}:{{ .Values.csiControllerRclone.csiProvisioner.image.tag | default .Chart.AppVersion }}
5555
imagePullPolicy: {{ .Values.csiControllerRclone.csiProvisioner.imagePullPolicy }}
5656
volumeMounts:
57-
- name: socket-dir
58-
mountPath: /csi
57+
- mountPath: /csi
58+
name: socket-dir
5959
- name: rclone
6060
args:
6161
- run
@@ -85,7 +85,7 @@ spec:
8585
fieldRef:
8686
fieldPath: spec.nodeName
8787
- name: CSI_ENDPOINT
88-
value: "unix://plugin/csi.sock"
88+
value: "unix://csi/csi.sock"
8989
- name: KUBERNETES_CLUSTER_DOMAIN
9090
value: {{ quote .Values.kubernetesClusterDomain }}
9191
{{- if .Values.csiControllerRclone.rclone.goMemLimit }}
@@ -114,7 +114,7 @@ spec:
114114
timeoutSeconds: 3
115115
periodSeconds: 2
116116
volumeMounts:
117-
- mountPath: /plugin
117+
- mountPath: /csi
118118
name: socket-dir
119119
- name: liveness-probe
120120
imagePullPolicy: Always

deploy/csi-rclone/templates/csi-nodeplugin-rclone.yaml

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ spec:
2222
- name: node-driver-registrar
2323
args:
2424
- --v=5
25-
- --csi-address=/plugin/csi.sock
25+
- --csi-address=/csi/csi.sock
2626
- --kubelet-registration-path=/var/lib/kubelet/plugins/{{ .Values.storageClassName }}/csi.sock
2727
env:
2828
- name: KUBE_NODE_NAME
@@ -45,17 +45,17 @@ spec:
4545
resources:
4646
{{- toYaml .Values.csiNodepluginRclone.rclone.resources | nindent 12 }}
4747
volumeMounts:
48-
- mountPath: /plugin
48+
- mountPath: /csi
4949
name: plugin-dir
5050
- mountPath: /registration
5151
name: registration-dir
5252
- name: liveness-probe
5353
imagePullPolicy: Always
5454
image: registry.k8s.io/sig-storage/livenessprobe:v2.15.0
5555
args:
56-
- --csi-address=/plugin/csi.sock
56+
- --csi-address=/csi/csi.sock
5757
volumeMounts:
58-
- mountPath: /plugin
58+
- mountPath: /csi
5959
name: plugin-dir
6060
- name: rclone
6161
args:
@@ -86,7 +86,7 @@ spec:
8686
fieldRef:
8787
fieldPath: spec.nodeName
8888
- name: CSI_ENDPOINT
89-
value: "unix://plugin/csi.sock"
89+
value: "unix://csi/csi.sock"
9090
- name: KUBERNETES_CLUSTER_DOMAIN
9191
value: {{ quote .Values.kubernetesClusterDomain }}
9292
- name: DRIVER_NAME
@@ -134,8 +134,10 @@ spec:
134134
timeoutSeconds: 10
135135
periodSeconds: 30
136136
volumeMounts:
137-
- mountPath: /plugin
137+
- mountPath: /csi
138138
name: plugin-dir
139+
- mountPath: /run/csi-rclone
140+
name: node-temp-dir
139141
- mountPath: /var/lib/kubelet/pods
140142
mountPropagation: Bidirectional
141143
name: pods-mount-dir
@@ -154,6 +156,11 @@ spec:
154156
{{ toYaml . | nindent 8 }}
155157
{{- end }}
156158
volumes:
159+
- hostPath:
160+
# NOTE: We mount on /tmp because we want the saved configuration to not survive a whole node restart.
161+
path: /tmp/{{.Release.Namespace}}-{{.Release.Name}}-{{.Release.Revision}}
162+
type: DirectoryOrCreate
163+
name: node-temp-dir
157164
- hostPath:
158165
path: {{ .Values.kubeletDir }}/plugins/{{ .Values.storageClassName }}
159166
type: DirectoryOrCreate
@@ -167,4 +174,4 @@ spec:
167174
type: DirectoryOrCreate
168175
name: registration-dir
169176
- name: cache-dir
170-
emptyDir:
177+
emptyDir: {}

deploy/csi-rclone/values.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ csiControllerRclone:
2222
# memory: 128Mi
2323
# requests:
2424
# cpu: 100m
25-
# memory: 128M
25+
# memory: 128Mi
2626
# If set, used to set GOMEMLIMIT, it should be strictly lower than
2727
# limits.memory to prevent OOMkills
28-
goMemLimit: # 115Mi
28+
goMemLimit: # 115MiB
2929
# Prometheus metrics
3030
metrics:
3131
enabled: true
@@ -68,7 +68,7 @@ csiNodepluginRclone:
6868
# memory: 128Mi
6969
# If set, used to set GOMEMLIMIT, it should be strictly lower than
7070
# limits.memory to prevent OOMkills
71-
goMemLimit: # 115Mi
71+
goMemLimit: # 115MiB
7272
# Prometheus metrics
7373
metrics:
7474
enabled: true

pkg/common/constants.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package common
2+
3+
import (
4+
"os"
5+
"syscall"
6+
)
7+
8+
// Signals to listen to:
9+
// 1. os.Interrup -> allows devs to easily run a server locally
10+
// 2. syscall.SIGTERM -> sent by kubernetes when stopping a server gracefully
11+
var InterruptSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}

0 commit comments

Comments
 (0)