|
| 1 | += Troubleshooting Rook on OpenShift |
| 2 | +Antonio C. <ac (at) trikorasolutions (dot) com> |
| 3 | +:revdate: {docdate} |
| 4 | +:icons: font |
| 5 | +:toc: left |
| 6 | +:toclevels: 3 |
| 7 | +:toc-title: Table of Contents |
| 8 | +:description: Rook on OpenShift |
| 9 | + |
| 10 | +== Rook Ceph Operator doesn't deploy any POD |
| 11 | + |
| 12 | +*Problem* |
| 13 | + |
| 14 | +Rook operator ReplicaSet doesn't deploy any POD. |
| 15 | + |
| 16 | +*Symptom* |
| 17 | + |
| 18 | +Several ReplicaSets of the Rook Ceph project are in an error state. The error is as follows. |
| 19 | + |
| 20 | +[source,bash] |
| 21 | +---- |
| 22 | +oc -n rook-ceph describe rs rook-ceph-operator-746677996c |
| 23 | +---- |
| 24 | + |
| 25 | +[source,] |
| 26 | +---- |
| 27 | +Events: |
| 28 | + Type Reason Age From Message |
| 29 | + ---- ------ ---- ---- ------- |
| 30 | + Warning FailedCreate 85s (x119 over 8h) replicaset-controller Error creating: pods "rook-ceph-operator-746677996c-" is forbidden: unable to validate against any security context constraint: [provider "anyuid": Forbidden: not usable by user or serviceaccount, provider "pipelines-scc": Forbidden: not usable by user or serviceaccount, provider "db2u-c-mas-masovh-system-scc": Forbidden: not usable by user or serviceaccount, provider restricted-v2: .containers[0].runAsUser: Invalid value: 2016: must be in the ranges: [1000290000, 1000299999], provider "restricted": Forbidden: not usable by user or serviceaccount, provider "nonroot-v2": Forbidden: not usable by user or serviceaccount, provider "nonroot": Forbidden: not usable by user or serviceaccount, provider "noobaa": Forbidden: not usable by user or serviceaccount, provider "hostmount-anyuid": Forbidden: not usable by user or serviceaccount, provider "machine-api-termination-handler": Forbidden: not usable by user or serviceaccount, provider "hostnetwork-v2": Forbidden: not usable by user or serviceaccount, provider "hostnetwork": Forbidden: not usable by user or serviceaccount, provider "hostaccess": Forbidden: not usable by user or serviceaccount, provider "rook-ceph": Forbidden: not usable by user or serviceaccount, provider "node-exporter": Forbidden: not usable by user or serviceaccount, provider "rook-ceph-csi": Forbidden: not usable by user or serviceaccount, provider "privileged": Forbidden: not usable by user or serviceaccount] |
| 31 | +---- |
| 32 | + |
| 33 | +*Cause* |
| 34 | + |
| 35 | +The Service Accounts used by the Rook Ceph resources are not allowed to use any Security Context Constraint (SCC) that satisfies the POD requirements. |
| 36 | + |
| 37 | +*Solution* |
| 38 | + |
| 39 | +*Rook Ceph Operator* |
| 40 | + |
| 41 | +[source,bash] |
| 42 | +---- |
| 43 | +oc -n rook-ceph get rs rook-ceph-operator-746677996c -oyaml | grep serviceAccount |
| 44 | +---- |
| 45 | + |
| 46 | +[source,] |
| 47 | +---- |
| 48 | +serviceAccount: rook-ceph-system |
| 49 | +serviceAccountName: rook-ceph-system |
| 50 | +---- |
| 51 | + |
| 52 | +[source,bash] |
| 53 | +---- |
| 54 | +oc -n rook-ceph get rs rook-ceph-operator-746677996c -oyaml | oc adm policy scc-subject-review --filename - |
| 55 | +---- |
| 56 | + |
| 57 | +[source,] |
| 58 | +---- |
| 59 | +RESOURCE ALLOWED BY |
| 60 | +ReplicaSet/rook-ceph-operator-746677996c anyuid |
| 61 | +---- |
| 62 | + |
| 63 | +[source,bash] |
| 64 | +---- |
| 65 | +oc adm policy add-scc-to-user anyuid -z rook-ceph-system |
| 66 | +
|
| 67 | +oc adm policy add-cluster-role-to-user cluster-admin system:serviceaccount:rook-ceph:rook-ceph-system |
| 68 | +oc adm policy add-scc-to-user anyuid -z rook-ceph-system -n rook-ceph |
| 69 | +oc adm policy add-scc-to-user privileged -z rook-ceph-system -n rook-ceph |
| 70 | +
|
| 71 | +oc adm policy add-cluster-role-to-user cluster-admin system:serviceaccount:rook-ceph:default |
| 72 | +oc adm policy add-scc-to-user anyuid -z default -n rook-ceph |
| 73 | +oc adm policy add-scc-to-user privileged -z default -n rook-ceph |
| 74 | +---- |
| 75 | + |
| 76 | +Redeploy Operator. |
| 77 | + |
| 78 | +[source,bash] |
| 79 | +---- |
| 80 | +oc -n rook-ceph scale deployment rook-ceph-operator --replicas=0 |
| 81 | +oc -n openshift-storage scale deployment ocs-operator --replicas=0 |
| 82 | +oc -n rook-ceph scale deployment -l app=rook-ceph-mon --replicas=0 |
| 83 | +echo "Sleeping 5s..." ; sleep 5 ; echo "...wake up!" |
| 84 | +oc -n rook-ceph scale deployment -l app=rook-ceph-mon --replicas=1 |
| 85 | +oc -n openshift-storage scale deployment ocs-operator --replicas=1 |
| 86 | +oc -n rook-ceph scale deployment rook-ceph-operator --replicas=1 |
| 87 | +---- |
| 88 | + |
| 89 | +*Rook Ceph Monitor* |
| 90 | + |
| 91 | +[source,bash] |
| 92 | +---- |
| 93 | +oc -n rook-ceph get rs rook-ceph-mon-c-64f848668b -oyaml | grep serviceAccount |
| 94 | +---- |
| 95 | + |
| 96 | +[source,] |
| 97 | +---- |
| 98 | +serviceAccount: rook-ceph-default |
| 99 | +serviceAccountName: rook-ceph-default |
| 100 | +---- |
| 101 | + |
| 102 | +[source,bash] |
| 103 | +---- |
| 104 | +oc -n rook-ceph get rs rook-ceph-mon-c-64f848668b -oyaml | oc adm policy scc-subject-review --filename - |
| 105 | +---- |
| 106 | + |
| 107 | +[source,] |
| 108 | +---- |
| 109 | +RESOURCE ALLOWED BY |
| 110 | +ReplicaSet/rook-ceph-mon-c-64f848668b db2u-c-mas-masovh-system-scc |
| 111 | +---- |
| 112 | + |
| 113 | +[source,bash] |
| 114 | +---- |
| 115 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-ceph-default |
| 116 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-ceph-default -n rook-ceph |
| 117 | +---- |
| 118 | + |
| 119 | +*Rook Ceph MGR* |
| 120 | + |
| 121 | +[source,bash] |
| 122 | +---- |
| 123 | +oc -n rook-ceph get rs rook-ceph-mgr-a-95f4697b9 -oyaml | grep serviceAccount |
| 124 | +---- |
| 125 | + |
| 126 | +[source,] |
| 127 | +---- |
| 128 | +serviceAccount: rook-ceph-mgr |
| 129 | +serviceAccountName: rook-ceph-mgr |
| 130 | +---- |
| 131 | + |
| 132 | +[source,bash] |
| 133 | +---- |
| 134 | +oc -n rook-ceph get rs rook-ceph-mgr-a-95f4697b9 -oyaml | oc adm policy scc-subject-review --filename - |
| 135 | +---- |
| 136 | + |
| 137 | +[source,] |
| 138 | +---- |
| 139 | +RESOURCE ALLOWED BY |
| 140 | +ReplicaSet/rook-ceph-mgr-a-95f4697b9 db2u-c-mas-masovh-system-scc |
| 141 | +---- |
| 142 | + |
| 143 | +[source,bash] |
| 144 | +---- |
| 145 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-ceph-mgr |
| 146 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-ceph-mgr -n rook-ceph |
| 147 | +---- |
| 148 | + |
| 149 | +*Rook Ceph OSD* |
| 150 | + |
| 151 | +[source,bash] |
| 152 | +---- |
| 153 | +oc -n rook-ceph get rs rook-ceph-osd-1-c9c7f6b97 -oyaml | grep serviceAccount |
| 154 | +---- |
| 155 | + |
| 156 | +[source,] |
| 157 | +---- |
| 158 | +serviceAccount: rook-ceph-osd |
| 159 | +serviceAccountName: rook-ceph-osd |
| 160 | +---- |
| 161 | + |
| 162 | +[source,bash] |
| 163 | +---- |
| 164 | +oc -n rook-ceph get rs rook-ceph-osd-1-c9c7f6b97 -oyaml | oc adm policy scc-subject-review --filename - |
| 165 | +---- |
| 166 | + |
| 167 | +[source,] |
| 168 | +---- |
| 169 | +RESOURCE ALLOWED BY |
| 170 | +ReplicaSet/rook-ceph-osd-1-c9c7f6b97 db2u-c-mas-masovh-system-scc |
| 171 | +---- |
| 172 | + |
| 173 | +[source,bash] |
| 174 | +---- |
| 175 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-ceph-osd |
| 176 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-ceph-osd -n rook-ceph |
| 177 | +---- |
| 178 | + |
| 179 | +*CSI CephFS plugin* |
| 180 | + |
| 181 | +[source,bash] |
| 182 | +---- |
| 183 | +oc -n rook-ceph get rs csi-cephfsplugin-provisioner-6668bdd9b -oyaml | grep serviceAccount |
| 184 | +---- |
| 185 | + |
| 186 | +[source,] |
| 187 | +---- |
| 188 | +serviceAccount: rook-csi-cephfs-provisioner-sa |
| 189 | +serviceAccountName: rook-csi-cephfs-provisioner-sa |
| 190 | +---- |
| 191 | + |
| 192 | +[source,bash] |
| 193 | +---- |
| 194 | +oc -n rook-ceph get rs csi-cephfsplugin-provisioner-6668bdd9b -oyaml | oc adm policy scc-subject-review --filename - |
| 195 | +---- |
| 196 | + |
| 197 | +[source,] |
| 198 | +---- |
| 199 | +RESOURCE ALLOWED BY |
| 200 | +ReplicaSet/csi-cephfsplugin-provisioner-6668bdd9b db2u-c-mas-masovh-system-scc |
| 201 | +---- |
| 202 | + |
| 203 | +[source,bash] |
| 204 | +---- |
| 205 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-csi-cephfs-provisioner-sa |
| 206 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-csi-cephfs-provisioner-sa -n rook-ceph |
| 207 | +---- |
| 208 | + |
| 209 | +This might take a while to recover from. |
| 210 | + |
| 211 | +*CSI RBD Plugin* |
| 212 | + |
| 213 | + |
| 214 | +[source,bash] |
| 215 | +---- |
| 216 | +oc -n rook-ceph get rs csi-rbdplugin-provisioner-57b5f57b9 -oyaml | grep serviceAccount |
| 217 | +---- |
| 218 | + |
| 219 | +[source,] |
| 220 | +---- |
| 221 | +serviceAccount: rook-csi-rbd-provisioner-sa |
| 222 | +serviceAccountName: rook-csi-rbd-provisioner-sa |
| 223 | +---- |
| 224 | + |
| 225 | +[source,bash] |
| 226 | +---- |
| 227 | +oc -n rook-ceph get rs csi-rbdplugin-provisioner-57b5f57b9 -oyaml | oc adm policy scc-subject-review --filename - |
| 228 | +---- |
| 229 | + |
| 230 | +[source,] |
| 231 | +---- |
| 232 | +RESOURCE ALLOWED BY |
| 233 | +ReplicaSet/csi-rbdplugin-provisioner-57b5f57b9 db2u-c-mas-masovh-system-scc |
| 234 | +---- |
| 235 | + |
| 236 | +[source,bash] |
| 237 | +---- |
| 238 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-csi-rbd-provisioner-sa |
| 239 | +oc adm policy add-scc-to-user db2u-c-mas-masovh-system-scc -z rook-csi-rbd-provisioner-sa -n rook-ceph |
| 240 | +---- |
| 241 | + |
| 242 | +[NOTE] |
| 243 | +==== |
| 244 | +Although it may seem that this solution is not working, the POD ends up being |
| 245 | + created after a while. |
| 246 | +
|
| 247 | +Follow the POD and ReplicaSet state update using the following commands. |
| 248 | +
|
| 249 | +.POD state change |
| 250 | +[source,bash] |
| 251 | +---- |
| 252 | +oc -n rook-ceph get pod -w |
| 253 | +---- |
| 254 | +
|
| 255 | +.ReplicaSet state change |
| 256 | +[source,bash] |
| 257 | +---- |
| 258 | +oc -n rook-ceph get rs -w |
| 259 | +---- |
| 260 | +==== |
| 261 | + |
| 262 | +== driver name rook-ceph.cephfs.csi.ceph.com not found |
| 263 | + |
| 264 | + |
| 265 | +*Problem* |
| 266 | + |
| 267 | +The POD stays in the ContainerCreating or Init state and doesn't start. |
| 268 | + |
| 269 | +*Symptom* |
| 270 | + |
| 271 | +Describing the POD shows the following events. |
| 272 | +[source,bash] |
| 273 | +---- |
| 274 | +Events: |
| 275 | + Type Reason Age From Message |
| 276 | + ---- ------ ---- ---- ------- |
| 277 | +... |
| 278 | + Warning FailedMount 5m2s (x107 over 3h50m) kubelet MountVolume.MountDevice failed for volume "pvc-6289230b-0c9c-4116-a10d-9dfa830a9677" : kubernetes.io/csi: attacher.MountDevice failed to create newCsiDriverClient: driver name rook-ceph.cephfs.csi.ceph.com not found in the list of registered CSI drivers |
| 279 | +---- |
| 280 | + |
| 281 | +Checking the csinodes shows that the DRIVERS value of the affected node is lower than that of the nodes that |
| 282 | + are working correctly (1 when it should be 3). |
| 283 | + |
| 284 | +[source,bash] |
| 285 | +---- |
| 286 | +oc get csinode |
| 287 | +---- |
| 288 | + |
| 289 | +[source,] |
| 290 | +---- |
| 291 | +NAME DRIVERS AGE |
| 292 | +host-10-13-0-13 1 3h41m |
| 293 | +---- |
| 294 | + |
| 295 | +*Cause* |
| 296 | + |
| 297 | +*Solution* |
| 298 | + |
| 299 | + |
| 300 | + |
| 301 | +== Node missing rook pod (crashcollector, ceph-exporter) |
0 commit comments