Skip to content

MountDevice failed log spamming for statefulset volumes after RKE version upgrade #3203

Open
@bandak2

Description

@bandak2

RKE version
v1.3.18

Docker version
20.10.17-ce

Operating system and kernel
SUSE Linux Enterprise Server 15 SP4 -- Kernel 5.14.21-150400.22-default

Type/provider of hosts
VMware VM

cluster.yml file:

# Three cluster nodes: worker1 and worker2 carry the worker role, and master
# carries both the controlplane and etcd roles. Every entry uses SSH port 22,
# user root, the key at ~/.ssh/id_rsa and the Docker socket
# /var/run/docker.sock; all other per-node fields are left empty.
nodes:
- address: worker1
  port: "22"
  internal_address: ""
  hostname_override: ""
  user: "root"
  ssh_key: ""
  ssh_cert: ""
  ssh_cert_path: ""
  labels: {}
  taints: []
  ssh_key_path: ~/.ssh/id_rsa
  docker_socket: /var/run/docker.sock
  role: [worker]
# The only node with the controlplane and etcd roles.
- address: master
  port: "22"
  internal_address: ""
  hostname_override: ""
  user: "root"
  ssh_key: ""
  ssh_cert: ""
  ssh_cert_path: ""
  labels: {}
  taints: []
  ssh_key_path: ~/.ssh/id_rsa
  docker_socket: /var/run/docker.sock
  role: [controlplane, etcd]
- address: worker2
  port: "22"
  internal_address: ""
  hostname_override: ""
  user: "root"
  ssh_key: ""
  ssh_cert: ""
  ssh_cert_path: ""
  labels: {}
  taints: []
  ssh_key_path: ~/.ssh/id_rsa
  docker_socket: /var/run/docker.sock
  role: [worker]
# Per-component settings. Every image, extra_* and win_extra_* field in this
# section is empty, with one exception: kubelet.extra_args sets max-pods.
services:
  etcd:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    external_urls: []
    ca_cert: ""
    cert: ""
    key: ""
    path: ""
    uid: 0
    gid: 0
    # NOTE(review): snapshot/retention/creation are set alongside
    # backup_config below — presumably only one of the two snapshot schemes
    # takes effect; confirm against the RKE documentation.
    snapshot: true
    retention: 24h
    creation: 12h
    backup_config:
      interval_hours: 12
      retention: 6
  kube-api:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    # Same range as kube-controller.service_cluster_ip_range further down.
    service_cluster_ip_range: 10.43.0.0/16
    service_node_port_range: ""
    pod_security_policy: false
    always_pull_images: false
    secrets_encryption_config: null
    audit_log: null
    admission_configuration: null
    event_rate_limit: null
  kube-controller:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    cluster_cidr: 10.42.0.0/16
    service_cluster_ip_range: 10.43.0.0/16
  scheduler:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
  kubelet:
    image: ""
    # The only extra argument passed to any service in this file.
    extra_args:
      max-pods: 150
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    cluster_domain: cluster.local
    infra_container_image: ""
    # This address lies inside the 10.43.0.0/16 service range configured for
    # kube-api and kube-controller.
    cluster_dns_server: 10.43.0.10
    fail_swap_on: false
    generate_serving_certificate: false
  kubeproxy:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
# Cluster networking: the calico plugin is selected.
network:
  plugin: "calico"
  options:
    # NOTE(review): this is a canal option although plugin is calico —
    # presumably ignored for calico; confirm.
    canal_flannel_backend_type: vxlan
  # NOTE(review): 0 presumably means "use the plugin default" — confirm.
  mtu: 0
  node_selector: {}
  update_strategy: null
  tolerations: []
# x509 authentication with no additional SANs and no webhook configured.
authentication:
  strategy: x509
  sans: []
  webhook: null
# No user-defined add-ons: both the inline manifest and the include list are
# empty.
addons: ""
addons_include: []
# Every image below is an empty string, so no image is pinned explicitly.
# NOTE(review): presumably RKE then falls back to the defaults for the
# configured kubernetes_version — confirm.
system_images:
  etcd: ""
  alpine: ""
  nginx_proxy: ""
  cert_downloader: ""
  kubernetes_services_sidecar: ""
  kubedns: ""
  dnsmasq: ""
  kubedns_sidecar: ""
  kubedns_autoscaler: ""
  coredns: ""
  coredns_autoscaler: ""
  nodelocal: ""
  kubernetes: ""
  flannel: ""
  flannel_cni: ""
  calico_node: ""
  calico_cni: ""
  calico_controllers: ""
  calico_ctl: ""
  calico_flexvol: ""
  canal_node: ""
  canal_cni: ""
  canal_controllers: ""
  canal_flannel: ""
  canal_flexvol: ""
  weave_node: ""
  weave_cni: ""
  pod_infra_container: ""
  ingress: ""
  ingress_backend: ""
  ingress_webhook: ""
  metrics_server: ""
  windows_pod_infra_container: ""
  aci_cni_deploy_container: ""
  aci_host_container: ""
  aci_opflex_container: ""
  aci_mcast_container: ""
  aci_ovs_container: ""
  aci_controller_container: ""
  aci_gbp_server_container: ""
  aci_opflex_server_container: ""
# Cluster-level SSH settings are empty; each node entry and the bastion host
# set their own ssh_key_path instead.
ssh_key_path: ""
ssh_cert_path: ""
ssh_agent_auth: false
# RBAC authorization with no extra options.
authorization:
  mode: rbac
  options: {}
ignore_docker_version: false
# NOTE(review): left null while the report lists Docker 20.10.17-ce and the
# target version below is Kubernetes v1.24 — confirm what RKE v1.3.18 defaults
# this to.
enable_cri_dockerd: null
# Target version of the upgrade described in the reproduction steps
# (1.22.17 -> 1.24.9).
kubernetes_version: v1.24.9-rancher1-1
private_registries: []
# Ingress controller: the nginx provider with a single option set
# (use-forwarded-headers); every other field is empty, null or 0.
ingress:
  provider: nginx
  options:
    # Quoted, so the value is the string "true" rather than a YAML boolean.
    use-forwarded-headers: "true"
  node_selector: {}
  extra_args: {}
  dns_policy: ""
  extra_envs: []
  extra_volumes: []
  extra_volume_mounts: []
  update_strategy: null
  # NOTE(review): 0 presumably means "use the default port" — confirm.
  http_port: 0
  https_port: 0
  network_mode: ""
  tolerations: []
  default_backend: null
  default_http_backend_priority_class_name: ""
  nginx_ingress_controller_priority_class_name: ""
  default_ingress_class: null
# Cluster identity and miscellaneous settings; no cloud provider is named and
# both path prefixes are empty.
cluster_name: rke
cloud_provider:
  name: ""
prefix_path: ""
win_prefix_path: ""
addon_job_timeout: 0
# Bastion host "bastion", accessed as root with the key at ~/.ssh/id_rsa.
# NOTE(review): presumably RKE tunnels its SSH connections to the nodes
# through this host — confirm.
bastion_host:
  address: bastion
  # Empty here, unlike the node entries, which set port "22" explicitly.
  port: ""
  user: "root"
  ssh_key: ""
  ssh_key_path: "~/.ssh/id_rsa"
  ssh_cert: ""
  ssh_cert_path: ""
  ignore_proxy_env_vars: false
# Monitoring: no provider is named and every field is empty or null.
monitoring:
  provider: ""
  options: {}
  node_selector: {}
  update_strategy: null
  replicas: null
  tolerations: []
  metrics_server_priority_class_name: ""
# No etcd restore is requested and no snapshot is named.
restore:
  restore: false
  snapshot_name: ""
rotate_encryption_key: false
dns: null

Steps to Reproduce:

  • While on Kubernetes 1.22.17, with StatefulSets running with volume mounts, upgrade the RKE Kubernetes version from 1.22.17 to 1.24.9.
  • After the upgrade, the StatefulSet pods and their respective volumes from the PVs are mounted as expected.
  • However, the event logs are populated with repeated warnings and errors stating that the volume is unmounted and mounted elsewhere. The volume of event spam is high: ~3800 such events over a 4-day observation period.

Results:

{
  "log": "I0316 12:14:58.508866   22179 operation_generator.go:626] \"MountVolume.WaitForAttach succeeded for volume \\\"csivol-d8ed8f41a7\\\" (UniqueName: \\\"kubernetes.io/csi/csi-unity.dellemc.com^csivol-d8ed8f41a7-iSCSI-storagearray-sv_268814\\\") pod \\\"web-0\\\" (UID: \\\"98a7514b-414b-4ddf-ab23-572e606c7244\\\") DevicePath \\\"csi-240ae18dc9f334d896c39711cbf58fd23c51db537b8bd6bcaffc35edf25abe1f\\\"\" pod=\"default/web-0\"\n",
  "stream": "stderr",
  "time": "2023-03-16T12:14:58.513491344Z"
}
{
  "log": "I0316 12:14:59.411402   22179 kubelet.go:2182] \"SyncLoop (probe)\" probe=\"readiness\" status=\"\" pod=\"kube-system/coredns-autoscaler-79dcc864f5-pmwrf\"\n",
  "stream": "stderr",
  "time": "2023-03-16T12:14:59.412760158Z"
}
{
  "log": "I0316 12:14:59.412556   22179 kubelet.go:2182] \"SyncLoop (probe)\" probe=\"readiness\" status=\"ready\" pod=\"kube-system/coredns-autoscaler-79dcc864f5-pmwrf\"\n",
  "stream": "stderr",
  "time": "2023-03-16T12:14:59.412787709Z"
}
{
  "log": "I0316 12:15:00.152054   22179 kubelet.go:2182] \"SyncLoop (probe)\" probe=\"readiness\" status=\"\" pod=\"kube-system/calico-node-vfc9r\"\n",
  "stream": "stderr",
  "time": "2023-03-16T12:15:00.15371283Z"
}
{
  "log": "E0316 12:15:00.286052   22179 csi_attacher.go:344] kubernetes.io/csi: attacher.MountDevice failed: rpc error: code = Internal desc =  runid=34 device already in use and mounted elsewhere. Cannot do private mount\n",
  "stream": "stderr",
  "time": "2023-03-16T12:15:00.286173505Z"
}
{
  "log": "E0316 12:15:00.286292   22179 nestedpendingoperations.go:348] Operation for \"{volumeName:kubernetes.io/csi/csi-unity.dellemc.com^csivol-d8ed8f41a7-iSCSI-storagearray-sv_268814 podName: nodeName:}\" failed. No retries permitted until 2023-03-16 12:15:01.286274144 +0000 UTC m=+15.145119628 (durationBeforeRetry 1s). Error: MountVolume.MountDevice failed for volume \"csivol-d8ed8f41a7\" (UniqueName: \"kubernetes.io/csi/csi-unity.dellemc.com^csivol-d8ed8f41a7-iSCSI-storagearray-sv_268814\") pod \"web-0\" (UID: \"98a7514b-414b-4ddf-ab23-572e606c7244\") : rpc error: code = Internal desc =  runid=34 device already in use and mounted elsewhere. Cannot do private mount\n",
  "stream": "stderr",
  "time": "2023-03-16T12:15:00.28633199Z"
}

SURE-6124

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions