generated from canonical/platform-engineering-charm-template
-
Notifications
You must be signed in to change notification settings - Fork 18
Open
Description
Bug Description
After deploying a cluster on top of OpenStack with 3 control plane nodes and X workers, scaling in the workers or the control plane nodes (juju remove-unit) breaks the cluster.
From an OpenStack point of view, it looks like the VMs have lost their network connectivity, i.e., they no longer have Neutron ports attached to them.
To Reproduce
Deploy a cluster using the bundle below, then scale in the worker nodes using juju remove-unit k8s-worker/X:
default-base: [email protected]/stable
saas:
grafana:
url: admin/cos.grafana
loki:
url: admin/cos.loki
prometheus:
url: admin/cos.prometheus
applications:
cinder-csi:
charm: cinder-csi
channel: latest/stable
revision: 34
options:
storage-class-default: true
bindings:
"": juju
certificates: juju
kube-control: juju
openstack: juju
grafana-agent:
charm: grafana-agent
channel: 1/stable
revision: 457
options:
global_scrape_timeout: 60s
tls_insecure_skip_verify: true
bindings:
"": alpha
certificates: alpha
cos-agent: alpha
grafana-cloud-config: alpha
grafana-dashboards-provider: alpha
juju-info: alpha
logging-consumer: alpha
peers: alpha
receive-ca-cert: alpha
send-remote-write: alpha
tracing: alpha
k8s:
charm: k8s
channel: 1.32/stable
revision: 988
resources:
snap-installation: 2
num_units: 3
to:
- "2"
- "4"
- "5"
options:
bootstrap-node-taints: node-role.kubernetes.io/control-plane:NoSchedule
kube-apiserver-extra-args: 'oidc-issuer-url=https://oauth.example.com/realms/abc
oidc-client-id=cl-k8s oidc-groups-claim=groups oidc-groups-prefix=oidc:
oidc-ca-file=/home/ubuntu/oidc_ca.crt'
local-storage-enabled: false
constraints: arch=amd64 root-disk=102400 instance-type=c3i.4c-8G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
bindings:
"": juju
aws: juju
azure: juju
ceph-k8s-info: juju
cluster: juju
containerd: juju
cos-agent: juju
cos-tokens: juju
cos-worker-tokens: juju
etcd: juju
external-cloud-provider: juju
external-load-balancer: juju
gcp: juju
k8s-cluster: juju
kube-control: juju
upgrade: juju
k8s-worker:
charm: k8s-worker
channel: 1.32/stable
revision: 984
resources:
snap-installation: 6
num_units: 2
to:
- "0"
- "1"
constraints: arch=amd64 root-disk=102400 instance-type=gp3i.4c-16G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
bindings:
"": juju
aws: juju
azure: juju
cluster: juju
containerd: juju
cos-agent: juju
cos-tokens: juju
gcp: juju
upgrade: juju
logrotated:
charm: logrotated
channel: latest/stable
revision: 55
options:
logrotate-retention: 60
bindings:
"": alpha
juju-info: alpha
ntp:
charm: ntp
channel: latest/beta
revision: 51
options:
verify_ntp_servers: true
bindings:
"": alpha
juju-info: alpha
master: alpha
nrpe-external-master: alpha
ntp-peers: alpha
ntpmaster: alpha
openstack-cloud-controller:
charm: openstack-cloud-controller
channel: latest/stable
revision: 34
bindings:
"": juju
certificates: juju
external-cloud-provider: juju
kube-control: juju
openstack: juju
openstack-integrator:
charm: openstack-integrator
channel: latest/stable
revision: 106
resources:
openstackclients: 1
num_units: 1
to:
- "3"
options:
endpoint-tls-ca: |
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUZNVENDQkJtZ0F3SUJBZ0lVU3cxVnRWY1Uz
...
MXkwNTVMWXlRPT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQoK
ignore-volume-az: true
internal-lb: true
lb-subnet: 5efe8b17-b596-4c08-869c-ce247099a868
project-domain-name: TTC
project-name: canonical-deployment
subnet-id: 5efe8b17-b596-4c08-869c-ce247099a868
constraints: arch=amd64 root-disk=102400 instance-type=c3i.4c-8G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
bindings:
"": juju
clients: juju
credentials: juju
lb-consumers: juju
loadbalancer: juju
trust: true
ubuntu-pro:
charm: ubuntu-advantage
channel: latest/stable
revision: 143
options:
token: ***
bindings:
"": alpha
juju-info: alpha
machines:
"0":
constraints: arch=amd64 root-disk=102400 instance-type=gp3i.4c-16G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
"1":
constraints: arch=amd64 root-disk=102400 instance-type=gp3i.4c-16G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
"2":
constraints: arch=amd64 root-disk=102400 instance-type=c3i.4c-8G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
"3":
constraints: arch=amd64 root-disk=102400 instance-type=c3i.4c-8G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
"4":
constraints: arch=amd64 root-disk=102400 instance-type=c3i.4c-8G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
"5":
constraints: arch=amd64 root-disk=102400 instance-type=c3i.4c-8G zones=A01,A02,A03,A04,A13,A14
root-disk-source=volume
relations:
- - k8s-worker:cos-tokens
- k8s:cos-worker-tokens
- - k8s-worker:cluster
- k8s:k8s-cluster
- - k8s:containerd
- k8s-worker:containerd
- - k8s:kube-control
- openstack-cloud-controller:kube-control
- - k8s:external-cloud-provider
- openstack-cloud-controller:external-cloud-provider
- - cinder-csi:kube-control
- k8s:kube-control
- - openstack-integrator:clients
- openstack-cloud-controller:openstack
- - openstack-integrator:clients
- cinder-csi:openstack
- - k8s:external-load-balancer
- openstack-integrator:lb-consumers
- - grafana-agent:cos-agent
- k8s:cos-agent
- - k8s:juju-info
- logrotated:juju-info
--- # overlay.yaml
applications:
k8s:
exposed-endpoints:
"":
expose-to-cidrs:
- 0.0.0.0/32
Environment
juju with OpenStack provider
Relevant log output
juju debug-log (can't retrieve anything from the machines as they no longer have network connectivity)
unit-grafana-agent-8: 10:30:36 INFO juju.worker.uniter.operation ran "update-status" hook (via hook dispatching script: dispatch)
controller-0: 10:30:42 INFO juju.worker.provisioner provisioning in zones: [A01 A02 A03 A04 A13 A14]
controller-0: 10:30:42 INFO juju.worker.provisioner stopping known instances [c30bf5e5-a7ef-478b-b848-3a0b8a66c625]
controller-0: 10:30:45 INFO juju.worker.provisioner provisioning in zones: [A01 A02 A03 A04 A13 A14]
controller-0: 10:30:54 INFO juju.worker.provisioner removing dead machine "3"
controller-0: 10:36:23 INFO juju.worker.instancepoller machine "4" (instance ID "690879a4-9e9d-4a3d-a4f6-75a652672088") has new addresses: []
controller-0: 10:36:23 INFO juju.worker.instancepoller machine "0" (instance ID "2ea4b7d0-feb6-40ba-8ac3-b1944df3540a") has new addresses: []
controller-0: 10:36:23 INFO juju.worker.instancepoller machine "1" (instance ID "3e5fc67e-d9ae-4749-bf7e-6d2acf42d0be") has new addresses: []
controller-0: 10:36:23 INFO juju.worker.instancepoller machine "6" (instance ID "d7f9c5a4-0500-411e-8dc4-7cd2e5a305f8") has new addresses: []
controller-0: 10:36:23 INFO juju.worker.instancepoller machine "2" (instance ID "0f65bb05-f18c-4517-bb90-ce42ec91b02c") has new addresses: []
controller-0: 10:36:23 INFO juju.worker.instancepoller machine "7" (instance ID "ca0a98bf-a61c-4399-9a4a-36d6a05748f4") has new addresses: []
controller-0: 10:36:23 INFO juju.worker.instancepoller machine "5" (instance ID "f6fdfd61-451d-44e4-b59f-096cbbaa56f0") has new addresses: []
Additional context
No response
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels