Skip to content

Commit 5f76b55

Browse files
committed
[Draft] Split nvidia-mdev arch in two stages
1 parent 2944874 commit 5f76b55

File tree

14 files changed

+311
-64
lines changed

14 files changed

+311
-64
lines changed

automation/vars/nvidia-mdev.yaml

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ vas:
3737
src_file: values.yaml
3838
build_output: nodeset.yaml
3939
post_stage_run:
40-
- name: Install nvidia driver
40+
- name: Run phase 1 playbook
4141
type: playbook
4242
# As a reminder, the job needs to set the nvidia driver URL
43-
source: "../../playbooks/nvidia-mdev.yml"
43+
source: "../../playbooks/nvidia-mdev-phase1.yml"
4444
inventory: "${HOME}/ci-framework-data/artifacts/zuul_inventory.yml"
4545

4646
- path: examples/va/nvidia-mdev/edpm/deployment
@@ -53,3 +53,30 @@ vas:
5353
- name: edpm-deployment-values
5454
src_file: values.yaml
5555
build_output: deployment.yaml
56+
57+
- path: examples/va/nvidia-mdev/edpm-post-driver/nodeset  # post-driver nodeset stage
58+
wait_conditions:  # wait for osdpns openstack-edpm to report SetupReady (60m timeout)
59+
- >-
60+
oc -n openstack wait
61+
osdpns openstack-edpm --for condition=SetupReady
62+
--timeout=60m
63+
values:
64+
- name: edpm-post-driver-nodeset-values
65+
src_file: values.yaml
66+
build_output: nodeset-post-driver.yaml  # distinct from the nodeset.yaml built earlier in this file
67+
post_stage_run:
68+
- name: Run phase 2 playbook  # counterpart of the "Run phase 1 playbook" hook above
69+
type: playbook
70+
source: "../../playbooks/nvidia-mdev-phase2.yml"
71+
inventory: "${HOME}/ci-framework-data/artifacts/zuul_inventory.yml"
72+
73+
- path: examples/va/nvidia-mdev/edpm-post-driver/deployment  # post-driver deployment stage
74+
wait_conditions:  # wait for osdpns openstack-edpm to report Ready (60m timeout)
75+
- >-
76+
oc -n openstack wait
77+
osdpns openstack-edpm --for condition=Ready
78+
--timeout=60m
79+
values:
80+
- name: edpm-post-driver-deployment-values
81+
src_file: values.yaml
82+
build_output: deployment-post-driver.yaml
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
dataplane-deployment.yaml
2+
dataplane-nodeset.yaml
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dataplane-deployment.yaml
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1beta1
3+
kind: Kustomization
4+
5+
components:
6+
- ../../../../../va/nvidia-mdev/edpm-post-driver/deployment
7+
# - https://github.com/openstack-k8s-operators/architecture/va/nvidia-mdev/edpm-post-driver/deployment?ref=main
8+
## The local path ../../../../../va/nvidia-mdev/edpm-post-driver/deployment/ can be replaced with a git checkout URL
9+
## (such as the commented-out entry above), as described in https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md
10+
11+
resources:
12+
- values.yaml
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# yamllint disable rule:line-length
2+
# local-config: referenced, but not emitted by kustomize
3+
---
4+
apiVersion: v1
5+
kind: ConfigMap
6+
metadata:
7+
name: edpm-post-driver-deployment-values
8+
annotations:
9+
config.kubernetes.io/local-config: "true"
10+
data: {}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dataplane-nodeset.yaml
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1beta1
3+
kind: Kustomization
4+
5+
components:
6+
- ../../../../../va/nvidia-mdev/edpm-post-driver/nodeset
7+
# - https://github.com/openstack-k8s-operators/architecture/va/nvidia-mdev/edpm-post-driver/nodeset?ref=main
8+
## The local path ../../../../../va/nvidia-mdev/edpm-post-driver/nodeset/ can be replaced with a git checkout URL
9+
## (such as the commented-out entry above), as described in https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md
10+
11+
resources:
12+
- values.yaml
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# yamllint disable rule:line-length
2+
# local-config: referenced, but not emitted by kustomize
3+
---
4+
apiVersion: v1
5+
kind: ConfigMap
6+
metadata:
7+
name: edpm-post-driver-nodeset-values
8+
annotations:
9+
config.kubernetes.io/local-config: "true"
10+
data:
11+
root_password: cmVkaGF0Cg==
12+
preProvisioned: false
13+
baremetalSetTemplate:
14+
ctlplaneInterface: eno2 # CHANGEME
15+
cloudUserName: cloud-admin
16+
provisioningInterface: enp1s0 # CHANGEME
17+
bmhLabelSelector:
18+
app: openstack # CHANGEME
19+
passwordSecret:
20+
name: baremetalset-password-secret
21+
namespace: openstack
22+
ssh_keys:
23+
# Authorized keys that will have access to the dataplane computes via SSH
24+
authorized: CHANGEME
25+
# The private key that will have access to the dataplane computes via SSH
26+
private: CHANGEME2
27+
# The public key that will have access to the dataplane computes via SSH
28+
public: CHANGEME3
29+
nodeset:
30+
ansible:
31+
ansibleUser: cloud-admin
32+
ansiblePort: 22
33+
ansibleVars:
34+
# CHANGEME -- see https://access.redhat.com/solutions/253273
35+
# edpm_bootstrap_command: |
36+
# subscription-manager register --username <subscription_manager_username> --password <subscription_manager_password>
37+
# podman login -u <registry_username> -p <registry_password> registry.redhat.io
38+
timesync_ntp_servers:
39+
- hostname: pool.ntp.org
40+
# CPU pinning settings
41+
edpm_kernel_args: "default_hugepagesz=1GB hugepagesz=1G hugepages=16 intel_iommu=on iommu=pt isolcpus=4-23,28-47"
42+
edpm_tuned_profile: "cpu-partitioning-powersave"
43+
edpm_tuned_isolated_cores: "4-23,28-47"
44+
# edpm_network_config
45+
# These vars are edpm_network_config role vars
46+
edpm_network_config_hide_sensitive_logs: false
47+
edpm_network_config_os_net_config_mappings:  # per-node NIC alias -> MAC; keep MACs quoted so all-digit addresses stay strings
48+
edpm-compute-0:
49+
nic2: "6c:fe:54:3f:8a:02"  # CHANGEME
50+
nic3: "6c:fe:54:3f:8a:03"  # CHANGEME
51+
edpm-compute-1:
52+
nic2: "6b:fe:54:3f:8a:02"  # CHANGEME
53+
nic3: "6b:fe:54:3f:8a:03"  # CHANGEME
54+
edpm_network_config_template: |
55+
---
56+
{% set mtu_list = [ctlplane_mtu] %}
57+
{% for network in nodeset_networks %}
58+
{{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }}
59+
{%- endfor %}
60+
{% set min_viable_mtu = mtu_list | max %}
61+
network_config:
62+
- type: ovs_bridge
63+
name: {{ neutron_physical_bridge_name }}
64+
mtu: {{ min_viable_mtu }}
65+
use_dhcp: false
66+
dns_servers: {{ ctlplane_dns_nameservers }}
67+
domain: {{ dns_search_domains }}
68+
addresses:
69+
- ip_netmask: {{ ctlplane_ip }}/{{ ctlplane_cidr }}
70+
routes: {{ ctlplane_host_routes }}
71+
members:
72+
- type: interface
73+
name: nic2
74+
mtu: {{ min_viable_mtu }}
75+
# force the MAC address of the bridge to this interface
76+
primary: true
77+
{% for network in nodeset_networks %}
78+
- type: vlan
79+
mtu: {{ lookup('vars', networks_lower[network] ~ '_mtu') }}
80+
vlan_id: {{ lookup('vars', networks_lower[network] ~ '_vlan_id') }}
81+
addresses:
82+
- ip_netmask:
83+
{{ lookup('vars', networks_lower[network] ~ '_ip') }}/{{ lookup('vars', networks_lower[network] ~ '_cidr') }}
84+
routes: {{ lookup('vars', networks_lower[network] ~ '_host_routes') }}
85+
{% endfor %}
86+
- type: sriov_pf
87+
name: nic3
88+
numvfs: 10
89+
use_dhcp: false
90+
promisc: true
91+
92+
# These vars are for the network config templates themselves and are
93+
# considered EDPM network defaults.
94+
neutron_physical_bridge_name: br-ex
95+
neutron_public_interface_name: eth0
96+
# edpm_nodes_validation
97+
edpm_nodes_validation_validate_controllers_icmp: false
98+
edpm_nodes_validation_validate_gateway_icmp: false
99+
dns_search_domains: []
100+
gather_facts: false
101+
# edpm firewall, change the allowed CIDR if needed
102+
edpm_sshd_configure_firewall: true
103+
edpm_sshd_allowed_ranges:
104+
- 192.168.122.0/24
105+
# SRIOV settings
106+
edpm_neutron_sriov_agent_SRIOV_NIC_physical_device_mappings: 'sriov-phy4:eno4'
107+
networks:
108+
- defaultRoute: true
109+
name: ctlplane
110+
subnetName: subnet1
111+
- name: internalapi
112+
subnetName: subnet1
113+
- name: storage
114+
subnetName: subnet1
115+
- name: tenant
116+
subnetName: subnet1
117+
nodes:
118+
edpm-compute-0:
119+
hostName: edpm-compute-0
120+
edpm-compute-1:
121+
hostName: edpm-compute-1
122+
services:
123+
- neutron-ovn
124+
- nova-custom-sriov
125+
- neutron-sriov
126+
- neutron-metadata
127+
nova:
128+
compute:
129+
conf: |
130+
# CHANGEME
131+
[DEFAULT]
132+
reserved_host_memory_mb = 4096
133+
reserved_huge_pages = node:0,size:4,count:524160
134+
reserved_huge_pages = node:1,size:4,count:524160
135+
[compute]
136+
cpu_shared_set = 0-3,24-27
137+
cpu_dedicated_set = 8-23,32-47
138+
[devices]
139+
mdev_enabled_types = nvidia-268
140+
migration:
141+
ssh_keys:
142+
private: CHANGEME4
143+
public: CHANGEME5
144+
pci:
145+
conf: |
146+
# CHANGEME
147+
[pci]
148+
device_spec = {"vendor_id":"8086", "product_id":"1572", "address": "0000:19:00.3", "physical_network":"sriov-phy4", "trusted":"true"}

examples/va/nvidia-mdev/edpm/nodeset/values.yaml

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -120,41 +120,7 @@ data:
120120
edpm-compute-1:
121121
hostName: edpm-compute-1
122122
services:  # NOTE(review): after this hunk only the four entries kept below remain for edpm/nodeset (bootstrap … ovn are dropped), and the same four are listed in the 148-line values.yaml added by this commit — confirm the split is not inverted
123-
- bootstrap
124-
- download-cache
125-
- configure-network
126-
- validate-network
127-
- install-os
128-
- configure-os
129-
- ssh-known-hosts
130-
- run-os
131-
- reboot-os
132-
- install-certs
133-
- libvirt
134-
- ovn
135123
- neutron-ovn
136124
- nova-custom-sriov
137125
- neutron-sriov
138126
- neutron-metadata
139-
nova:
140-
compute:
141-
conf: |
142-
# CHANGEME
143-
[DEFAULT]
144-
reserved_host_memory_mb = 4096
145-
reserved_huge_pages = node:0,size:4,count:524160
146-
reserved_huge_pages = node:1,size:4,count:524160
147-
[compute]
148-
cpu_shared_set = 0-3,24-27
149-
cpu_dedicated_set = 8-23,32-47
150-
[devices]
151-
mdev_enabled_types = nvidia-268
152-
migration:
153-
ssh_keys:
154-
private: CHANGEME4
155-
public: CHANGEME5
156-
pci:
157-
conf: |
158-
# CHANGEME
159-
[pci]
160-
device_spec = {"vendor_id":"8086", "product_id":"1572", "address": "0000:19:00.3", "physical_network":"sriov-phy4", "trusted":"true"}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1alpha1
3+
kind: Component
4+
5+
transformers:
6+
# Assign the "openstack" namespace to namespaced objects that do not already set one (unsetOnly: true)
7+
- |-
8+
apiVersion: builtin
9+
kind: NamespaceTransformer
10+
metadata:
11+
name: _ignored_
12+
namespace: openstack
13+
setRoleBindingSubjects: none
14+
unsetOnly: true
15+
fieldSpecs:
16+
- path: metadata/name
17+
kind: Namespace
18+
create: true
19+
20+
components:  # builds on the shared component under lib/dataplane
21+
- ../../../../lib/dataplane/deployment
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
apiVersion: v1
3+
data:
4+
NodeRootPassword: _replaced_  # placeholder; a kustomize replacement in this commit writes data.root_password from the values ConfigMap here
5+
kind: Secret
6+
metadata:
7+
name: baremetalset-password-secret
8+
namespace: openstack
9+
type: Opaque
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1alpha1
3+
kind: Component
4+
5+
transformers:
6+
# Set namespace to OpenStack on all namespaced objects without a namespace
7+
- |-
8+
apiVersion: builtin
9+
kind: NamespaceTransformer
10+
metadata:
11+
name: _ignored_
12+
namespace: openstack
13+
setRoleBindingSubjects: none
14+
unsetOnly: true
15+
fieldSpecs:
16+
- path: metadata/name
17+
kind: Namespace
18+
create: true
19+
20+
components:
21+
- ../../../../lib/dataplane/nodeset
22+
23+
resources:
24+
- baremetalset-password-secret.yaml
25+
- nova_sriov.yaml
26+
27+
replacements:  # every source reads ConfigMap edpm-post-driver-nodeset-values; NOTE(review): confirm ../../../../lib/dataplane/nodeset expects the same ConfigMap name
28+
- source:  # node root password -> Secret baremetalset-password-secret
29+
kind: ConfigMap
30+
name: edpm-post-driver-nodeset-values
31+
fieldPath: data.root_password
32+
targets:
33+
- select:
34+
kind: Secret
35+
name: baremetalset-password-secret
36+
fieldPaths:
37+
- data.NodeRootPassword
38+
options:
39+
create: true
40+
41+
# Nova compute CPU pinning customization: data.nova.compute.conf -> ConfigMap cpu-pinning-nova
42+
- source:
43+
kind: ConfigMap
44+
name: edpm-post-driver-nodeset-values
45+
fieldPath: data.nova.compute.conf
46+
targets:
47+
- select:
48+
kind: ConfigMap
49+
name: cpu-pinning-nova
50+
fieldPaths:
51+
- data.25-cpu-pinning-nova\.conf
52+
options:
53+
create: true
54+
# Nova compute PCI passthrough customization: data.nova.pci.conf -> ConfigMap sriov-nova
55+
- source:
56+
kind: ConfigMap
57+
name: edpm-post-driver-nodeset-values
58+
fieldPath: data.nova.pci.conf
59+
targets:
60+
- select:
61+
kind: ConfigMap
62+
name: sriov-nova
63+
fieldPaths:
64+
- data.03-sriov-nova\.conf
65+
options:
66+
create: true

0 commit comments

Comments
 (0)