Skip to content

Commit 2f58454

Browse files
committed
Merge remote-tracking branch 'NVIDIA/main' into feature-nvos-health
2 parents f2482c0 + 1ad3362 commit 2f58454

29 files changed

Lines changed: 280 additions & 444 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/api-db/src/machine_interface.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use std::net::IpAddr;
2020
use std::str::FromStr;
2121

2222
use carbide_network::ip::{IdentifyAddressFamily, IpAddressFamily};
23+
use carbide_utils::redfish::BmcAccessInfo;
2324
use carbide_uuid::domain::DomainId;
2425
use carbide_uuid::machine::{MachineId, MachineInterfaceId};
2526
use carbide_uuid::network::{NetworkPrefixId, NetworkSegmentId};
@@ -295,6 +296,25 @@ pub async fn lookup_bmc_ip_by_mac_address(
295296
.map_err(|e| DatabaseError::query(query, e))
296297
}
297298

299+
pub async fn lookup_bmc_access_info(
300+
db: impl DbReader<'_>,
301+
ip: IpAddr,
302+
port: Option<u16>,
303+
) -> DatabaseResult<BmcAccessInfo> {
304+
let mac_address = find_by_ip(db, ip)
305+
.await?
306+
.ok_or_else(|| DatabaseError::NotFoundError {
307+
kind: "Machine Interface",
308+
id: ip.to_string(),
309+
})?
310+
.mac_address;
311+
Ok(BmcAccessInfo {
312+
host: ip.to_string(),
313+
port,
314+
mac_address,
315+
})
316+
}
317+
298318
pub async fn find_by_ip(
299319
txn: impl DbReader<'_>,
300320
ip: IpAddr,

crates/api/src/handlers/attestation.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,16 @@ pub(crate) async fn trigger_machine_attestation(
6565
}));
6666
}
6767
};
68+
let bmc_ip_addr = bmc_info.ip_addr().map_err(|e| CarbideError::Internal {
69+
message: format!("{}", e),
70+
})?;
6871

69-
let redfish_client_future = api.redfish_pool.create_client_for_ingested_host(
70-
bmc_info.ip_addr().map_err(|e| CarbideError::Internal {
71-
message: format!("{}", e),
72-
})?,
73-
bmc_info.port,
74-
&api.database_connection,
75-
);
72+
let bmc_access_info =
73+
db::machine_interface::lookup_bmc_access_info(&mut db_reader, bmc_ip_addr, bmc_info.port)
74+
.await?;
75+
drop(db_reader);
76+
77+
let redfish_client_future = api.redfish_pool.client_by_info(&bmc_access_info);
7678

7779
let redfish_client = match tt::timeout(redfish_timeout_duration, redfish_client_future).await {
7880
Ok(redfish_result) => redfish_result.map_err(|e| CarbideError::RedfishClientCreation {

crates/api/src/handlers/uefi.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,20 @@ pub(crate) async fn clear_host_uefi_password(
8080
id: machine_id.to_string(),
8181
})?;
8282

83+
let addr = snapshot.host_snapshot.bmc_addr().ok_or_else(|| {
84+
CarbideError::InvalidArgument("Specified machine does not have BMC address".into())
85+
})?;
86+
87+
let bmc_access_info =
88+
db::machine_interface::lookup_bmc_access_info(&mut txn, addr.ip(), Some(addr.port()))
89+
.await?;
90+
8391
// Don't hold the transaction across an await point
8492
txn.commit().await?;
8593

8694
let redfish_client = api
8795
.redfish_pool
88-
.create_client_from_machine(&snapshot.host_snapshot, &api.database_connection)
96+
.client_by_info(&bmc_access_info)
8997
.await
9098
.map_err(|e| {
9199
tracing::error!("unable to create redfish client: {}", e);
@@ -165,12 +173,21 @@ pub(crate) async fn set_host_uefi_password(
165173
kind: "machine",
166174
id: machine_id.to_string(),
167175
})?;
176+
177+
let addr = snapshot.host_snapshot.bmc_addr().ok_or_else(|| {
178+
CarbideError::InvalidArgument("Specified machine does not have BMC address".into())
179+
})?;
180+
181+
let bmc_access_info =
182+
db::machine_interface::lookup_bmc_access_info(&mut txn, addr.ip(), Some(addr.port()))
183+
.await?;
184+
168185
// Let txn drop so we don't hold it across a redfish request
169186
txn.commit().await?;
170187

171188
let redfish_client = api
172189
.redfish_pool
173-
.create_client_from_machine(&snapshot.host_snapshot, &api.database_connection)
190+
.client_by_info(&bmc_access_info)
174191
.await
175192
.map_err(|e| {
176193
tracing::error!("unable to create redfish client: {}", e);

crates/api/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ mod network_segment;
5959
mod run;
6060
mod scout_stream;
6161
mod setup;
62-
mod state_controller;
6362
mod storage;
6463
#[cfg(test)]
6564
mod tests;

crates/api/src/setup.rs

Lines changed: 44 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ use crate::machine_update_manager::MachineUpdateManager;
107107
use crate::measured_boot::metrics_collector::MeasuredBootMetricsCollector;
108108
use crate::mqtt_state_change_hook::hook::MqttStateChangeHook;
109109
use crate::scout_stream::ConnectionRegistry;
110-
use crate::state_controller::common_services::CommonStateHandlerServices;
111110
use crate::{attestation, db_init, ethernet_virtualization, listener};
112111

113112
/// The resolved set of network declarations passed from `start_api` into
@@ -1023,34 +1022,22 @@ pub async fn initialize_and_start_controllers<'a>(
10231022
emitter_builder.build()
10241023
};
10251024

1026-
let handler_services = Arc::new(CommonStateHandlerServices {
1027-
db_pool: db_pool.clone(),
1028-
db_reader: db_pool.clone().into(),
1029-
redfish_client_pool: shared_redfish_pool.clone(),
1030-
ib_fabric_manager: ib_fabric_manager.clone(),
1031-
ib_pools: common_pools.infiniband.clone(),
1032-
ipmi_tool: ipmi_tool.clone(),
1033-
site_config: carbide_config.clone(),
1034-
dpa_info,
1035-
rms_client: rms_client.clone(),
1036-
switch_system_image_rms_client: carbide_config
1037-
.rms
1038-
.api_url
1039-
.as_deref()
1040-
.filter(|url| !url.is_empty())
1041-
.map(|url| {
1042-
let rms_client_config = librms::client_config::RmsClientConfig::new(
1043-
carbide_config.rms.root_ca_path.clone(),
1044-
carbide_config.rms.client_cert.clone(),
1045-
carbide_config.rms.client_key.clone(),
1046-
carbide_config.rms.enforce_tls,
1047-
);
1048-
let rms_api_config = librms::client::RmsApiConfig::new(url, &rms_client_config);
1049-
Arc::new(librms::RackManagerApi::new(&rms_api_config))
1050-
as Arc<dyn carbide_rack::rms_client::SwitchSystemImageRmsClient>
1051-
}),
1052-
credential_manager: credential_manager.clone(),
1053-
});
1025+
let switch_system_image_rms_client = carbide_config
1026+
.rms
1027+
.api_url
1028+
.as_deref()
1029+
.filter(|url| !url.is_empty())
1030+
.map(|url| {
1031+
let rms_client_config = librms::client_config::RmsClientConfig::new(
1032+
carbide_config.rms.root_ca_path.clone(),
1033+
carbide_config.rms.client_cert.clone(),
1034+
carbide_config.rms.client_key.clone(),
1035+
carbide_config.rms.enforce_tls,
1036+
);
1037+
let rms_api_config = librms::client::RmsApiConfig::new(url, &rms_client_config);
1038+
Arc::new(librms::RackManagerApi::new(&rms_api_config))
1039+
as Arc<dyn carbide_rack::rms_client::SwitchSystemImageRmsClient>
1040+
});
10541041

10551042
// Use the hostname as cluster-wide state controller ID
10561043
// The expectation here is that either the host only runs a single
@@ -1069,14 +1056,11 @@ pub async fn initialize_and_start_controllers<'a>(
10691056
.processor_id(state_controller_id.clone())
10701057
.services(
10711058
MachineStateHandlerServices {
1072-
db_pool: handler_services.db_pool.clone(),
1073-
db_reader: handler_services.db_reader.clone(),
1074-
redfish_client_pool: handler_services.redfish_client_pool.clone(),
1075-
ipmi_tool: handler_services.ipmi_tool.clone(),
1076-
site_config: handler_services
1077-
.site_config
1078-
.machine_state_handler_site_config()
1079-
.into(),
1059+
db_pool: db_pool.clone(),
1060+
db_reader: db_pool.clone().into(),
1061+
redfish_client_pool: shared_redfish_pool.clone(),
1062+
ipmi_tool: ipmi_tool.clone(),
1063+
site_config: carbide_config.machine_state_handler_site_config().into(),
10801064
}
10811065
.into(),
10821066
)
@@ -1151,7 +1135,7 @@ pub async fn initialize_and_start_controllers<'a>(
11511135
.processor_id(state_controller_id.clone())
11521136
.services(
11531137
NetworkSegmentStateHandlerServices {
1154-
db_pool: handler_services.db_pool.clone(),
1138+
db_pool: db_pool.clone(),
11551139
}
11561140
.into(),
11571141
);
@@ -1175,10 +1159,10 @@ pub async fn initialize_and_start_controllers<'a>(
11751159
.processor_id(state_controller_id.clone())
11761160
.services(
11771161
DpaInterfaceStateHandlerServices {
1178-
db_pool: handler_services.db_pool.clone(),
1179-
db_reader: handler_services.db_reader.clone(),
1180-
dpa_info: handler_services.dpa_info.clone(),
1181-
hb_interval: handler_services.site_config.get_hb_interval(),
1162+
db_pool: db_pool.clone(),
1163+
db_reader: db_pool.clone().into(),
1164+
dpa_info,
1165+
hb_interval: carbide_config.get_hb_interval(),
11821166
}
11831167
.into(),
11841168
)
@@ -1203,8 +1187,8 @@ pub async fn initialize_and_start_controllers<'a>(
12031187
.processor_id(state_controller_id.clone())
12041188
.services(
12051189
SpdmStateHandlerServices {
1206-
db_pool: handler_services.db_pool.clone(),
1207-
redfish_client_pool: handler_services.redfish_client_pool.clone(),
1190+
db_pool: db_pool.clone(),
1191+
redfish_client_pool: shared_redfish_pool.clone(),
12081192
}
12091193
.into(),
12101194
)
@@ -1223,9 +1207,9 @@ pub async fn initialize_and_start_controllers<'a>(
12231207
.processor_id(state_controller_id.clone())
12241208
.services(
12251209
IBPartitionStateHandlerServices {
1226-
db_pool: handler_services.db_pool.clone(),
1227-
ib_fabric_manager: handler_services.ib_fabric_manager.clone(),
1228-
ib_pools: handler_services.ib_pools.clone(),
1210+
db_pool: db_pool.clone(),
1211+
ib_fabric_manager: ib_fabric_manager.clone(),
1212+
ib_pools: common_pools.infiniband.clone(),
12291213
}
12301214
.into(),
12311215
)
@@ -1240,9 +1224,9 @@ pub async fn initialize_and_start_controllers<'a>(
12401224
.processor_id(state_controller_id.clone())
12411225
.services(
12421226
PowerShelfStateHandlerServices {
1243-
db_pool: handler_services.db_pool.clone(),
1244-
rms_client: handler_services.rms_client.clone(),
1245-
credential_manager: handler_services.credential_manager.clone(),
1227+
db_pool: db_pool.clone(),
1228+
rms_client: rms_client.clone(),
1229+
credential_manager: credential_manager.clone(),
12461230
}
12471231
.into(),
12481232
)
@@ -1257,21 +1241,16 @@ pub async fn initialize_and_start_controllers<'a>(
12571241
.processor_id(state_controller_id.clone())
12581242
.services(
12591243
RackStateHandlerServices {
1260-
db_pool: handler_services.db_pool.clone(),
1261-
rms_client: handler_services.rms_client.clone(),
1244+
db_pool: db_pool.clone(),
1245+
rms_client: rms_client.clone(),
12621246
site_config: RackConfig {
1263-
rms: handler_services.site_config.rms.clone(),
1264-
rack_validation_config: handler_services
1265-
.site_config
1266-
.rack_validation_config
1267-
.clone(),
1268-
rack_profiles: handler_services.site_config.rack_profiles.clone(),
1247+
rms: carbide_config.rms.clone(),
1248+
rack_validation_config: carbide_config.rack_validation_config.clone(),
1249+
rack_profiles: carbide_config.rack_profiles.clone(),
12691250
}
12701251
.into(),
1271-
switch_system_image_rms_client: handler_services
1272-
.switch_system_image_rms_client
1273-
.clone(),
1274-
credential_manager: handler_services.credential_manager.clone(),
1252+
switch_system_image_rms_client,
1253+
credential_manager: credential_manager.clone(),
12751254
}
12761255
.into(),
12771256
)
@@ -1285,9 +1264,9 @@ pub async fn initialize_and_start_controllers<'a>(
12851264
.processor_id(state_controller_id.clone())
12861265
.services(
12871266
SwitchStateHandlerServices {
1288-
db_pool: handler_services.db_pool.clone(),
1289-
rms_client: handler_services.rms_client.clone(),
1290-
credential_manager: handler_services.credential_manager.clone(),
1267+
db_pool: db_pool.clone(),
1268+
rms_client: rms_client.clone(),
1269+
credential_manager: credential_manager.clone(),
12911270
}
12921271
.into(),
12931272
)

crates/api/src/state_controller/common_services.rs

Lines changed: 0 additions & 69 deletions
This file was deleted.

0 commit comments

Comments
 (0)