Skip to content

Commit 8722fd0

Browse files
committed
Address PR comments
Signed-off-by: Felicity Xu <hanyux@nvidia.com>
1 parent 0930c91 commit 8722fd0

5 files changed

Lines changed: 332 additions & 23 deletions

File tree

crates/api-core/src/cfg/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ applicable.
2020
| `ib_config` | `Option<IBFabricConfig>` || InfiniBand fabric configuration (see [IBFabricConfig](#ibfabricconfig)). |
2121
| `asn` | `u32` | **required** | Autonomous System Number, fixed per environment. Used by nico-dpu-agent for `frr.conf` BGP routing. |
2222
| `dhcp_servers` | `Vec<Ipv4Addr>` | `[]` | DHCP server addresses announced to DPUs during network provisioning. |
23+
| `ntp_servers` | `Vec<Ipv4Addr>` | `[]` | Site-level NTP server IPs used for BMC time configuration and DHCP NTP Server configuration. |
2324
| `route_servers` | `Vec<String>` | `[]` | Route server IPs for L2VPN Ethernet Virtual network support. |
2425
| `enable_route_servers` | `bool` | `false` | Enables route server injection into DPU FRR configs for L2VPN. |
2526
| `deny_prefixes` | `Vec<Ipv4Network>` | `[]` | IPv4 CIDR prefixes that tenant instances are blocked from reaching. Generates iptables DROP rules and nvue ACL policies. |

crates/api-core/src/tests/host_bmc_firmware_test.rs

Lines changed: 167 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,14 @@ async fn test_preingestion_bmc_upgrade(
9292
.await?
9393
.into_inner();
9494

95-
// First, a host where it's already up to date; it should immediately go to complete.
95+
// First, a host where it's already up to date; it should go to complete
96+
// after passing through the explicit NTP setup state.
9697
let addr = response.address.as_str();
9798
insert_endpoint_version(&mut txn, addr, "6.00.30.00", "1.13.2", false).await?;
9899
txn.commit().await?;
99100

101+
mgr.run_single_iteration().await?;
102+
100103
mgr.run_single_iteration().await?;
101104
let mut txn = pool.begin().await.unwrap();
102105
assert!(
@@ -117,6 +120,7 @@ async fn test_preingestion_bmc_upgrade(
117120
txn.commit().await?;
118121
let mut txn = pool.begin().await.unwrap();
119122

123+
mgr.run_single_iteration().await?;
120124
mgr.run_single_iteration().await?;
121125

122126
assert!(
@@ -136,6 +140,7 @@ async fn test_preingestion_bmc_upgrade(
136140
insert_endpoint_version(&mut txn, addr, "4.9", "1.13.2", false).await?;
137141
txn.commit().await?;
138142

143+
mgr.run_single_iteration().await?;
139144
mgr.run_single_iteration().await?;
140145
// The "upload" is synchronous now and will be complete at this point.
141146

@@ -287,6 +292,7 @@ async fn test_preingestion_upgrade_script(
287292
insert_endpoint_version(&mut txn, addr, "0", "0", false).await?;
288293
txn.commit().await?;
289294

295+
mgr.run_single_iteration().await?;
290296
mgr.run_single_iteration().await?;
291297

292298
let mut txn = pool.begin().await.unwrap();
@@ -999,6 +1005,7 @@ async fn test_preingestion_preupdate_powercycling(
9991005
insert_endpoint_version(&mut txn, addr, "4.9", "1.1", true).await?;
10001006
txn.commit().await?;
10011007

1008+
mgr.run_single_iteration().await?;
10021009
mgr.run_single_iteration().await?;
10031010
// The "upload" is synchronous now and will be complete at this point.
10041011

@@ -2299,7 +2306,13 @@ async fn test_explicit_update(pool: sqlx::PgPool) -> CarbideResult<()> {
22992306
async fn test_preingestion_time_sync_ok(
23002307
pool: sqlx::PgPool,
23012308
) -> Result<(), Box<dyn std::error::Error>> {
2302-
let env = common::api_fixtures::create_test_env(pool.clone()).await;
2309+
let mut config = get_config();
2310+
config.ntp_servers = vec![
2311+
"198.51.100.10".parse().unwrap(),
2312+
"198.51.100.11".parse().unwrap(),
2313+
];
2314+
let env =
2315+
create_test_env_with_overrides(pool.clone(), TestEnvOverrides::with_config(config)).await;
23032316

23042317
let mgr = PreingestionManager::new(
23052318
pool.clone(),
@@ -2330,7 +2343,23 @@ async fn test_preingestion_time_sync_ok(
23302343
insert_endpoint_version(&mut txn, addr, "6.00.30.00", "1.13.2", false).await?;
23312344
txn.commit().await?;
23322345

2333-
// Run preingestion manager - should check time sync, pass, then check firmware, and complete
2346+
let timepoint = env.redfish_sim.timepoint();
2347+
2348+
// Run preingestion manager - should apply site NTP servers, check time sync,
2349+
// then check firmware and complete.
2350+
mgr.run_single_iteration().await?;
2351+
2352+
mgr.run_single_iteration().await?;
2353+
2354+
let actions = env.redfish_sim.actions_since(&timepoint);
2355+
assert!(
2356+
actions
2357+
.all_hosts()
2358+
.iter()
2359+
.any(|a| matches!(a, RedfishSimAction::SetNtpServers(_))),
2360+
"Expected SetNtpServers when site NTP is configured"
2361+
);
2362+
23342363
mgr.run_single_iteration().await?;
23352364

23362365
let mut txn = pool.begin().await.unwrap();
@@ -2346,6 +2375,141 @@ async fn test_preingestion_time_sync_ok(
23462375
Ok(())
23472376
}
23482377

2378+
/// Test that an empty NTP server config skips Redfish NTP setup and proceeds
2379+
/// directly to initial checks from the SetNtpServers state.
2380+
#[crate::sqlx_test]
2381+
async fn test_preingestion_set_ntp_servers_empty(
2382+
pool: sqlx::PgPool,
2383+
) -> Result<(), Box<dyn std::error::Error>> {
2384+
let mut config = get_config();
2385+
config.ntp_servers.clear(); // Use empty NTP servers.
2386+
2387+
let env =
2388+
create_test_env_with_overrides(pool.clone(), TestEnvOverrides::with_config(config)).await;
2389+
2390+
let mgr = PreingestionManager::new(
2391+
pool.clone(),
2392+
env.config.preingestion_manager(),
2393+
env.redfish_sim.clone(),
2394+
env.test_meter.meter(),
2395+
None,
2396+
None,
2397+
None,
2398+
env.api.work_lock_manager_handle.clone(),
2399+
env.config.ntp_servers.clone(),
2400+
);
2401+
2402+
let response = env
2403+
.api
2404+
.discover_dhcp(
2405+
DhcpDiscovery::builder("b8:3f:d2:90:97:a6", "192.0.2.1")
2406+
.vendor_string("iDRac")
2407+
.tonic_request(),
2408+
)
2409+
.await?
2410+
.into_inner();
2411+
2412+
let addr = response.address.as_str();
2413+
let ip_addr = IpAddr::from_str(addr).unwrap();
2414+
let mut txn = pool.begin().await.unwrap();
2415+
insert_endpoint_version(&mut txn, addr, "6.00.30.00", "1.13.2", false).await?;
2416+
2417+
// Transition to start of SetNtpServers state with no attempts made so far.
2418+
db::explored_endpoints::set_preingestion_set_ntp_servers(ip_addr, None, 0, &mut txn).await?;
2419+
txn.commit().await?;
2420+
2421+
let timepoint = env.redfish_sim.timepoint();
2422+
mgr.run_single_iteration().await?;
2423+
2424+
// Expect no SetNtpServers actions since NTP server config is empty.
2425+
let actions = env.redfish_sim.actions_since(&timepoint);
2426+
assert!(
2427+
!actions
2428+
.all_hosts()
2429+
.iter()
2430+
.any(|a| matches!(a, RedfishSimAction::SetNtpServers(_))),
2431+
"Did not expect SetNtpServers when NTP server config is empty"
2432+
);
2433+
2434+
// Expect to go to complete since no need to set NTP server config.
2435+
let mut txn = pool.begin().await.unwrap();
2436+
assert_eq!(
2437+
db::explored_endpoints::find_all_preingestion_complete(&mut txn)
2438+
.await?
2439+
.len(),
2440+
1
2441+
);
2442+
txn.commit().await?;
2443+
2444+
Ok(())
2445+
}
2446+
2447+
/// Test that exhausting NTP setup attempts proceeds to initial checks without
2448+
/// failing preingestion or trying to set NTP again.
2449+
#[crate::sqlx_test]
2450+
async fn test_preingestion_set_ntp_servers_max_attempts(
2451+
pool: sqlx::PgPool,
2452+
) -> Result<(), Box<dyn std::error::Error>> {
2453+
let mut config = get_config();
2454+
config.ntp_servers = vec!["198.51.100.10".parse().unwrap()];
2455+
let env =
2456+
create_test_env_with_overrides(pool.clone(), TestEnvOverrides::with_config(config)).await;
2457+
2458+
let mgr = PreingestionManager::new(
2459+
pool.clone(),
2460+
env.config.preingestion_manager(),
2461+
env.redfish_sim.clone(),
2462+
env.test_meter.meter(),
2463+
None,
2464+
None,
2465+
None,
2466+
env.api.work_lock_manager_handle.clone(),
2467+
env.config.ntp_servers.clone(),
2468+
);
2469+
2470+
let response = env
2471+
.api
2472+
.discover_dhcp(
2473+
DhcpDiscovery::builder("b8:3f:d2:90:97:a6", "192.0.2.1")
2474+
.vendor_string("iDRac")
2475+
.tonic_request(),
2476+
)
2477+
.await?
2478+
.into_inner();
2479+
2480+
let addr = response.address.as_str();
2481+
let ip_addr = IpAddr::from_str(addr).unwrap();
2482+
let mut txn = pool.begin().await.unwrap();
2483+
insert_endpoint_version(&mut txn, addr, "6.00.30.00", "1.13.2", false).await?;
2484+
2485+
db::explored_endpoints::set_preingestion_set_ntp_servers(ip_addr, None, 3, &mut txn).await?;
2486+
txn.commit().await?;
2487+
2488+
let timepoint = env.redfish_sim.timepoint();
2489+
mgr.run_single_iteration().await?;
2490+
2491+
let actions = env.redfish_sim.actions_since(&timepoint);
2492+
assert!(
2493+
!actions
2494+
.all_hosts()
2495+
.iter()
2496+
.any(|a| matches!(a, RedfishSimAction::SetNtpServers(_))),
2497+
"Did not expect SetNtpServers after max attempts are exhausted"
2498+
);
2499+
2500+
// The next iteration should go to complete since NTP setup is given up.
2501+
let mut txn = pool.begin().await.unwrap();
2502+
assert_eq!(
2503+
db::explored_endpoints::find_all_preingestion_complete(&mut txn)
2504+
.await?
2505+
.len(),
2506+
1
2507+
);
2508+
txn.commit().await?;
2509+
2510+
Ok(())
2511+
}
2512+
23492513
/// Test that preingestion handles the TimeSyncReset state machine correctly
23502514
#[crate::sqlx_test]
23512515
async fn test_preingestion_time_sync_reset_flow(
@@ -2405,12 +2569,6 @@ async fn test_preingestion_time_sync_reset_flow(
24052569
all_actions.contains(&RedfishSimAction::SetUtcTimezone),
24062570
"Expected SetUtcTimezone action to be called during TimeSyncReset Start phase"
24072571
);
2408-
assert!(
2409-
all_actions
2410-
.iter()
2411-
.any(|a| matches!(a, RedfishSimAction::SetNtpServers(_))),
2412-
"Expected SetNtpServers action to be called during TimeSyncReset Start phase"
2413-
);
24142572

24152573
let mut txn = pool.begin().await.unwrap();
24162574
let endpoints =

crates/api-db/src/explored_endpoints.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
*/
1717
use std::net::IpAddr;
1818

19-
use chrono::Utc;
19+
use chrono::{DateTime, Utc};
2020
use config_version::ConfigVersion;
2121
use mac_address::MacAddress;
2222
use model::firmware::FirmwareComponentType;
@@ -429,6 +429,16 @@ pub async fn set_preingestion_initial_bmc_reset(
429429
set_preingestion(address, state, txn).await
430430
}
431431

432+
pub async fn set_preingestion_set_ntp_servers(
433+
address: IpAddr,
434+
set_at: Option<DateTime<Utc>>,
435+
attempts: u32,
436+
txn: &mut PgConnection,
437+
) -> Result<(), DatabaseError> {
438+
let state = PreingestionState::SetNtpServers { set_at, attempts };
439+
set_preingestion(address, state, txn).await
440+
}
441+
432442
pub async fn set_preingestion_time_sync_reset(
433443
address: IpAddr,
434444
phase: TimeSyncResetPhase,

crates/api-model/src/site_explorer/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,14 @@ pub enum PreingestionState {
407407
InitialBMCReset {
408408
phase: InitialBmcResetPhase,
409409
},
410+
/// Configure site NTP servers on the BMC before checking whether its clock
411+
/// is synchronized. `set_at` records a successful Redfish update so the
412+
/// state machine can wait for the setting to take effect before checking.
413+
SetNtpServers {
414+
set_at: Option<DateTime<Utc>>,
415+
#[serde(default)]
416+
attempts: u32,
417+
},
410418
TimeSyncReset {
411419
phase: TimeSyncResetPhase,
412420
last_time: DateTime<Utc>,

0 commit comments

Comments
 (0)