Skip to content

Commit 2b79bca

Browse files
committed
WIP: Add partial PVTime snapshot persistence
- Changes to PVTime design (use base addr) - Secondary constructor for PVTime using only base addr - Implement Persist interface for PVTime, save base addr - Add pvtime to struct MicrovmState and struct Vmm - Restore PVTime device in build_microvm_from_snapshot Signed-off-by: Dakshin Devanand <[email protected]>
1 parent 690c87e commit 2b79bca

File tree

4 files changed

+123
-36
lines changed

4 files changed

+123
-36
lines changed

Diff for: src/vmm/src/arch/aarch64/pvtime/mod.rs

+80-26
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,24 @@ use std::collections::HashMap;
44

55
use displaydoc::Display;
66
use kvm_bindings::{KVM_ARM_VCPU_PVTIME_CTRL, KVM_ARM_VCPU_PVTIME_IPA};
7-
use log::{debug, info, warn};
7+
use serde::{Deserialize, Serialize};
88
use thiserror::Error;
99
use vm_memory::GuestAddress;
1010

1111
use crate::device_manager::resources::ResourceAllocator;
12+
use crate::snapshot::Persist;
1213

1314
/// 64 bytes due to alignment requirement in 3.1 of https://www.kernel.org/doc/html/v5.8/virt/kvm/devices/vcpu.html#attribute-kvm-arm-vcpu-pvtime-ipa
1415
pub const STEALTIME_STRUCT_MEM_SIZE: u64 = 64;
1516

1617
/// Represent PVTime device for ARM
18+
/// TODO: Decide whether we want to keep the hashmap OR the base IPA
1719
#[derive(Debug)]
1820
pub struct PVTime {
1921
/// Maps vCPU index to IPA location of stolen_time struct as defined in DEN0057A
2022
steal_time_regions: HashMap<u8, u64>,
23+
/// The base IPA of the shared memory region
24+
base_ipa: u64,
2125
}
2226

2327
/// Errors associated with PVTime operations
@@ -32,31 +36,42 @@ pub enum PVTimeError {
3236
}
3337

3438
impl PVTime {
35-
/// Creates a new PVTime device by allocating system memory for all vCPUs
36-
pub fn new(
37-
resource_allocator: &mut ResourceAllocator,
38-
vcpu_count: u8,
39-
) -> Result<Self, PVTimeError> {
40-
41-
// This returns the IPA of the start of our shared memory region for all vCPUs.
42-
let base_addr: GuestAddress = GuestAddress(resource_allocator
43-
.allocate_system_memory(
44-
STEALTIME_STRUCT_MEM_SIZE * vcpu_count as u64,
45-
64,
46-
vm_allocator::AllocPolicy::LastMatch,
47-
)
48-
.map_err(|e| PVTimeError::AllocationFailed(e.to_string()))?);
49-
39+
/// Create a new PVTime device given a base addr
40+
/// - Assumes total shared memory region from base addr is already allocated
41+
fn from_base(base_addr: GuestAddress, vcpu_count: u8) -> Self {
42+
let base_ipa: u64 = base_addr.0;
5043

5144
// Now we need to store the base IPA for each vCPU's steal_time struct.
5245
let mut steal_time_regions = HashMap::new();
5346
for i in 0..vcpu_count {
54-
let ipa = base_addr.0 + (i as u64 * STEALTIME_STRUCT_MEM_SIZE);
47+
let ipa = base_ipa + (i as u64 * STEALTIME_STRUCT_MEM_SIZE);
5548
steal_time_regions.insert(i, ipa);
5649
}
5750

5851
// Return the PVTime device with the steal_time region IPAs mapped to vCPU indices.
59-
Ok(PVTime { steal_time_regions })
52+
PVTime {
53+
steal_time_regions,
54+
base_ipa,
55+
}
56+
}
57+
58+
/// Creates a new PVTime device by allocating new system memory for all vCPUs
59+
pub fn new(
60+
resource_allocator: &mut ResourceAllocator,
61+
vcpu_count: u8,
62+
) -> Result<Self, PVTimeError> {
63+
// This returns the IPA of the start of our shared memory region for all vCPUs.
64+
let base_ipa: GuestAddress = GuestAddress(
65+
resource_allocator
66+
.allocate_system_memory(
67+
STEALTIME_STRUCT_MEM_SIZE * vcpu_count as u64,
68+
64,
69+
vm_allocator::AllocPolicy::LastMatch,
70+
)
71+
.map_err(|e| PVTimeError::AllocationFailed(e.to_string()))?,
72+
);
73+
74+
Ok(Self::from_base(base_ipa, vcpu_count))
6075
}
6176

6277
/// Register a vCPU with its pre-allocated steal time region
@@ -65,7 +80,6 @@ impl PVTime {
6580
vcpu_index: u8,
6681
vcpu_fd: &kvm_ioctls::VcpuFd,
6782
) -> Result<(), PVTimeError> {
68-
6983
// Get IPA of the steal_time region for this vCPU
7084
let ipa = self
7185
.steal_time_regions
@@ -80,15 +94,55 @@ impl PVTime {
8094
flags: 0,
8195
};
8296

83-
vcpu_fd.set_device_attr(&vcpu_device_attr).map_err(|err| {
84-
PVTimeError::DeviceAttribute(err, true, KVM_ARM_VCPU_PVTIME_CTRL)
85-
})?;
97+
vcpu_fd
98+
.set_device_attr(&vcpu_device_attr)
99+
.map_err(|err| PVTimeError::DeviceAttribute(err, true, KVM_ARM_VCPU_PVTIME_CTRL))?;
86100

87101
Ok(())
88102
}
89103
}
90104

91-
// TODO/Q: Would we be correct in implementing Persist for PVTime? Some sort of persistence is
92-
// needed for snapshot capability. We would only need to store base_addr IPA of shared memory region
93-
// assuming the # of vCPUs is constant across snapshots. Also assuming we are correct that
94-
// kvm_set_device_attr takes an IPA and we get an IPA back from resource_allocator.
105+
/// Logic to save/restore the state of a PVTime device
106+
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
107+
pub struct PVTimeState {
108+
/// base IPA of the total shared memory region
109+
pub base_ipa: u64,
110+
}
111+
112+
#[derive(Debug)]
113+
pub struct PVTimeConstructorArgs<'a> {
114+
pub resource_allocator: &'a mut ResourceAllocator,
115+
pub vcpu_count: u8,
116+
}
117+
118+
impl<'a> Persist<'a> for PVTime {
119+
type State = PVTimeState;
120+
type ConstructorArgs = PVTimeConstructorArgs<'a>;
121+
type Error = PVTimeError;
122+
123+
/// Save base IPA of PVTime device for persistence
124+
fn save(&self) -> Self::State {
125+
PVTimeState {
126+
base_ipa: self.base_ipa,
127+
}
128+
}
129+
130+
/// Restore state of PVTime device from given base IPA
131+
fn restore(
132+
constructor_args: Self::ConstructorArgs,
133+
state: &Self::State,
134+
) -> std::result::Result<Self, PVTimeError> {
135+
constructor_args
136+
.resource_allocator
137+
.allocate_system_memory(
138+
STEALTIME_STRUCT_MEM_SIZE * constructor_args.vcpu_count as u64,
139+
64,
140+
vm_allocator::AllocPolicy::ExactMatch(state.base_ipa),
141+
)
142+
.map_err(|e| PVTimeError::AllocationFailed(e.to_string()))?;
143+
Ok(Self::from_base(
144+
GuestAddress(state.base_ipa),
145+
constructor_args.vcpu_count,
146+
))
147+
}
148+
}

Diff for: src/vmm/src/builder.rs

+33-9
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ use std::sync::{Arc, Mutex};
1111
use event_manager::{MutEventSubscriber, SubscriberOps};
1212
use libc::EFD_NONBLOCK;
1313
use linux_loader::cmdline::Cmdline as LoaderKernelCmdline;
14-
use log::{info, warn};
14+
use log::warn;
1515
use userfaultfd::Uffd;
1616
use utils::time::TimestampUs;
1717
#[cfg(target_arch = "aarch64")]
1818
use vm_superio::Rtc;
1919
use vm_superio::Serial;
2020
use vmm_sys_util::eventfd::EventFd;
2121

22-
use crate::arch::aarch64::pvtime::PVTimeError;
22+
use crate::arch::aarch64::pvtime::{PVTime, PVTimeConstructorArgs, PVTimeError};
2323
use crate::arch::{ConfigurationError, configure_system_for_boot, load_kernel};
2424
#[cfg(target_arch = "aarch64")]
2525
use crate::construct_kvm_mpidrs;
@@ -195,6 +195,7 @@ fn create_vmm_and_vcpus(
195195
#[cfg(target_arch = "x86_64")]
196196
pio_device_manager,
197197
acpi_device_manager,
198+
pv_time: None,
198199
};
199200

200201
Ok((vmm, vcpus))
@@ -420,6 +421,8 @@ pub enum BuildMicrovmFromSnapshotError {
420421
ACPIDeviManager(#[from] ACPIDeviceManagerRestoreError),
421422
/// VMGenID update failed: {0}
422423
VMGenIDUpdate(std::io::Error),
424+
/// Failed to restore PVTime device: {0}
425+
RestorePVTime(#[from] PVTimeError),
423426
}
424427

425428
/// Builds and starts a microVM based on the provided MicrovmState.
@@ -470,6 +473,21 @@ pub fn build_microvm_from_snapshot(
470473
.map_err(BuildMicrovmFromSnapshotError::RestoreVcpus)?;
471474
}
472475

476+
// Restore the PVTime device
477+
let pvtime_state = microvm_state.pvtime_state;
478+
if let Some(pvtime_state) = pvtime_state {
479+
let pvtime_ctor_args = PVTimeConstructorArgs {
480+
resource_allocator: &mut vmm.resource_allocator,
481+
vcpu_count: vcpus.len() as u8,
482+
};
483+
vmm.pv_time = Some(
484+
PVTime::restore(pvtime_ctor_args, &pvtime_state)
485+
.map_err(BuildMicrovmFromSnapshotError::RestorePVTime)?,
486+
);
487+
setup_pv_time_with(vmm.pv_time.as_ref().unwrap(), &mut vcpus) // We can force unwrap here b/c we just set it
488+
.map_err(BuildMicrovmFromSnapshotError::RestorePVTime)?;
489+
}
490+
473491
#[cfg(target_arch = "aarch64")]
474492
{
475493
let mpidrs = construct_kvm_mpidrs(&microvm_state.vcpu_states);
@@ -569,7 +587,7 @@ pub fn setup_serial_device(
569587
#[cfg(target_arch = "aarch64")]
570588
fn setup_pv_time(vmm: &mut Vmm, vcpus: &mut Vec<Vcpu>) -> Result<(), StartMicrovmError> {
571589
use crate::arch::aarch64::pvtime::PVTime;
572-
590+
573591
// Check if pvtime is enabled
574592
let pvtime_device_attr = kvm_bindings::kvm_device_attr {
575593
group: kvm_bindings::KVM_ARM_VCPU_PVTIME_CTRL,
@@ -590,15 +608,21 @@ fn setup_pv_time(vmm: &mut Vmm, vcpus: &mut Vec<Vcpu>) -> Result<(), StartMicrov
590608
let pv_time = PVTime::new(&mut vmm.resource_allocator, vcpus.len() as u8)
591609
.map_err(StartMicrovmError::CreatePVTime)?;
592610

611+
// Register all vcpus with pvtime device
612+
setup_pv_time_with(&pv_time, vcpus).map_err(StartMicrovmError::CreatePVTime)?;
613+
614+
// TODO: Might be a better design to return the device instead of assigning it to vmm here.
615+
vmm.pv_time = Some(pv_time);
616+
Ok(())
617+
}
618+
619+
/// Helper method to register all vcpus with pvtime device
620+
#[cfg(target_arch = "aarch64")]
621+
fn setup_pv_time_with(pv_time: &PVTime, vcpus: &mut Vec<Vcpu>) -> Result<(), PVTimeError> {
593622
// Register the vcpu with the pvtime device to map its steal time region
594623
for i in 0..vcpus.len() {
595-
pv_time
596-
.register_vcpu(i as u8, &vcpus[i].kvm_vcpu.fd)
597-
.map_err(StartMicrovmError::CreatePVTime)?; // TODO: Change this to its own error
624+
pv_time.register_vcpu(i as u8, &vcpus[i].kvm_vcpu.fd)?; // TODO: Change this to its own error
598625
}
599-
600-
// TODO: Store pv_time somewhere (in Vmm ?) for snapshotting later instead of dropping it
601-
602626
Ok(())
603627
}
604628

Diff for: src/vmm/src/lib.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ use crate::vstate::memory::{
157157
use crate::vstate::vcpu::VcpuState;
158158
pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
159159
pub use crate::vstate::vm::Vm;
160-
160+
use crate::arch::aarch64::pvtime::PVTime;
161161
/// Shorthand type for the EventManager flavour used by Firecracker.
162162
pub type EventManager = BaseEventManager<Arc<Mutex<dyn MutEventSubscriber>>>;
163163

@@ -324,6 +324,9 @@ pub struct Vmm {
324324
#[cfg(target_arch = "x86_64")]
325325
pio_device_manager: PortIODeviceManager,
326326
acpi_device_manager: ACPIDeviceManager,
327+
#[cfg(target_arch = "aarch64")]
328+
pv_time: Option<PVTime>,
329+
327330
}
328331

329332
impl Vmm {
@@ -523,6 +526,7 @@ impl Vmm {
523526

524527
let memory_state = self.guest_memory.describe();
525528
let acpi_dev_state = self.acpi_device_manager.save();
529+
let pvtime_state = self.pv_time.as_ref().map(|pvtime| pvtime.save());
526530

527531
Ok(MicrovmState {
528532
vm_info: vm_info.clone(),
@@ -532,6 +536,7 @@ impl Vmm {
532536
vcpu_states,
533537
device_states,
534538
acpi_dev_state,
539+
pvtime_state,
535540
})
536541
}
537542

Diff for: src/vmm/src/persist.rs

+4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use serde::{Deserialize, Serialize};
1616
use userfaultfd::{FeatureFlags, Uffd, UffdBuilder};
1717
use vmm_sys_util::sock_ctrl_msg::ScmSocket;
1818

19+
use crate::arch::aarch64::pvtime::PVTimeState;
1920
#[cfg(target_arch = "aarch64")]
2021
use crate::arch::aarch64::vcpu::get_manufacturer_id_from_host;
2122
use crate::builder::{self, BuildMicrovmFromSnapshotError};
@@ -88,6 +89,9 @@ pub struct MicrovmState {
8889
pub device_states: DeviceStates,
8990
/// ACPI devices state.
9091
pub acpi_dev_state: ACPIDeviceManagerState,
92+
/// PVTime device state (optional for platforms that support it).
93+
#[cfg(target_arch = "aarch64")]
94+
pub pvtime_state: Option<PVTimeState>,
9195
}
9296

9397
/// This describes the mapping between Firecracker base virtual address and

0 commit comments

Comments
 (0)