Skip to content

Commit 13b6fa2

Browse files
committed
virtio-blk: support async discard operations
Signed-off-by: Jonas Savulionis <jonas@esnet.lt>
1 parent 44209c5 commit 13b6fa2

8 files changed

Lines changed: 351 additions & 45 deletions

File tree

docs/api_requests/block-io-engine.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ If a block device is configured with the `Async` io_engine on a host kernel
5959
older than 5.10.51, the API call will return a 400 Bad Request, with a
6060
suggestive error message.
6161

62+
When the `discard` block option is enabled with the `Async` IO engine,
63+
Firecracker uses `IORING_OP_FALLOCATE` for regular backing files. For block
64+
device backing stores, async discard requires the kernel block uring discard
65+
command (`BLOCK_URING_CMD_DISCARD`), which is available starting with Linux
66+
6.12. Any later kernel, including the Linux 7.x series, also satisfies this requirement.
67+
6268
## Performance considerations
6369

6470
The performance is strictly tied to the host kernel version. The gathered data

src/vmm/src/devices/virtio/block/virtio/device.rs

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,15 @@ impl DiskProperties {
9797
disk_image_path: String,
9898
is_disk_read_only: bool,
9999
file_engine_type: FileEngineType,
100+
discard: bool,
100101
) -> Result<Self, VirtioBlockError> {
101102
let mut disk_image = Self::open_file(&disk_image_path, is_disk_read_only)?;
102103
let disk_size = Self::file_size(&disk_image_path, &mut disk_image)?;
103104
let image_id = Self::build_disk_image_id(&disk_image);
104105

105106
Ok(Self {
106107
file_path: disk_image_path,
107-
file_engine: FileEngine::from_file(disk_image, file_engine_type)
108+
file_engine: FileEngine::from_file(disk_image, file_engine_type, discard)
108109
.map_err(VirtioBlockError::FileEngine)?,
109110
nsectors: disk_size >> SECTOR_SHIFT,
110111
image_id,
@@ -264,7 +265,7 @@ impl From<VirtioBlockConfig> for BlockDeviceConfig {
264265
cache_type: value.cache_type,
265266

266267
is_read_only: Some(value.is_read_only),
267-
discard: Some(value.discard),
268+
discard: value.discard.then_some(true),
268269
path_on_host: Some(value.path_on_host),
269270
rate_limiter: value.rate_limiter,
270271
file_engine_type: Some(value.file_engine_type),
@@ -319,14 +320,11 @@ impl VirtioBlock {
319320
///
320321
/// The given file must be seekable and sizable.
321322
pub fn new(config: VirtioBlockConfig) -> Result<VirtioBlock, VirtioBlockError> {
322-
if config.discard && config.file_engine_type == FileEngineType::Async {
323-
return Err(VirtioBlockError::DiscardAsyncUnsupported);
324-
}
325-
326323
let disk_properties = DiskProperties::new(
327324
config.path_on_host,
328325
config.is_read_only,
329326
config.file_engine_type,
327+
config.discard,
330328
)?;
331329

332330
let rate_limiter = config
@@ -813,16 +811,20 @@ mod tests {
813811
f.as_file().set_len(size).unwrap();
814812

815813
for engine in [FileEngineType::Sync, FileEngineType::Async] {
816-
let disk_properties =
817-
DiskProperties::new(String::from(f.as_path().to_str().unwrap()), true, engine)
818-
.unwrap();
814+
let disk_properties = DiskProperties::new(
815+
String::from(f.as_path().to_str().unwrap()),
816+
true,
817+
engine,
818+
false,
819+
)
820+
.unwrap();
819821

820822
assert_eq!(size, u64::from(SECTOR_SIZE) * num_sectors);
821823
assert_eq!(disk_properties.nsectors, num_sectors);
822824
// Testing `backing_file.virtio_block_disk_image_id()` implies
823825
// duplicating that logic in tests, so skipping it.
824826

825-
let res = DiskProperties::new("invalid-disk-path".to_string(), true, engine);
827+
let res = DiskProperties::new("invalid-disk-path".to_string(), true, engine, false);
826828
assert!(
827829
matches!(res, Err(VirtioBlockError::BackingFile(_, _))),
828830
"{:?}",
@@ -891,10 +893,11 @@ mod tests {
891893
rate_limiter: None,
892894
file_engine_type: FileEngineType::Async,
893895
};
894-
assert!(matches!(
895-
VirtioBlock::new(async_config),
896-
Err(VirtioBlockError::DiscardAsyncUnsupported)
897-
));
896+
let block = VirtioBlock::new(async_config).unwrap();
897+
assert_eq!(
898+
block.avail_features & (1u64 << VIRTIO_BLK_F_DISCARD),
899+
1u64 << VIRTIO_BLK_F_DISCARD
900+
);
898901
}
899902

900903
#[test]

src/vmm/src/devices/virtio/block/virtio/io/async_io.rs

Lines changed: 187 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
// SPDX-License-Identifier: Apache-2.0
33

4+
use std::ffi::CStr;
45
use std::fmt::Debug;
56
use std::fs::File;
67
use std::os::fd::RawFd;
8+
use std::os::unix::fs::FileTypeExt;
79
use std::os::unix::io::AsRawFd;
810

911
use vm_memory::GuestMemoryError;
@@ -21,6 +23,8 @@ use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryExtension, Gue
2123
pub enum AsyncIoError {
2224
/// Not implemented
2325
NotImplemented,
26+
/// Discard is not supported with this async backend on the host kernel.
27+
DiscardUnsupported,
2428
/// IO: {0}
2529
IO(std::io::Error),
2630
/// IoUring: {0}
@@ -40,6 +44,13 @@ pub struct AsyncFileEngine {
4044
file: File,
4145
ring: IoUring<WrappedRequest>,
4246
completion_evt: EventFd,
47+
discard_op: Option<AsyncDiscardOp>,
48+
}
49+
50+
/// The io_uring operation the async engine uses to carry out a discard request.
///
/// `AsyncFileEngine::discard_op` picks the variant from the backing file's type and
/// `AsyncFileEngine::push_discard` builds the matching ring operation from it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
51+
enum AsyncDiscardOp {
52+
/// Discard through a uring command; selected when the backing file is a block device.
BlockUringCmd,
53+
/// Discard through a fallocate operation; selected for every other backing file type.
Fallocate,
4354
}
4455

4556
#[derive(Debug)]
@@ -70,48 +81,135 @@ impl WrappedRequest {
7081
}
7182

7283
impl AsyncFileEngine {
84+
const BLOCK_URING_CMD_DISCARD: u32 = 0x1200;
85+
const FALLOC_FL_KEEP_SIZE: u32 = 0x01;
86+
const FALLOC_FL_PUNCH_HOLE: u32 = 0x02;
87+
const MIN_BLOCK_URING_DISCARD_KERNEL: (u32, u32) = (6, 12);
88+
7389
/// Creates the io_uring instance used for `file`.
///
/// The ring is restricted to fixed (pre-registered) fds and to the `Read`, `Write` and
/// `Fsync` opcodes, plus the single opcode needed by `discard_op` when it is `Some`.
/// `Read`, `Write` and the discard opcode (if any) are also passed as `required_ops` to
/// `IoUring::new_with_required_ops`; `Fsync` is allowed but not listed as required.
///
/// NOTE(review): `required_ops` is presumably probed against the host kernel by
/// `IoUring::new_with_required_ops`, and `completion_fd` is presumably registered as the
/// completion eventfd — confirm against the `IoUring` implementation.
fn new_ring(
7490
file: &File,
7591
completion_fd: RawFd,
92+
discard_op: Option<AsyncDiscardOp>,
7693
) -> Result<IoUring<WrappedRequest>, IoUringError> {
77-
IoUring::new(
94+
let mut restrictions = vec![
95+
// Make sure we only allow operations on pre-registered fds.
Restriction::RequireFixedFds,
97+
// Allowlist of opcodes.
Restriction::AllowOpCode(OpCode::Read),
99+
Restriction::AllowOpCode(OpCode::Write),
100+
Restriction::AllowOpCode(OpCode::Fsync),
101+
];
102+
let mut required_ops = vec![OpCode::Read, OpCode::Write];
103+
// Extend both the allowlist and the required set with the opcode that backs discard.
match discard_op {
104+
Some(AsyncDiscardOp::Fallocate) => {
105+
restrictions.push(Restriction::AllowOpCode(OpCode::Fallocate));
106+
required_ops.push(OpCode::Fallocate);
107+
}
108+
Some(AsyncDiscardOp::BlockUringCmd) => {
109+
restrictions.push(Restriction::AllowOpCode(OpCode::UringCmd));
110+
required_ops.push(OpCode::UringCmd);
111+
}
112+
// Discard disabled: no extra opcode is allowed or required.
None => {}
113+
}
114+
115+
IoUring::new_with_required_ops(
78116
u32::from(IO_URING_NUM_ENTRIES),
79117
vec![file],
80-
vec![
81-
// Make sure we only allow operations on pre-registered fds.
Restriction::RequireFixedFds,
83-
// Allowlist of opcodes.
Restriction::AllowOpCode(OpCode::Read),
85-
Restriction::AllowOpCode(OpCode::Write),
86-
Restriction::AllowOpCode(OpCode::Fsync),
87-
],
118+
restrictions,
88119
Some(completion_fd),
120+
&required_ops,
89121
)
90122
}
91123

92-
pub fn from_file(file: File) -> Result<AsyncFileEngine, AsyncIoError> {
124+
/// Builds an `AsyncFileEngine` around `file`.
///
/// Creates a non-blocking completion `EventFd`, resolves the discard operation for `file`
/// via `Self::discard_op` (`None` when `discard` is false), and creates the ring with
/// `Self::new_ring`. The chosen discard operation is stored on the engine.
///
/// # Errors
///
/// Returns `AsyncIoError::EventFd` if the eventfd cannot be created, any error from
/// `Self::discard_op`, or `AsyncIoError::IoUring` if ring creation fails.
pub fn from_file(file: File, discard: bool) -> Result<AsyncFileEngine, AsyncIoError> {
93125
log_dev_preview_warning("Async file IO", Option::None);
94126

95127
let completion_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(AsyncIoError::EventFd)?;
96-
let ring =
97-
Self::new_ring(&file, completion_evt.as_raw_fd()).map_err(AsyncIoError::IoUring)?;
128+
// Resolve the discard operation first: it decides which opcodes the ring must allow.
let discard_op = Self::discard_op(&file, discard)?;
129+
let ring = Self::new_ring(&file, completion_evt.as_raw_fd(), discard_op)
130+
.map_err(AsyncIoError::IoUring)?;
98131

99132
Ok(AsyncFileEngine {
100133
file,
101134
ring,
102135
completion_evt,
136+
discard_op,
103137
})
104138
}
105139

106140
/// Replaces the backing file, rebuilding the ring for the new file.
///
/// Discard stays enabled only if it was enabled before (`self.discard_op.is_some()`), but
/// the concrete operation is re-resolved for the new `file` through `Self::discard_op`.
/// The engine's fields are only overwritten after both the discard resolution and the
/// new ring creation have succeeded; the existing completion eventfd is reused.
///
/// # Errors
///
/// Returns any error from `Self::discard_op`, or `AsyncIoError::IoUring` if the new ring
/// cannot be created.
pub fn update_file(&mut self, file: File) -> Result<(), AsyncIoError> {
107-
let ring = Self::new_ring(&file, self.completion_evt.as_raw_fd())
141+
let discard_op = Self::discard_op(&file, self.discard_op.is_some())?;
142+
let ring = Self::new_ring(&file, self.completion_evt.as_raw_fd(), discard_op)
108143
.map_err(AsyncIoError::IoUring)?;
109144

110-
self.file = file;
111145
self.ring = ring;
146+
self.file = file;
147+
self.discard_op = discard_op;
112148
Ok(())
113149
}
114150

151+
/// Chooses the io_uring operation used to serve discards for `file`.
///
/// Returns `Ok(None)` when `discard` is false. Otherwise returns
/// `AsyncDiscardOp::BlockUringCmd` if `file` is a block device and the host kernel release
/// is at least `MIN_BLOCK_URING_DISCARD_KERNEL`, or `AsyncDiscardOp::Fallocate` for any
/// other file type.
///
/// # Errors
///
/// Returns `AsyncIoError::IO` if the file metadata cannot be read or the kernel release
/// check fails, and `AsyncIoError::DiscardUnsupported` if `file` is a block device but the
/// host kernel is older than `MIN_BLOCK_URING_DISCARD_KERNEL`.
fn discard_op(file: &File, discard: bool) -> Result<Option<AsyncDiscardOp>, AsyncIoError> {
152+
if !discard {
153+
return Ok(None);
154+
}
155+
156+
if file
157+
.metadata()
158+
.map_err(AsyncIoError::IO)?
159+
.file_type()
160+
.is_block_device()
161+
{
162+
// BLOCK_URING_CMD_DISCARD was introduced for block devices in Linux 6.12.
163+
// IORING_OP_URING_CMD probing alone is not enough because older kernels can
164+
// support uring commands for other file operations.
165+
if !Self::host_kernel_at_least(Self::MIN_BLOCK_URING_DISCARD_KERNEL)
166+
.map_err(AsyncIoError::IO)?
167+
{
168+
return Err(AsyncIoError::DiscardUnsupported);
169+
}
170+
Ok(Some(AsyncDiscardOp::BlockUringCmd))
171+
} else {
172+
Ok(Some(AsyncDiscardOp::Fallocate))
173+
}
174+
}
175+
176+
/// Reports whether the running kernel's release is at least `major.minor`.
///
/// The release string is read with `libc::uname` and compared through
/// `Self::kernel_release_at_least`.
///
/// # Errors
///
/// Returns the last OS error if `uname` fails, or an `InvalidData` error with the message
/// "invalid kernel release" if the release string cannot be parsed.
fn host_kernel_at_least((major, minor): (u32, u32)) -> Result<bool, std::io::Error> {
177+
// SAFETY: An all-zeroed value for `libc::utsname` is valid.
let mut name: libc::utsname = unsafe { std::mem::zeroed() };
179+
// SAFETY: The passed arg is a valid mutable reference of `libc::utsname`.
let ret = unsafe { libc::uname(&mut name) };
181+
if ret != 0 {
182+
return Err(std::io::Error::last_os_error());
183+
}
184+
185+
// SAFETY: The fields of `libc::utsname` are terminated by a null byte.
let release = unsafe { CStr::from_ptr(name.release.as_ptr()) }
187+
.to_string_lossy()
188+
.into_owned();
189+
// An unparsable release (`None`) is surfaced as an error rather than treated as "too old".
Self::kernel_release_at_least(&release, (major, minor)).ok_or_else(|| {
190+
std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid kernel release")
191+
})
192+
}
193+
194+
/// Extracts the leading `(major, minor)` version pair from a kernel release string.
///
/// Only the prefix up to the first character that is neither an ASCII digit nor `.` is
/// considered (e.g. `6.12.0` out of `6.12.0-1020-aws`). That prefix is split on `.` and
/// the first two components are parsed as `u32`; any further components are ignored.
///
/// Returns `None` if either of the first two components is missing or is not a valid
/// `u32` (for example when the string does not start with a digit).
fn parse_kernel_release(release: &str) -> Option<(u32, u32)> {
195+
let mut parts = release
196+
.split(|ch: char| !ch.is_ascii_digit() && ch != '.')
197+
.next()
198+
.unwrap_or("")
199+
.split('.');
200+
201+
let host_major = parts.next()?.parse::<u32>().ok()?;
202+
let host_minor = parts.next()?.parse::<u32>().ok()?;
203+
204+
Some((host_major, host_minor))
205+
}
206+
207+
/// Checks whether the version in `release` is at least `major.minor`.
///
/// Returns `None` if `release` cannot be parsed by `Self::parse_kernel_release`; otherwise
/// `Some(true)` when the parsed major is greater than `major`, or equal to it with a minor
/// greater than or equal to `minor`, and `Some(false)` in every other case.
fn kernel_release_at_least(release: &str, (major, minor): (u32, u32)) -> Option<bool> {
208+
let (host_major, host_minor) = Self::parse_kernel_release(release)?;
209+
210+
Some(host_major > major || (host_major == major && host_minor >= minor))
211+
}
212+
115213
#[cfg(test)]
116214
pub fn file(&self) -> &File {
117215
&self.file
@@ -200,8 +298,42 @@ impl AsyncFileEngine {
200298
})
201299
}
202300

203-
pub fn discard(&mut self, _range: (u64, u64)) -> Result<u32, AsyncIoError> {
204-
Err(AsyncIoError::NotImplemented)
301+
/// Queues an asynchronous discard of `range` on the ring.
///
/// `range` is interpreted as `(offset, len)`. The operation pushed depends on
/// `self.discard_op`:
/// - `Fallocate`: `Operation::fallocate` with `FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE`.
/// - `BlockUringCmd`: `Operation::block_discard` with `BLOCK_URING_CMD_DISCARD`.
///
/// `req` travels with the operation as its user data.
///
/// # Errors
///
/// On failure `req` is handed back inside the `RequestError`, with
/// `AsyncIoError::NotImplemented` if no discard operation is configured, or
/// `AsyncIoError::IoUring` if pushing onto the ring fails.
pub fn push_discard(
302+
&mut self,
303+
range: (u64, u64),
304+
req: PendingRequest,
305+
) -> Result<(), RequestError<AsyncIoError>> {
306+
let wrapped_user_data = WrappedRequest::new(req);
307+
let (offset, len) = range;
308+
// NOTE(review): the leading `0` passed to both constructors is presumably the index of
// the registered (fixed) fd — confirm against the `Operation` API.
let operation = match self.discard_op {
309+
Some(AsyncDiscardOp::Fallocate) => Operation::fallocate(
310+
0,
311+
Self::FALLOC_FL_KEEP_SIZE | Self::FALLOC_FL_PUNCH_HOLE,
312+
offset,
313+
len,
314+
wrapped_user_data,
315+
),
316+
Some(AsyncDiscardOp::BlockUringCmd) => Operation::block_discard(
317+
0,
318+
Self::BLOCK_URING_CMD_DISCARD,
319+
offset,
320+
len,
321+
wrapped_user_data,
322+
),
323+
// Discard was not enabled for this engine: return the request to the caller.
None => {
324+
return Err(RequestError {
325+
req: wrapped_user_data.req,
326+
error: AsyncIoError::NotImplemented,
327+
});
328+
}
329+
};
330+
331+
self.ring
332+
.push(operation)
333+
.map_err(|(io_uring_error, data)| RequestError {
334+
req: data.req,
335+
error: AsyncIoError::IoUring(io_uring_error),
336+
})
337+
}
206338

207339
pub fn kick_submission_queue(&mut self) -> Result<(), AsyncIoError> {
@@ -254,3 +386,42 @@ impl AsyncFileEngine {
254386
Ok(cqe)
255387
}
256388
}
389+
390+
// Unit tests for the kernel-release comparison and for discard operation selection.
#[cfg(test)]
391+
mod tests {
392+
use vmm_sys_util::tempfile::TempFile;
393+
394+
use super::*;
395+
396+
// Covers a release below the threshold, exactly at it, above it by minor, above it by
// major, and a string that does not start with a version number.
#[test]
397+
fn test_kernel_release_at_least() {
398+
assert_eq!(
399+
AsyncFileEngine::kernel_release_at_least("6.11.0-1018-aws", (6, 12)),
400+
Some(false)
401+
);
402+
assert_eq!(
403+
AsyncFileEngine::kernel_release_at_least("6.12.0-1020-aws", (6, 12)),
404+
Some(true)
405+
);
406+
assert_eq!(
407+
AsyncFileEngine::kernel_release_at_least("6.17.0-29-generic", (6, 12)),
408+
Some(true)
409+
);
410+
assert_eq!(
411+
AsyncFileEngine::kernel_release_at_least("7.0.2-6-pve", (6, 12)),
412+
Some(true)
413+
);
414+
assert_eq!(
415+
AsyncFileEngine::kernel_release_at_least("not-a-kernel", (6, 12)),
416+
None
417+
);
418+
}
419+
420+
// A temp file is a regular file, not a block device, so enabling discard must select
// the fallocate-based operation.
#[test]
421+
fn test_discard_regular_file_uses_fallocate() {
422+
let file = TempFile::new().unwrap().into_file();
423+
let engine = AsyncFileEngine::from_file(file, true).unwrap();
424+
425+
assert_eq!(engine.discard_op, Some(AsyncDiscardOp::Fallocate));
426+
}
427+
}

0 commit comments

Comments
 (0)