Skip to content

Commit cd7b1df

Browse files
committed
virtio-blk: support async discard operations
Add async discard execution for virtio-blk drives when the discard feature is enabled. Regular file backends use IORING_OP_FALLOCATE with hole punching. Block-device backends use BLOCK_URING_CMD_DISCARD through IORING_OP_URING_CMD when the host kernel supports it. Require async fsync support during io_uring initialization, document the async discard support matrix, and return VIRTIO_BLK_S_UNSUPP for guest DISCARD requests when the discard feature was not negotiated. Signed-off-by: Jonas Savulionis <jonas@esnet.lt>
1 parent 44209c5 commit cd7b1df

11 files changed

Lines changed: 431 additions & 58 deletions

File tree

docs/api_requests/block-discard.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,20 @@ Discard is configured per virtio-block device through the `discard` field in the
99

1010
## Supported configuration
1111

12-
Discard is currently supported only for writable virtio-block devices using the
13-
`Sync` IO engine. It is not supported for:
12+
Discard is currently supported only for writable virtio-block devices. It is not
13+
supported for:
1414

1515
- read-only drives;
16-
- `Async` IO engine drives;
1716
- vhost-user block devices.
1817

1918
For regular backing files, Firecracker uses hole punching. For block-device
20-
backends, Firecracker uses `BLKDISCARD`.
19+
backends, Firecracker uses `BLKDISCARD` with the `Sync` IO engine and
20+
`BLOCK_URING_CMD_DISCARD` with the `Async` IO engine.
21+
22+
When discard is enabled with the `Async` IO engine, regular backing files
23+
require host support for `IORING_OP_FALLOCATE`. Block-device backing stores
24+
additionally require host support for `BLOCK_URING_CMD_DISCARD`, which is
25+
available starting with Linux 6.12.
2126

2227
## Example configuration
2328

@@ -31,7 +36,7 @@ curl --unix-socket ${socket} -i \
3136
\"path_on_host\": \"${drive_path}\",
3237
\"is_root_device\": true,
3338
\"is_read_only\": false,
34-
\"io_engine\": \"Sync\",
39+
\"io_engine\": \"Async\",
3540
\"discard\": true
3641
}"
3742
```

docs/api_requests/block-io-engine.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ If a block device is configured with the `Async` io_engine on a host kernel
5959
older than 5.10.51, the API call will return a 400 Bad Request, with a
6060
suggestive error message.
6161

62+
When the `discard` block option is enabled with the `Async` IO engine,
63+
Firecracker uses `IORING_OP_FALLOCATE` for regular backing files. For block
64+
device backing stores, async discard requires the kernel block uring discard
65+
command (`BLOCK_URING_CMD_DISCARD`), which is available starting with Linux
66+
6.12.
67+
6268
## Performance considerations
6369

6470
The performance is strictly tied to the host kernel version. The gathered data

src/firecracker/swagger/firecracker.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1243,7 +1243,9 @@ definitions:
12431243
description:
12441244
Enables virtio-blk discard support. When enabled, the guest can issue
12451245
discard/TRIM requests for this drive. Only supported for writable
1246-
virtio-block devices using the Sync IO engine.
1246+
virtio-block devices. Async IO engine block-device backends require
1247+
host support for BLOCK_URING_CMD_DISCARD, available starting with
1248+
Linux 6.12.
12471249
This field is optional for virtio-block config and should be omitted for vhost-user-block configuration.
12481250
default: false
12491251
path_on_host:

src/vmm/src/devices/virtio/block/virtio/device.rs

Lines changed: 68 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,15 @@ impl DiskProperties {
9797
disk_image_path: String,
9898
is_disk_read_only: bool,
9999
file_engine_type: FileEngineType,
100+
discard: bool,
100101
) -> Result<Self, VirtioBlockError> {
101102
let mut disk_image = Self::open_file(&disk_image_path, is_disk_read_only)?;
102103
let disk_size = Self::file_size(&disk_image_path, &mut disk_image)?;
103104
let image_id = Self::build_disk_image_id(&disk_image);
104105

105106
Ok(Self {
106107
file_path: disk_image_path,
107-
file_engine: FileEngine::from_file(disk_image, file_engine_type)
108+
file_engine: FileEngine::from_file(disk_image, file_engine_type, discard)
108109
.map_err(VirtioBlockError::FileEngine)?,
109110
nsectors: disk_size >> SECTOR_SHIFT,
110111
image_id,
@@ -319,14 +320,11 @@ impl VirtioBlock {
319320
///
320321
/// The given file must be seekable and sizable.
321322
pub fn new(config: VirtioBlockConfig) -> Result<VirtioBlock, VirtioBlockError> {
322-
if config.discard && config.file_engine_type == FileEngineType::Async {
323-
return Err(VirtioBlockError::DiscardAsyncUnsupported);
324-
}
325-
326323
let disk_properties = DiskProperties::new(
327324
config.path_on_host,
328325
config.is_read_only,
329326
config.file_engine_type,
327+
config.discard,
330328
)?;
331329

332330
let rate_limiter = config
@@ -451,6 +449,7 @@ impl VirtioBlock {
451449
head.index,
452450
&active_state.mem,
453451
&self.metrics,
452+
self.acked_features & (1u64 << VIRTIO_BLK_F_DISCARD) != 0,
454453
)
455454
}
456455
Err(err) => {
@@ -745,9 +744,9 @@ mod tests {
745744
use crate::check_metric_after_block;
746745
use crate::devices::virtio::block::virtio::IO_URING_NUM_ENTRIES;
747746
use crate::devices::virtio::block::virtio::test_utils::{
748-
default_block, read_blk_req_descriptors, set_queue, set_rate_limiter,
749-
simulate_async_completion_event, simulate_queue_and_async_completion_events,
750-
simulate_queue_event,
747+
RequestDescriptorChain, default_block, read_blk_req_descriptors, set_queue,
748+
set_rate_limiter, simulate_async_completion_event,
749+
simulate_queue_and_async_completion_events, simulate_queue_event,
751750
};
752751
use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
753752
use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem};
@@ -813,16 +812,20 @@ mod tests {
813812
f.as_file().set_len(size).unwrap();
814813

815814
for engine in [FileEngineType::Sync, FileEngineType::Async] {
816-
let disk_properties =
817-
DiskProperties::new(String::from(f.as_path().to_str().unwrap()), true, engine)
818-
.unwrap();
815+
let disk_properties = DiskProperties::new(
816+
String::from(f.as_path().to_str().unwrap()),
817+
true,
818+
engine,
819+
false,
820+
)
821+
.unwrap();
819822

820823
assert_eq!(size, u64::from(SECTOR_SIZE) * num_sectors);
821824
assert_eq!(disk_properties.nsectors, num_sectors);
822825
// Testing `backing_file.virtio_block_disk_image_id()` implies
823826
// duplicating that logic in tests, so skipping it.
824827

825-
let res = DiskProperties::new("invalid-disk-path".to_string(), true, engine);
828+
let res = DiskProperties::new("invalid-disk-path".to_string(), true, engine, false);
826829
assert!(
827830
matches!(res, Err(VirtioBlockError::BackingFile(_, _))),
828831
"{:?}",
@@ -891,10 +894,59 @@ mod tests {
891894
rate_limiter: None,
892895
file_engine_type: FileEngineType::Async,
893896
};
894-
assert!(matches!(
895-
VirtioBlock::new(async_config),
896-
Err(VirtioBlockError::DiscardAsyncUnsupported)
897-
));
897+
let block = VirtioBlock::new(async_config).unwrap();
898+
assert_eq!(
899+
block.avail_features & (1u64 << VIRTIO_BLK_F_DISCARD),
900+
1u64 << VIRTIO_BLK_F_DISCARD
901+
);
902+
}
903+
904+
#[test]
905+
fn test_discard_requires_negotiated_feature() {
906+
for engine in [FileEngineType::Sync, FileEngineType::Async] {
907+
let f = TempFile::new().unwrap();
908+
f.as_file().set_len(0x1000).unwrap();
909+
let config = VirtioBlockConfig {
910+
drive_id: "test".to_string(),
911+
path_on_host: f.as_path().to_str().unwrap().to_string(),
912+
is_root_device: false,
913+
partuuid: None,
914+
is_read_only: false,
915+
discard: true,
916+
cache_type: CacheType::Unsafe,
917+
rate_limiter: None,
918+
file_engine_type: engine,
919+
};
920+
let mut block = VirtioBlock::new(config).unwrap();
921+
let mem = default_mem();
922+
let interrupt = default_interrupt();
923+
let vq = VirtQueue::new(GuestAddress(0), &mem, 16);
924+
set_queue(&mut block, 0, vq.create_queue());
925+
block.activate(mem.clone(), interrupt).unwrap();
926+
927+
let chain = RequestDescriptorChain::new(&vq);
928+
chain.set_header(RequestHeader::new(VIRTIO_BLK_T_DISCARD, 0));
929+
chain.data_desc.flags.set(VIRTQ_DESC_F_NEXT);
930+
chain.data_desc.len.set(16);
931+
932+
let mut discard_range = [0u8; 16];
933+
discard_range[8..12].copy_from_slice(&1_u32.to_le_bytes());
934+
mem.write_slice(&discard_range, GuestAddress(chain.data_desc.addr.get()))
935+
.unwrap();
936+
937+
simulate_queue_event(&mut block, Some(true));
938+
939+
assert_eq!(vq.used.idx.get(), 1);
940+
assert_eq!(vq.used.ring[0].get().id, 0);
941+
assert_eq!(vq.used.ring[0].get().len, 1);
942+
assert_eq!(
943+
u32::from(
944+
mem.read_obj::<u8>(GuestAddress(chain.status_desc.addr.get()))
945+
.unwrap()
946+
),
947+
VIRTIO_BLK_S_UNSUPP
948+
);
949+
}
898950
}
899951

900952
#[test]

0 commit comments

Comments
 (0)