11// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22// SPDX-License-Identifier: Apache-2.0
33
4+ use std:: ffi:: CStr ;
45use std:: fmt:: Debug ;
56use std:: fs:: File ;
67use std:: os:: fd:: RawFd ;
8+ use std:: os:: unix:: fs:: FileTypeExt ;
79use std:: os:: unix:: io:: AsRawFd ;
810
911use vm_memory:: GuestMemoryError ;
@@ -21,6 +23,8 @@ use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryExtension, Gue
2123pub enum AsyncIoError {
2224 /// Not implemented
2325 NotImplemented ,
26+ /// Discard is not supported with this async backend on the host kernel.
27+ DiscardUnsupported ,
2428 /// IO: {0}
2529 IO ( std:: io:: Error ) ,
2630 /// IoUring: {0}
@@ -40,6 +44,13 @@ pub struct AsyncFileEngine {
4044 file : File ,
4145 ring : IoUring < WrappedRequest > ,
4246 completion_evt : EventFd ,
47+ discard_op : Option < AsyncDiscardOp > ,
48+ }
49+
/// io_uring operation the async engine submits to serve a discard request.
///
/// The variant is picked once per backing file (see `AsyncFileEngine::discard_op`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AsyncDiscardOp {
    /// Discard through a block-device uring command; selected when the backing file is a
    /// block device.
    BlockUringCmd,
    /// Discard through `fallocate`; selected when the backing file is not a block device.
    Fallocate,
}
4455
4556#[ derive( Debug ) ]
@@ -70,48 +81,135 @@ impl WrappedRequest {
7081}
7182
7283impl AsyncFileEngine {
/// Command code handed to `Operation::block_discard` for block-device discards.
// NOTE(review): presumably mirrors the kernel's `BLOCK_URING_CMD_DISCARD` — confirm against
// the uapi headers.
const BLOCK_URING_CMD_DISCARD: u32 = 0x1200;
/// `fallocate` mode bit combined with `FALLOC_FL_PUNCH_HOLE` when discarding on regular files.
// NOTE(review): presumably mirrors `FALLOC_FL_KEEP_SIZE` from fallocate(2) — confirm.
const FALLOC_FL_KEEP_SIZE: u32 = 0x01;
/// `fallocate` mode bit combined with `FALLOC_FL_KEEP_SIZE` when discarding on regular files.
// NOTE(review): presumably mirrors `FALLOC_FL_PUNCH_HOLE` from fallocate(2) — confirm.
const FALLOC_FL_PUNCH_HOLE: u32 = 0x02;
/// Oldest host kernel, as `(major, minor)`, on which block-device discard through a uring
/// command is attempted.
const MIN_BLOCK_URING_DISCARD_KERNEL: (u32, u32) = (6, 12);
88+
7389 fn new_ring (
7490 file : & File ,
7591 completion_fd : RawFd ,
92+ discard_op : Option < AsyncDiscardOp > ,
7693 ) -> Result < IoUring < WrappedRequest > , IoUringError > {
77- IoUring :: new (
94+ let mut restrictions = vec ! [
95+ // Make sure we only allow operations on pre-registered fds.
96+ Restriction :: RequireFixedFds ,
97+ // Allowlist of opcodes.
98+ Restriction :: AllowOpCode ( OpCode :: Read ) ,
99+ Restriction :: AllowOpCode ( OpCode :: Write ) ,
100+ Restriction :: AllowOpCode ( OpCode :: Fsync ) ,
101+ ] ;
102+ let mut required_ops = vec ! [ OpCode :: Read , OpCode :: Write ] ;
103+ match discard_op {
104+ Some ( AsyncDiscardOp :: Fallocate ) => {
105+ restrictions. push ( Restriction :: AllowOpCode ( OpCode :: Fallocate ) ) ;
106+ required_ops. push ( OpCode :: Fallocate ) ;
107+ }
108+ Some ( AsyncDiscardOp :: BlockUringCmd ) => {
109+ restrictions. push ( Restriction :: AllowOpCode ( OpCode :: UringCmd ) ) ;
110+ required_ops. push ( OpCode :: UringCmd ) ;
111+ }
112+ None => { }
113+ }
114+
115+ IoUring :: new_with_required_ops (
78116 u32:: from ( IO_URING_NUM_ENTRIES ) ,
79117 vec ! [ file] ,
80- vec ! [
81- // Make sure we only allow operations on pre-registered fds.
82- Restriction :: RequireFixedFds ,
83- // Allowlist of opcodes.
84- Restriction :: AllowOpCode ( OpCode :: Read ) ,
85- Restriction :: AllowOpCode ( OpCode :: Write ) ,
86- Restriction :: AllowOpCode ( OpCode :: Fsync ) ,
87- ] ,
118+ restrictions,
88119 Some ( completion_fd) ,
120+ & required_ops,
89121 )
90122 }
91123
92- pub fn from_file ( file : File ) -> Result < AsyncFileEngine , AsyncIoError > {
124+ pub fn from_file ( file : File , discard : bool ) -> Result < AsyncFileEngine , AsyncIoError > {
93125 log_dev_preview_warning ( "Async file IO" , Option :: None ) ;
94126
95127 let completion_evt = EventFd :: new ( libc:: EFD_NONBLOCK ) . map_err ( AsyncIoError :: EventFd ) ?;
96- let ring =
97- Self :: new_ring ( & file, completion_evt. as_raw_fd ( ) ) . map_err ( AsyncIoError :: IoUring ) ?;
128+ let discard_op = Self :: discard_op ( & file, discard) ?;
129+ let ring = Self :: new_ring ( & file, completion_evt. as_raw_fd ( ) , discard_op)
130+ . map_err ( AsyncIoError :: IoUring ) ?;
98131
99132 Ok ( AsyncFileEngine {
100133 file,
101134 ring,
102135 completion_evt,
136+ discard_op,
103137 } )
104138 }
105139
106140 pub fn update_file ( & mut self , file : File ) -> Result < ( ) , AsyncIoError > {
107- let ring = Self :: new_ring ( & file, self . completion_evt . as_raw_fd ( ) )
141+ let discard_op = Self :: discard_op ( & file, self . discard_op . is_some ( ) ) ?;
142+ let ring = Self :: new_ring ( & file, self . completion_evt . as_raw_fd ( ) , discard_op)
108143 . map_err ( AsyncIoError :: IoUring ) ?;
109144
110- self . file = file;
111145 self . ring = ring;
146+ self . file = file;
147+ self . discard_op = discard_op;
112148 Ok ( ( ) )
113149 }
114150
151+ fn discard_op ( file : & File , discard : bool ) -> Result < Option < AsyncDiscardOp > , AsyncIoError > {
152+ if !discard {
153+ return Ok ( None ) ;
154+ }
155+
156+ if file
157+ . metadata ( )
158+ . map_err ( AsyncIoError :: IO ) ?
159+ . file_type ( )
160+ . is_block_device ( )
161+ {
162+ // BLOCK_URING_CMD_DISCARD is introduced for block devices in Linux 6.12.
163+ // IORING_OP_URING_CMD probing alone is not enough because older kernels can
164+ // support uring commands for other file operations.
165+ if !Self :: host_kernel_at_least ( Self :: MIN_BLOCK_URING_DISCARD_KERNEL )
166+ . map_err ( AsyncIoError :: IO ) ?
167+ {
168+ return Err ( AsyncIoError :: DiscardUnsupported ) ;
169+ }
170+ Ok ( Some ( AsyncDiscardOp :: BlockUringCmd ) )
171+ } else {
172+ Ok ( Some ( AsyncDiscardOp :: Fallocate ) )
173+ }
174+ }
175+
176+ fn host_kernel_at_least ( ( major, minor) : ( u32 , u32 ) ) -> Result < bool , std:: io:: Error > {
177+ // SAFETY: An all-zeroed value for `libc::utsname` is valid.
178+ let mut name: libc:: utsname = unsafe { std:: mem:: zeroed ( ) } ;
179+ // SAFETY: The passed arg is a valid mutable reference of `libc::utsname`.
180+ let ret = unsafe { libc:: uname ( & mut name) } ;
181+ if ret != 0 {
182+ return Err ( std:: io:: Error :: last_os_error ( ) ) ;
183+ }
184+
185+ // SAFETY: The fields of `libc::utsname` are terminated by a null byte.
186+ let release = unsafe { CStr :: from_ptr ( name. release . as_ptr ( ) ) }
187+ . to_string_lossy ( )
188+ . into_owned ( ) ;
189+ Self :: kernel_release_at_least ( & release, ( major, minor) ) . ok_or_else ( || {
190+ std:: io:: Error :: new ( std:: io:: ErrorKind :: InvalidData , "invalid kernel release" )
191+ } )
192+ }
193+
/// Extracts `(major, minor)` from a kernel release string such as `"6.12.0-1020-aws"`.
///
/// Only the leading run of ASCII digits and dots is considered; its first two dot-separated
/// fields must both parse as `u32`. Returns `None` otherwise (for instance when the string
/// does not start with a digit, or has no minor field).
fn parse_kernel_release(release: &str) -> Option<(u32, u32)> {
    // End of the numeric "x.y.z" prefix: first character that is neither a digit nor a dot.
    let prefix_len = release
        .find(|ch: char| !(ch.is_ascii_digit() || ch == '.'))
        .unwrap_or(release.len());
    let mut fields = release[..prefix_len].split('.');

    let major: u32 = fields.next()?.parse().ok()?;
    let minor: u32 = fields.next()?.parse().ok()?;

    Some((major, minor))
}
206+
207+ fn kernel_release_at_least ( release : & str , ( major, minor) : ( u32 , u32 ) ) -> Option < bool > {
208+ let ( host_major, host_minor) = Self :: parse_kernel_release ( release) ?;
209+
210+ Some ( host_major > major || ( host_major == major && host_minor >= minor) )
211+ }
212+
115213 #[ cfg( test) ]
116214 pub fn file ( & self ) -> & File {
117215 & self . file
@@ -200,8 +298,42 @@ impl AsyncFileEngine {
200298 } )
201299 }
202300
203- pub fn discard ( & mut self , _range : ( u64 , u64 ) ) -> Result < u32 , AsyncIoError > {
204- Err ( AsyncIoError :: NotImplemented )
301+ pub fn push_discard (
302+ & mut self ,
303+ range : ( u64 , u64 ) ,
304+ req : PendingRequest ,
305+ ) -> Result < ( ) , RequestError < AsyncIoError > > {
306+ let wrapped_user_data = WrappedRequest :: new ( req) ;
307+ let ( offset, len) = range;
308+ let operation = match self . discard_op {
309+ Some ( AsyncDiscardOp :: Fallocate ) => Operation :: fallocate (
310+ 0 ,
311+ Self :: FALLOC_FL_KEEP_SIZE | Self :: FALLOC_FL_PUNCH_HOLE ,
312+ offset,
313+ len,
314+ wrapped_user_data,
315+ ) ,
316+ Some ( AsyncDiscardOp :: BlockUringCmd ) => Operation :: block_discard (
317+ 0 ,
318+ Self :: BLOCK_URING_CMD_DISCARD ,
319+ offset,
320+ len,
321+ wrapped_user_data,
322+ ) ,
323+ None => {
324+ return Err ( RequestError {
325+ req : wrapped_user_data. req ,
326+ error : AsyncIoError :: NotImplemented ,
327+ } ) ;
328+ }
329+ } ;
330+
331+ self . ring
332+ . push ( operation)
333+ . map_err ( |( io_uring_error, data) | RequestError {
334+ req : data. req ,
335+ error : AsyncIoError :: IoUring ( io_uring_error) ,
336+ } )
205337 }
206338
207339 pub fn kick_submission_queue ( & mut self ) -> Result < ( ) , AsyncIoError > {
@@ -254,3 +386,42 @@ impl AsyncFileEngine {
254386 Ok ( cqe)
255387 }
256388}
389+
#[cfg(test)]
mod tests {
    use vmm_sys_util::tempfile::TempFile;

    use super::*;

    /// Release strings on both sides of the 6.12 threshold, plus an unparsable one.
    #[test]
    fn test_kernel_release_at_least() {
        let cases = [
            ("6.11.0-1018-aws", Some(false)),
            ("6.12.0-1020-aws", Some(true)),
            ("6.17.0-29-generic", Some(true)),
            ("7.0.2-6-pve", Some(true)),
            ("not-a-kernel", None),
        ];

        for &(release, expected) in &cases {
            assert_eq!(
                AsyncFileEngine::kernel_release_at_least(release, (6, 12)),
                expected
            );
        }
    }

    /// A regular file with discard enabled must select the fallocate-based discard.
    #[test]
    fn test_discard_regular_file_uses_fallocate() {
        let backing = TempFile::new().unwrap().into_file();
        let engine = AsyncFileEngine::from_file(backing, true).unwrap();

        assert_eq!(engine.discard_op, Some(AsyncDiscardOp::Fallocate));
    }
}
0 commit comments