diff --git a/HOWTO.rst b/HOWTO.rst index 55ebc388cc..e627448a3f 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -4005,6 +4005,33 @@ Verification used to speed up the process of writing each block on a device with its offset. Default: 0 (disabled). +.. option:: verify_type=str + + Controls which write operations are included during the verification + phase. This option only affects offline verification when using + :option:`verify_state_save` to save completion state and later verify + with a separate job. The allowed values are: + + **flush** + Only verify writes that completed at or before the last + fsync operation. This mode filters out writes that + completed after the last fsync, which may not be + persistent on storage. Writes with the Force Unit + Access (FUA) flag are always included regardless of + fsync timing, as they bypass the cache and are + immediately persistent. This is useful for testing data + persistence guarantees across power failures or system + crashes. fio tracks fsync completion times and write + completion times during the write phase. During + verification, only writes that meet the fsync timing + criteria are verified. This allows testing scenarios + where only data that was properly synced before a + simulated failure should be verified. + This option requires :option:`verify_state_save` to be + enabled and is only effective during offline + verification (separate verify job). Default: none + (verify all completed writes). + .. 
option:: verify_fatal=bool Normally fio will keep checking the entire contents before quitting on a diff --git a/backend.c b/backend.c index 0cdee86465..86efbaf09f 100644 --- a/backend.c +++ b/backend.c @@ -1259,7 +1259,7 @@ static int init_file_completion_logging(struct thread_data *td, for_each_file(td, f, i) { f->last_write_comp = scalloc(td->last_write_comp_depth, - sizeof(uint64_t)); + sizeof(struct fio_write_comp)); if (!f->last_write_comp) goto cleanup; } diff --git a/cconv.c b/cconv.c index 4e72ae16a9..aa041c52a1 100644 --- a/cconv.c +++ b/cconv.c @@ -178,6 +178,7 @@ int convert_thread_options_to_cpu(struct thread_options *o, o->sync_io = le32_to_cpu(top->sync_io); o->write_hint = le32_to_cpu(top->write_hint); o->verify = le32_to_cpu(top->verify); + o->verify_type = le32_to_cpu(top->verify_type); o->do_verify = le32_to_cpu(top->do_verify); o->experimental_verify = le32_to_cpu(top->experimental_verify); o->verify_state = le32_to_cpu(top->verify_state); @@ -443,6 +444,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->sync_io = cpu_to_le32(o->sync_io); top->write_hint = cpu_to_le32(o->write_hint); top->verify = cpu_to_le32(o->verify); + top->verify_type = cpu_to_le32(o->verify_type); top->do_verify = cpu_to_le32(o->do_verify); top->experimental_verify = cpu_to_le32(o->experimental_verify); top->verify_state = cpu_to_le32(o->verify_state); diff --git a/engines/io_uring.c b/engines/io_uring.c index 5bbcc97ab9..985c7b6794 100644 --- a/engines/io_uring.c +++ b/engines/io_uring.c @@ -562,6 +562,10 @@ static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u) io_u_set(td, io_u, IO_U_F_VER_IN_DEV); } + /* Mark FUA writes for verification state tracking */ + if (io_u->ddir == DDIR_WRITE && o->writefua) + io_u_set(td, io_u, IO_U_F_FUA); + return fio_nvme_uring_cmd_prep(cmd, io_u, o->nonvectored ? 
NULL : &ld->iovecs[io_u->index], dsm, read_opcode, ld->write_opcode, diff --git a/engines/sg.c b/engines/sg.c index 9df70bd28b..86f3104ec9 100644 --- a/engines/sg.c +++ b/engines/sg.c @@ -668,6 +668,10 @@ static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) fio_sgio_rw_lba(hdr, lba, nr_blocks, o->write_mode == FIO_SG_WRITE_SAME_NDOB); + /* Mark FUA writes for verification state tracking */ + if (o->writefua) + io_u_set(td, io_u, IO_U_F_FUA); + } else if (io_u->ddir == DDIR_TRIM) { struct sgio_trim *st; diff --git a/file.h b/file.h index e38ed2f123..c946844d4e 100644 --- a/file.h +++ b/file.h @@ -37,6 +37,9 @@ enum fio_file_flags { FIO_FILE_smalloc = 1 << 9, /* smalloc file/file_name */ }; +/* Flags for fio_write_comp.flags */ +#define FIO_WRITE_COMP_FUA 0x1 /* Write had Force Unit Access flag */ + enum file_lock_mode { FILE_LOCK_NONE, FILE_LOCK_EXCLUSIVE, @@ -126,8 +129,15 @@ struct fio_file { * Tracks the last iodepth number of completed writes, if data * verification is enabled */ - uint64_t *last_write_comp; + struct fio_write_comp { + uint64_t offset; + uint64_t completion_time_nsec; + uint32_t flags; /* I/O flags including FUA */ + uint32_t flush_count; /* FLUSH count at completion time */ + } *last_write_comp; unsigned int last_write_idx; + uint64_t last_flush_time_nsec; /* Last FLUSH completion timestamp */ + unsigned int flush_count; /* Count of completed FLUSH operations */ /* * For use by the io engine to store offset diff --git a/fio.1 b/fio.1 index 5bcb1d46a7..239a8c3675 100644 --- a/fio.1 +++ b/fio.1 @@ -3734,6 +3734,34 @@ Recreate an instance of the \fBverify_pattern\fR every up the process of writing each block on a device with its offset. Default: 0 (disabled). .TP +.BI verify_type \fR=\fPstr +Controls which write operations are included during the verification phase. +This option only affects offline verification when using \fBverify_state_save\fR +to save completion state and later verify with a separate job. 
The allowed +values are: +.RS +.RS +.TP +.B flush +Only verify writes that completed at or before the last fsync operation. +This mode filters out writes that completed after the last fsync, which may +not be persistent on storage. Writes with the Force Unit Access (FUA) flag +are always included regardless of fsync timing, as they bypass the cache and +are immediately persistent. This is useful for testing data persistence +guarantees across power failures or system crashes. +.RE +.P +When \fBverify_type=flush\fR is used, fio tracks fsync completion times and +write completion times during the write phase. During verification, only +writes that meet the fsync timing criteria are verified. This allows testing +scenarios where only data that was properly synced before a simulated +failure should be verified. +.P +This option requires \fBverify_state_save\fR to be enabled and is only +effective during offline verification (separate verify job). Default: none +(verify all completed writes). +.RE +.TP .BI verify_fatal \fR=\fPbool Normally fio will keep checking the entire contents before quitting on a block verification failure. 
If this option is set, fio will exit the job on diff --git a/io_u.c b/io_u.c index ca97f38881..e452fc3525 100644 --- a/io_u.c +++ b/io_u.c @@ -2063,7 +2063,7 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u, } static void file_log_write_comp(const struct thread_data *td, struct fio_file *f, - uint64_t offset, unsigned int bytes) + uint64_t offset, unsigned int bytes, struct io_u *io_u) { int idx; @@ -2079,11 +2079,31 @@ static void file_log_write_comp(const struct thread_data *td, struct fio_file *f return; idx = f->last_write_idx++; - f->last_write_comp[idx] = offset; + f->last_write_comp[idx].offset = offset; + f->last_write_comp[idx].completion_time_nsec = ntime_since_now(&io_u->start_time); + f->last_write_comp[idx].flags = 0; + f->last_write_comp[idx].flush_count = f->flush_count; + + /* Tag FUA writes. NOTE(review): io_u was already dereferenced for start_time above, so the NULL check below cannot protect that access */ + if (io_u && (io_u->flags & IO_U_F_FUA)) + f->last_write_comp[idx].flags |= FIO_WRITE_COMP_FUA; + if (f->last_write_idx == td->last_write_comp_depth) f->last_write_idx = 0; } +static void file_log_flush_comp(struct fio_file *f, struct io_u *io_u) +{ + if (!f) + return; + + /* Remember the time value of the latest FLUSH completion. NOTE(review): ntime_since_now(&io_u->start_time) looks like time elapsed since this io_u started rather than an absolute timestamp - confirm */ + f->last_flush_time_nsec = ntime_since_now(&io_u->start_time); + + /* Count this FLUSH; writes logged afterwards copy the new value into their flush_count */ + f->flush_count++; +} + static bool should_account(struct thread_data *td) { return ramp_time_over(td) && (td->runstate == TD_RUNNING || @@ -2125,7 +2145,10 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr, if (ddir_sync(ddir)) { if (io_u->error) goto error; + + /* Log flush completion. NOTE(review): do_io_u_sync() also calls file_log_flush_comp() on success; if that io_u reaches this path too, the flush is counted twice - confirm */ if (f) { + file_log_flush_comp(f, io_u); f->first_write = -1ULL; f->last_write = -1ULL; } @@ -2164,7 +2187,7 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr, } if (ddir == DDIR_WRITE) - file_log_write_comp(td, f, io_u->offset, bytes); + file_log_write_comp(td, f, io_u->offset, bytes, io_u); if (should_account(td)) account_io_completion(td, io_u, icd, ddir, bytes); 
@@ -2464,6 +2487,10 @@ int do_io_u_sync(const struct thread_data *td, struct io_u *io_u) if (ret < 0) io_u->error = errno; + else { + /* Record FLUSH completion timing for verification state */ + file_log_flush_comp(io_u->file, io_u); + } return ret; } diff --git a/io_u.h b/io_u.h index 178c12293f..4f8bbf73c8 100644 --- a/io_u.h +++ b/io_u.h @@ -24,6 +24,7 @@ enum { IO_U_F_PATTERN_DONE = 1 << 8, IO_U_F_DEVICE_ERROR = 1 << 9, IO_U_F_VER_IN_DEV = 1 << 10, /* Verify data in device */ + IO_U_F_FUA = 1 << 11, /* Force Unit Access flag */ }; /* diff --git a/options.c b/options.c index 6295a616ca..17bfcbd475 100644 --- a/options.c +++ b/options.c @@ -3256,6 +3256,28 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_IO, .group = FIO_OPT_G_VERIFY, }, + { + .name = "verify_type", + .lname = "Verify type", + .type = FIO_OPT_STR, + .off1 = offsetof(struct thread_options, verify_type), + .help = "Verification filter type", + .def = "none", + .parent = "verify", + .hide = 1, + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_VERIFY, + .posval = { + { .ival = "none", + .oval = VERIFY_NONE, + .help = "No verification filtering", + }, + { .ival = "flush", + .oval = VERIFY_TYPE_FLUSH, + .help = "Verify only writes that completed before flush", + }, + }, + }, { .name = "verifysort", .lname = "Verify sort", diff --git a/t/verify-state.c b/t/verify-state.c index f8787e9a33..465889f53d 100644 --- a/t/verify-state.c +++ b/t/verify-state.c @@ -26,14 +26,16 @@ static void show_s(struct thread_io_list *s, unsigned int no_s) printf("Max completions per file:\t\t%lu\n", (unsigned long) s->max_no_comps_per_file); printf("Number IOs:\t%llu\n", (unsigned long long) s->numberio); printf("Index:\t\t%llu\n", (unsigned long long) s->index); + printf("Last flush count:\t%u\n", s->last_flush_count); printf("Completions:\n"); if (!s->no_comps) return; for (i = s->no_comps - 1; i >= 0; i--) { - printf("\t(file=%2llu) %llu\n", + printf("\t(file=%2llu) %llu (flush_count=%u)\n", 
(unsigned long long) s->comps[i].fileno, - (unsigned long long) s->comps[i].offset); + (unsigned long long) s->comps[i].offset, + s->comps[i].flush_count); } } @@ -51,10 +53,12 @@ static void show(struct thread_io_list *s, size_t size) s->nofiles = le32_to_cpu(s->nofiles); s->numberio = le64_to_cpu(s->numberio); s->index = le64_to_cpu(s->index); + s->last_flush_count = le32_to_cpu(s->last_flush_count); for (i = 0; i < s->no_comps; i++) { s->comps[i].fileno = le64_to_cpu(s->comps[i].fileno); s->comps[i].offset = le64_to_cpu(s->comps[i].offset); + s->comps[i].flush_count = le32_to_cpu(s->comps[i].flush_count); } show_s(s, no_s); @@ -92,7 +96,7 @@ static void show_verify_state(void *buf, size_t size) return; } - if (hdr->version == 0x04) + if (hdr->version == 0x05) show(s, size); else log_err("Unsupported version %d\n", (int) hdr->version); diff --git a/thread_options.h b/thread_options.h index 1b26ab5864..9f733ff6cb 100644 --- a/thread_options.h +++ b/thread_options.h @@ -142,6 +142,7 @@ struct thread_options { unsigned int sync_io; unsigned int write_hint; unsigned int verify; + unsigned int verify_type; unsigned int do_verify; unsigned int verify_interval; unsigned int verify_offset; @@ -189,6 +190,7 @@ struct thread_options { struct zone_split *zone_split[DDIR_RWDIR_CNT]; unsigned int zone_split_nr[DDIR_RWDIR_CNT]; + uint32_t pad2; fio_fp64_t zipf_theta; fio_fp64_t pareto_h; @@ -477,6 +479,7 @@ struct thread_options_pack { uint32_t sync_io; uint32_t write_hint; uint32_t verify; + uint32_t verify_type; uint32_t do_verify; uint32_t verify_interval; uint32_t verify_offset; @@ -521,6 +524,7 @@ struct thread_options_pack { struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX]; uint32_t zone_split_nr[DDIR_RWDIR_CNT]; + uint32_t pad2; fio_fp64_t zipf_theta; fio_fp64_t pareto_h; diff --git a/verify-state.h b/verify-state.h index 603af70d4b..c2d17d5ef4 100644 --- a/verify-state.h +++ b/verify-state.h @@ -28,6 +28,8 @@ struct thread_rand_state { struct file_comp { 
uint64_t fileno; uint64_t offset; + uint32_t flush_count; /* FLUSH count at completion time for ordering */ + uint32_t flags; /* I/O flags including FUA */ }; struct thread_io_list { @@ -37,6 +39,8 @@ struct thread_io_list { uint32_t nofiles; uint64_t numberio; uint64_t index; + uint32_t last_flush_count; /* Last FLUSH count for ordering */ + uint32_t padding; /* Padding for alignment */ struct thread_rand_state rand; uint8_t name[64]; struct file_comp comps[0]; @@ -47,7 +51,7 @@ struct all_io_list { struct thread_io_list state[0]; }; -#define VSTATE_HDR_VERSION 0x04 +#define VSTATE_HDR_VERSION 0x05 /* Incremented for FLUSH count support */ struct verify_state_hdr { uint64_t version; @@ -57,6 +61,9 @@ struct verify_state_hdr { #define IO_LIST_ALL 0xffffffff +/* Flags for file_comp.flags */ +#define FIO_COMP_FLAG_FUA 0x1 /* Write had Force Unit Access flag */ + struct io_u; extern struct all_io_list *get_all_io_list(int, size_t *); extern void __verify_save_state(struct all_io_list *, const char *); diff --git a/verify.c b/verify.c index 04718f303a..9c42512fd1 100644 --- a/verify.c +++ b/verify.c @@ -1407,6 +1407,7 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u) if (io_u->file) return 0; +retry: if (!RB_EMPTY_ROOT(&td->io_hist_tree)) { struct fio_rb_node *n = rb_first(&td->io_hist_tree); @@ -1466,6 +1467,11 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u) remove_trim_entry(td, ipo); free(ipo); + + /* Check if this offset was filtered out by FLUSH timing */ + if (verify_state_should_stop(td, io_u)) + goto retry; + dprint(FD_VERIFY, "get_next_verify: ret io_u %p\n", io_u); if (!td->o.verify_pattern_bytes) { @@ -1649,7 +1655,9 @@ static int __fill_file_completions(struct thread_data *td, if (j == -1) j = td->last_write_comp_depth - 1; s->comps[*index].fileno = __cpu_to_le64(f->fileno); - s->comps[*index].offset = cpu_to_le64(f->last_write_comp[j]); + s->comps[*index].offset = cpu_to_le64(f->last_write_comp[j].offset); + 
s->comps[*index].flush_count = cpu_to_le32(f->last_write_comp[j].flush_count); + s->comps[*index].flags = cpu_to_le32(f->last_write_comp[j].flags); (*index)++; j--; } @@ -1661,12 +1669,17 @@ static int fill_file_completions(struct thread_data *td, struct thread_io_list *s, unsigned int *index) { struct fio_file *f; - unsigned int i; + unsigned int i = 0; int comps = 0; + uint32_t max_flush_count = 0; - for_each_file(td, f, i) + for_each_file(td, f, i) { comps += __fill_file_completions(td, s, f, index); + if (f->flush_count > max_flush_count) + max_flush_count = f->flush_count; + } + s->last_flush_count = cpu_to_le32(max_flush_count); return comps; } @@ -1887,6 +1900,46 @@ int verify_state_hdr(struct verify_state_hdr *hdr, struct thread_io_list *s) return 0; } +/* + * Compact s->comps in place using the saved FLUSH counts (no-op if last_flush_count is 0): + * - Keep writes whose flush_count is below s->last_flush_count, drop the rest + * - Keep FUA writes regardless of flush_count + * - NOTE(review): last_flush_count is the max over all files, flush_count is per file - confirm for multi-file jobs + */ +static void filter_verify_state_by_flush_timing(struct thread_io_list *s) +{ + uint32_t last_flush_count; + int original_count; + int i, j; + + if (!s || s->no_comps == 0) + return; + + original_count = le64_to_cpu(s->no_comps); + last_flush_count = le32_to_cpu(s->last_flush_count); + + if (last_flush_count == 0) + return; + + /* i scans every saved record, j is the next slot for a kept record */ + for (i = 0, j = 0; i < original_count; i++) { + uint32_t write_flush_count = le32_to_cpu(s->comps[i].flush_count); + uint32_t write_flags = le32_to_cpu(s->comps[i].flags); + + /* FIO_COMP_FLAG_FUA is tested against flags saved verbatim from last_write_comp[].flags */ + if ((write_flags & FIO_COMP_FLAG_FUA) || + (write_flush_count < last_flush_count)) { + /* Kept record: slide it down over any dropped entries */ + if (i != j) + s->comps[j] = s->comps[i]; + j++; + } + } + + /* Store the kept-record count back in little-endian form */ + s->no_comps = cpu_to_le64((uint64_t) j); +} + int verify_load_state(struct thread_data *td, const char *prefix) { 
struct verify_state_hdr hdr; @@ -1937,6 +1990,10 @@ int verify_load_state(struct thread_data *td, const char *prefix) close(fd); + /* Filter completion records based on FLUSH timing before assigning state */ + if (td->o.verify_type == VERIFY_TYPE_FLUSH) + filter_verify_state_by_flush_timing(s); + verify_assign_state(td, s); return 0; err: @@ -1971,6 +2028,9 @@ int verify_state_should_stop(struct thread_data *td, struct io_u *io_u) * We're in the window of having to check if this io was * completed or not. If the IO was seen as completed, then * lets verify it. + * + * Note: FLUSH completion timing filtering is now done at state load time, + * so any offset in the completion list is valid for verification. */ for (i = 0; i < s->no_comps; i++) { if (s->comps[i].fileno != f->fileno) diff --git a/verify.h b/verify.h index 539e6f6cf5..f5c7a99535 100644 --- a/verify.h +++ b/verify.h @@ -30,6 +30,8 @@ enum { VERIFY_PATTERN, /* verify specific patterns */ VERIFY_PATTERN_NO_HDR, /* verify specific patterns, no hdr */ VERIFY_NULL, /* pretend to verify */ + + VERIFY_TYPE_FLUSH, /* verify offsets based on flush completion time */ }; /*