Skip to content

Commit e4275de

Browse files
committed
fio: add latency steady state detection
Add fio latency steady state support. The implementation calculates weighted average latency across all I/O directions and supports both maximum mean deviation and slope-based detection methods. Tested successfully against NVMe device with debug output confirming proper latency calculation and steady state evaluation. Quick demo with the null engine: fio --name=test --ioengine=null --size=1G --rw=randread --bs=4k --iodepth=64 --steadystate=lat:100us --ss_dur=10s --runtime=120s --time_based=1 test: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=null, iodepth=64 fio-3.41-20-gf2b2e-dirty Starting 1 process Jobs: 1 (f=1): [f(1)][100.0%][r=15.6GiB/s][r=4078k IOPS][eta 00m:00s] test: (groupid=0, jobs=1): err= 0: pid=1207074: Tue Oct 7 11:47:27 2025 read: IOPS=4054k, BW=15.5GiB/s (16.6GB/s)(153GiB/9899msec) slat (nsec): min=18, max=33347, avg=22.20, stdev=64.49 clat (nsec): min=135, max=42283, avg=152.74, stdev=173.53 lat (nsec): min=156, max=42306, avg=174.94, stdev=185.32 clat percentiles (nsec): | 1.00th=[ 139], 5.00th=[ 141], 10.00th=[ 141], 20.00th=[ 143], | 30.00th=[ 143], 40.00th=[ 145], 50.00th=[ 147], 60.00th=[ 153], | 70.00th=[ 155], 80.00th=[ 157], 90.00th=[ 163], 95.00th=[ 171], | 99.00th=[ 217], 99.50th=[ 221], 99.90th=[ 239], 99.95th=[ 270], | 99.99th=[ 2096] bw ( MiB/s): min=15344, max=15944, per=100.00%, avg=15835.92, stdev=132.94, samples=19 iops : min=3928298, max=4081694, avg=4053996.63, stdev=34033.65, samples=19 lat (nsec) : 250=99.93%, 500=0.06%, 750=0.01%, 1000=0.01% lat (usec) : 2=0.01%, 4=0.01%, 10=0.01%, 20=0.01%, 50=0.01% cpu : usr=99.93%, sys=0.03%, ctx=88, majf=0, minf=6 IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% issued rwts: total=40129848,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=64 
steadystate : attained=yes, bw=15.2GiB/s (15.9GB/s), iops=3976k, lat=152nsec, lat mean dev=2.700 Run status group 0 (all jobs): READ: bw=15.5GiB/s (16.6GB/s), 15.5GiB/s-15.5GiB/s (16.6GB/s-16.6GB/s), io=153GiB (164GB), run=9899-9899msec Generated-by: Claude AI Signed-off-by: Luis Chamberlain <[email protected]>
1 parent 81a80cb commit e4275de

File tree

8 files changed

+195
-18
lines changed

8 files changed

+195
-18
lines changed

HOWTO.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4216,6 +4216,18 @@ Steady state
42164216
Collect bandwidth data and calculate the least squares regression
42174217
slope. Stop the job if the slope falls below the specified limit.
42184218

4219+
**lat**
4220+
Collect completion latency data and calculate the maximum mean
4221+
deviation. Stop the job if the deviation falls below the specified
4222+
limit. The latency values are weighted by the number of I/O samples
4223+
in each measurement interval.
4224+
4225+
**lat_slope**
4226+
Collect completion latency data and calculate the least squares
4227+
regression slope. Stop the job if the slope falls below the
4228+
specified limit. The latency values are weighted by the number
4229+
of I/O samples in each measurement interval.
4230+
42194231
.. option:: steadystate_duration=time, ss_dur=time
42204232

42214233
A rolling window of this duration will be used to judge whether steady

client.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,6 +1079,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
10791079
for (i = 0; i < dst->ss_dur; i++ ) {
10801080
dst->ss_iops_data[i] = le64_to_cpu(src->ss_iops_data[i]);
10811081
dst->ss_bw_data[i] = le64_to_cpu(src->ss_bw_data[i]);
1082+
dst->ss_lat_data[i] = le64_to_cpu(src->ss_lat_data[i]);
10821083
}
10831084
}
10841085

example_latency_steadystate.fio

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Example FIO job file demonstrating latency steady state detection
2+
# This example shows how to use FIO's latency steady state detection
3+
# to automatically terminate workloads when latency stabilizes
4+
#
5+
# Based on SNIA SSD Performance Test Specification requirements:
6+
# - Steady state is achieved when, over a 5-round measurement window, the data
7+
# range (max - min) stays within 20% of the average and the best-fit slope excursion within 10% of the average
8+
# - This example uses a more conservative 5% deviation threshold for demonstration
9+
10+
[global]
11+
# Basic I/O parameters
12+
ioengine=libaio
13+
iodepth=32
14+
bs=4k
15+
direct=1
16+
rw=randread
17+
numjobs=1
18+
time_based=1
19+
runtime=3600 # Max runtime: 1 hour (will terminate early if steady state reached)
20+
21+
# Steady state detection parameters
22+
steadystate=lat:5% # Stop when latency mean deviation < 5% of average
23+
steadystate_duration=300 # Use 5-minute rolling window for measurements
24+
steadystate_ramp_time=60 # Wait 1 minute before starting measurements
25+
steadystate_check_interval=10 # Take measurements every 10 seconds
26+
27+
# Output options
28+
write_lat_log=lat_steadystate
29+
log_avg_msec=10000 # Log average latency every 10 seconds
30+
31+
[latency_steady_test]
32+
filename=/dev/nvme3n1
33+
size=10G
34+
35+
# Alternative steady state configurations (uncomment to try):
36+
37+
# Use slope-based detection instead of deviation:
38+
# steadystate=lat_slope:0.1%
39+
40+
# Tighter threshold with a shorter window and more frequent checks:
41+
# steadystate=lat:2%
42+
# steadystate_duration=120 # 2-minute window
43+
# steadystate_check_interval=5 # Check every 5 seconds
44+
45+
# Looser threshold with a longer window:
46+
# steadystate=lat:10%
47+
# steadystate_duration=600 # 10-minute window

options.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1364,7 +1364,8 @@ static int str_random_distribution_cb(void *data, const char *str)
13641364
static bool is_valid_steadystate(unsigned int state)
13651365
{
13661366
return (state == FIO_SS_IOPS || state == FIO_SS_IOPS_SLOPE ||
1367-
state == FIO_SS_BW || state == FIO_SS_BW_SLOPE);
1367+
state == FIO_SS_BW || state == FIO_SS_BW_SLOPE ||
1368+
state == FIO_SS_LAT || state == FIO_SS_LAT_SLOPE);
13681369
}
13691370

13701371
static int str_steadystate_cb(void *data, const char *str)
@@ -1419,6 +1420,21 @@ static int str_steadystate_cb(void *data, const char *str)
14191420
return 0;
14201421

14211422
td->o.ss_limit.u.f = val;
1423+
} else if (td->o.ss_state & FIO_SS_LAT) {
1424+
long long tns;
1425+
if (check_str_time(nr, &tns, 0)) {
1426+
log_err("fio: steadystate latency threshold parsing failed\n");
1427+
free(nr);
1428+
return 1;
1429+
}
1430+
1431+
dprint(FD_PARSE, "set steady state latency threshold to %lld nsec\n", tns);
1432+
free(nr);
1433+
if (parse_dryrun())
1434+
return 0;
1435+
1436+
td->o.ss_limit.u.f = (double) tns;
1437+
14221438
} else { /* bandwidth criterion */
14231439
if (str_to_decimal(nr, &ll, 1, td, 0, 0)) {
14241440
log_err("fio: steadystate BW threshold postfix parsing failed\n");
@@ -5536,6 +5552,14 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
55365552
.oval = FIO_SS_BW_SLOPE,
55375553
.help = "slope calculated from bandwidth measurements",
55385554
},
5555+
{ .ival = "lat",
5556+
.oval = FIO_SS_LAT,
5557+
.help = "maximum mean deviation of latency measurements",
5558+
},
5559+
{ .ival = "lat_slope",
5560+
.oval = FIO_SS_LAT_SLOPE,
5561+
.help = "slope calculated from latency measurements",
5562+
},
55395563
},
55405564
.category = FIO_OPT_C_GENERAL,
55415565
.group = FIO_OPT_G_RUNTIME,

stat.c

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -935,31 +935,52 @@ static void show_block_infos(int nr_block_infos, uint32_t *block_infos,
935935

936936
static void show_ss_normal(const struct thread_stat *ts, struct buf_output *out)
937937
{
938-
char *p1, *p1alt, *p2;
939-
unsigned long long bw_mean, iops_mean;
938+
char *p1, *p1alt, *p2, *p3 = NULL;
939+
unsigned long long bw_mean, iops_mean, lat_mean;
940940
const int i2p = is_power_of_2(ts->kb_base);
941941

942942
if (!ts->ss_dur)
943943
return;
944944

945945
bw_mean = steadystate_bw_mean(ts);
946946
iops_mean = steadystate_iops_mean(ts);
947+
lat_mean = steadystate_lat_mean(ts);
947948

948949
p1 = num2str(bw_mean / ts->kb_base, ts->sig_figs, ts->kb_base, i2p, ts->unit_base);
949950
p1alt = num2str(bw_mean / ts->kb_base, ts->sig_figs, ts->kb_base, !i2p, ts->unit_base);
950951
p2 = num2str(iops_mean, ts->sig_figs, 1, 0, N2S_NONE);
952+
if (ts->ss_state & FIO_SS_LAT) {
953+
const char *lat_unit = "nsec";
954+
unsigned long long lat_val = lat_mean;
955+
double lat_mean_d = lat_mean, lat_dev_d = 0.0;
956+
char *lat_num;
951957

952-
log_buf(out, " steadystate : attained=%s, bw=%s (%s), iops=%s, %s%s=%.3f%s\n",
958+
if (nsec_to_msec(&lat_val, &lat_val, &lat_mean_d, &lat_dev_d))
959+
lat_unit = "msec";
960+
else if (nsec_to_usec(&lat_val, &lat_val, &lat_mean_d, &lat_dev_d))
961+
lat_unit = "usec";
962+
963+
lat_num = num2str((unsigned long long)lat_mean_d, ts->sig_figs, 1, 0, N2S_NONE);
964+
if (asprintf(&p3, "%s%s", lat_num, lat_unit) < 0)
965+
p3 = NULL;
966+
free(lat_num);
967+
}
968+
969+
log_buf(out, " steadystate : attained=%s, bw=%s (%s), iops=%s%s%s, %s%s=%.3f%s\n",
953970
ts->ss_state & FIO_SS_ATTAINED ? "yes" : "no",
954971
p1, p1alt, p2,
955-
ts->ss_state & FIO_SS_IOPS ? "iops" : "bw",
972+
p3 ? ", lat=" : "",
973+
p3 ? p3 : "",
974+
ts->ss_state & FIO_SS_IOPS ? "iops" : (ts->ss_state & FIO_SS_LAT ? "lat" : "bw"),
956975
ts->ss_state & FIO_SS_SLOPE ? " slope": " mean dev",
957976
ts->ss_criterion.u.f,
958977
ts->ss_state & FIO_SS_PCT ? "%" : "");
959978

960979
free(p1);
961980
free(p1alt);
962981
free(p2);
982+
if (p3)
983+
free(p3);
963984
}
964985

965986
static void show_agg_stats(const struct disk_util_agg *agg, int terse,
@@ -1903,7 +1924,7 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
19031924
int intervals = ts->ss_dur / (ss_check_interval / 1000L);
19041925

19051926
snprintf(ss_buf, sizeof(ss_buf), "%s%s:%f%s",
1906-
ts->ss_state & FIO_SS_IOPS ? "iops" : "bw",
1927+
ts->ss_state & FIO_SS_IOPS ? "iops" : (ts->ss_state & FIO_SS_LAT ? "lat" : "bw"),
19071928
ts->ss_state & FIO_SS_SLOPE ? "_slope" : "",
19081929
(float) ts->ss_limit.u.f,
19091930
ts->ss_state & FIO_SS_PCT ? "%" : "");
@@ -1942,6 +1963,16 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
19421963
}
19431964
json_object_add_value_int(data, "bw_mean", steadystate_bw_mean(ts));
19441965
json_object_add_value_int(data, "iops_mean", steadystate_iops_mean(ts));
1966+
if (ts->ss_state & FIO_SS_LAT) {
1967+
struct json_array *lat;
1968+
lat = json_create_array();
1969+
for (l = 0; l < intervals; l++) {
1970+
k = (j + l) % intervals;
1971+
json_array_add_value_int(lat, ts->ss_lat_data[k]);
1972+
}
1973+
json_object_add_value_int(data, "lat_mean", steadystate_lat_mean(ts));
1974+
json_object_add_value_array(data, "lat", lat);
1975+
}
19451976
json_object_add_value_array(data, "iops", iops);
19461977
json_object_add_value_array(data, "bw", bw);
19471978
}
@@ -2600,6 +2631,7 @@ void __show_run_stats(void)
26002631
ts->ss_head = td->ss.head;
26012632
ts->ss_bw_data = td->ss.bw_data;
26022633
ts->ss_iops_data = td->ss.iops_data;
2634+
ts->ss_lat_data = td->ss.lat_data;
26032635
ts->ss_limit.u.f = td->ss.limit;
26042636
ts->ss_slope.u.f = td->ss.slope;
26052637
ts->ss_deviation.u.f = td->ss.deviation;

stat.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,16 @@ struct thread_stat {
283283
uint64_t pad5;
284284
};
285285

286+
union {
287+
uint64_t *ss_lat_data;
288+
/*
289+
* For FIO_NET_CMD_TS, the pointed to data will temporarily
290+
* be stored at this offset from the start of the payload.
291+
*/
292+
uint64_t ss_lat_data_offset;
293+
uint64_t pad5b;
294+
};
295+
286296
union {
287297
struct clat_prio_stat *clat_prio[DDIR_RWDIR_CNT];
288298
/*

0 commit comments

Comments
 (0)