Skip to content

Commit 8097707

Browse files
authored
Merge pull request #32 from redhat-performance/feat/RPOPC-1304-fio-multi-metric
RPOPC-1304: Add multi-metric support for FIO benchmark
2 parents 1fee56c + b7e3fa9 commit 8097707

2 files changed

Lines changed: 284 additions & 28 deletions

File tree

src/chronicler/processors/fio_processor.py

Lines changed: 64 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from .base_processor import BaseProcessor, ProcessorError
1010
from ..schema import (
11-
Run, TimeSeriesPoint, TimeSeriesSummary, PrimaryMetric,
11+
Run, TimeSeriesPoint, TimeSeriesSummary, PrimaryMetric, StatisticalSummary,
1212
create_run_key, create_sequence_key
1313
)
1414
from ..utils.parser_utils import (
@@ -77,42 +77,78 @@ class FioProcessor(BaseProcessor):
7777
def get_test_name(self) -> str:
7878
return "fio"
7979

80-
def build_results(self) -> Any:
80+
def _extract_primary_metrics(
81+
self, runs: Dict[str, Any],
82+
overall_stats: Optional[StatisticalSummary]
83+
) -> Optional[List[PrimaryMetric]]:
8184
"""
82-
Build Results object with overall primary metric.
85+
Extract bandwidth, IOPS, and latency as coequal primary metrics.
8386
84-
Primary metric is the maximum bandwidth achieved across all workloads.
87+
FIO is a multi-metric benchmark measuring disk I/O performance across three
88+
dimensions. All three metrics are equally important for characterizing performance.
8589
86-
Returns:
87-
Results object
90+
Returns list of PrimaryMetric objects for: bandwidth, iops, latency.
8891
"""
89-
# Call parent to build basic Results object
90-
results = super().build_results()
91-
92-
if not results or not results.runs:
93-
return results
94-
95-
# Find the run with the highest bandwidth
96-
max_bw = 0
97-
max_bw_run = None
92+
if not runs:
93+
return None
94+
95+
# Collect metric values from all runs
96+
bandwidth_values = []
97+
iops_values = []
98+
latency_values = []
99+
100+
for run_key, run in runs.items():
101+
# Handle both dict and Run dataclass objects
102+
metrics = None
103+
if isinstance(run, dict) and 'metrics' in run:
104+
metrics = run['metrics']
105+
elif hasattr(run, 'metrics') and run.metrics:
106+
metrics = run.metrics
107+
108+
if metrics:
109+
# Extract bandwidth
110+
if 'total_bandwidth_kbps' in metrics and metrics['total_bandwidth_kbps'] is not None:
111+
bandwidth_values.append(metrics['total_bandwidth_kbps'])
112+
113+
# Extract IOPS
114+
if 'total_iops' in metrics and metrics['total_iops'] is not None:
115+
iops_values.append(metrics['total_iops'])
116+
117+
# Extract latency
118+
if 'avg_latency_mean_ns' in metrics and metrics['avg_latency_mean_ns'] is not None:
119+
latency_values.append(metrics['avg_latency_mean_ns'])
120+
121+
# Build list of primary metrics (only include metrics with data)
122+
primary_metrics = []
123+
124+
if bandwidth_values:
125+
primary_metrics.append(
126+
PrimaryMetric(
127+
name='bandwidth',
128+
value=statistics.mean(bandwidth_values),
129+
unit='KiB/s'
130+
)
131+
)
98132

99-
for run_key, run in results.runs.items():
100-
bw = run.metrics.get('total_bandwidth_kbps', 0)
101-
if bw > max_bw:
102-
max_bw = bw
103-
max_bw_run = run
133+
if iops_values:
134+
primary_metrics.append(
135+
PrimaryMetric(
136+
name='iops',
137+
value=statistics.mean(iops_values),
138+
unit='IOPS'
139+
)
140+
)
104141

105-
# Set primary metrics (single-element list for now; see issue #27 for multi-metric)
106-
if max_bw_run and max_bw > 0:
107-
results.primary_metrics = [
142+
if latency_values:
143+
primary_metrics.append(
108144
PrimaryMetric(
109-
name='max_bandwidth',
110-
value=max_bw,
111-
unit='KiB/s'
145+
name='latency',
146+
value=statistics.mean(latency_values),
147+
unit='nanoseconds'
112148
)
113-
]
149+
)
114150

115-
return results
151+
return primary_metrics if primary_metrics else None
116152

117153
def parse_runs(self, extracted_result: Dict[str, Any]) -> Dict[str, Any]:
118154
"""

tests/test_fio_multi_metric.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
"""
2+
FIO processor: multi-metric extraction (bandwidth, IOPS, latency).
3+
4+
Tests that FIO extracts all three coequal metrics as primary_metrics.
5+
"""
6+
7+
import pytest
8+
import json
9+
from pathlib import Path
10+
from unittest.mock import patch
11+
12+
from chronicler.processors.fio_processor import FioProcessor
13+
from chronicler.processors.base_processor import ProcessorError
14+
from chronicler.schema import Run
15+
16+
pytestmark = pytest.mark.unit
17+
18+
19+
def test_fio_extracts_all_three_primary_metrics(result_dir):
    """
    Two FIO workloads must yield bandwidth, IOPS, and latency primary metrics.

    Covers RPOPC-1304 (multi-metric support for FIO). Every expected value is
    the mean of the two per-workload figures written to disk below.
    """
    def stats(mean, low, high):
        # Latency summary in the mean/min/max shape used by the fixture JSON.
        return {"mean": mean, "min": low, "max": high}

    def fio_result(timestamp, read_section):
        # Single-job fio JSON document.
        job = {"jobname": "job0", "read": read_section, "elapsed": 60}
        return {"timestamp": timestamp, "jobs": [job]}

    # Run 0: read-4KiB workload, stamped 2024-02-04 00:00:00 UTC
    small_block_run = fio_result(1707004800, {
        "bw": 500000,
        "iops": 125000,
        "io_bytes": 1024000000,
        "total_ios": 250000,
        "lat_ns": stats(120000, 100000, 150000),
        "clat_ns": stats(118000, 98000, 148000),
        "slat_ns": stats(2000, 1000, 3000),
    })

    # Run 1: read-1024KiB workload (different metrics), stamped 00:02:00 UTC
    large_block_run = fio_result(1707004920, {
        "bw": 800000,
        "iops": 200000,
        "io_bytes": 2048000000,
        "total_ios": 500000,
        "lat_ns": stats(140000, 120000, 170000),
        "clat_ns": stats(138000, 118000, 168000),
        "slat_ns": stats(2000, 1000, 3000),
    })

    # On-disk layout: export dir -> config dir -> one directory per workload
    export_root = result_dir / "export_fio_data_test"
    export_root.mkdir()

    config_root = export_root / "fio_ndisks_1_disksize_10_GiB_njobs_1_ioengine_libaio_iodepth_16_2024.02.04T00.00.00"
    config_root.mkdir()

    workloads = (
        ("1-read-4KiB", small_block_run),
        ("2-read-1024KiB", large_block_run),
    )
    for folder_name, payload in workloads:
        workload_root = config_root / folder_name
        workload_root.mkdir()
        (workload_root / "fio-results.json").write_text(json.dumps(payload))

    # Status report consumed alongside the results
    (export_root / "test_results_report").write_text("Ran 2 tests")

    # Empty placeholder archive; extraction itself is stubbed out below
    (result_dir / "results_fio.zip").write_bytes(b"")

    processor = FioProcessor(str(result_dir))
    archive_stub = {"files": {}, "extracted_path": str(result_dir)}

    # Bypass real archive extraction and run the full build
    with patch.object(processor.archive_handler, "extract_result_archive") as extract_stub:
        extract_stub.return_value = archive_stub
        results = processor.build_results()

    # All three metrics must be reported
    assert results.primary_metrics is not None, "primary_metrics should not be None"
    assert len(results.primary_metrics) == 3, f"Expected 3 metrics, got {len(results.primary_metrics)}"

    metrics_by_name = {}
    for metric in results.primary_metrics:
        metrics_by_name[metric.name] = metric

    assert "bandwidth" in metrics_by_name, "bandwidth metric missing"
    assert "iops" in metrics_by_name, "iops metric missing"
    assert "latency" in metrics_by_name, "latency metric missing"

    # Units
    expected_units = (
        ("bandwidth", "KiB/s"),
        ("iops", "IOPS"),
        ("latency", "nanoseconds"),
    )
    for metric_name, unit in expected_units:
        assert metrics_by_name[metric_name].unit == unit

    # Values are means across both runs:
    #   bandwidth (500000 + 800000) / 2 = 650000
    #   iops      (125000 + 200000) / 2 = 162500
    #   latency   (120000 + 140000) / 2 = 130000
    expected_means = (
        ("bandwidth", 650000.0),
        ("iops", 162500.0),
        ("latency", 130000.0),
    )
    for metric_name, mean_value in expected_means:
        assert metrics_by_name[metric_name].value == mean_value
149+
150+
151+
def test_fio_handles_single_run(result_dir):
    """
    A single FIO workload must still yield all three primary metrics.

    With only one run there is nothing to average, so each primary metric is
    expected to equal that run's own value.
    """
    def stats(mean, low, high):
        # Latency summary in the mean/min/max shape used by the fixture JSON.
        return {"mean": mean, "min": low, "max": high}

    read_section = {
        "bw": 600000,
        "iops": 150000,
        "io_bytes": 1024000000,
        "total_ios": 250000,
        "lat_ns": stats(125000, 110000, 160000),
        "clat_ns": stats(123000, 108000, 158000),
        "slat_ns": stats(2000, 1000, 3000),
    }
    only_job = {"jobname": "job0", "read": read_section, "elapsed": 60}
    fio_payload = {"timestamp": 1707004800, "jobs": [only_job]}

    # Create proper directory structure (needed for operation type detection)
    export_root = result_dir / "export_fio_data_test"
    export_root.mkdir()

    config_root = export_root / "fio_ndisks_1_disksize_10_GiB_njobs_1_ioengine_libaio_iodepth_16_2024.02.04T00.00.00"
    config_root.mkdir()

    workload_root = config_root / "1-read-4KiB"
    workload_root.mkdir()
    (workload_root / "fio-results.json").write_text(json.dumps(fio_payload))

    (export_root / "test_results_report").write_text("Ran 1 test")

    # Empty placeholder archive; extraction itself is stubbed out below
    (result_dir / "results_fio.zip").write_bytes(b"")

    processor = FioProcessor(str(result_dir))
    archive_stub = {"files": {}, "extracted_path": str(result_dir)}

    with patch.object(processor.archive_handler, "extract_result_archive") as extract_stub:
        extract_stub.return_value = archive_stub
        results = processor.build_results()

    # Should have exactly 3 metrics from single run
    assert results.primary_metrics is not None
    assert len(results.primary_metrics) == 3

    metrics_by_name = {}
    for metric in results.primary_metrics:
        metrics_by_name[metric.name] = metric

    # Values should match the single run (no averaging)
    expected_values = (
        ("bandwidth", 600000.0),
        ("iops", 150000.0),
        ("latency", 125000.0),
    )
    for metric_name, value in expected_values:
        assert metrics_by_name[metric_name].value == value

0 commit comments

Comments
 (0)