@@ -58,17 +58,17 @@ static_assert(false, "No <filesystem> or <experimental/filesystem> found.");
5858namespace
5959{
6060
61- bool is_little_endian ()
62- {
6361#if NVBENCH_CPP_DIALECT >= 2020
64- return std::endian::native == std::endian::little;
62+ constexpr bool is_little_endian () noexcept { return std::endian::native == std::endian::little; }
6563#else
64+ bool is_little_endian () noexcept
65+ {
6666 const nvbench::uint32_t word = {0xBadDecaf };
6767 nvbench::uint8_t bytes[4 ];
6868 std::memcpy (bytes, &word, 4 );
6969 return bytes[0 ] == 0xaf ;
70- #endif
7170}
71+ #endif
7272
7373template <typename JsonNode>
7474void write_named_values (JsonNode &node, const nvbench::named_values &values)
@@ -167,23 +167,42 @@ void json_printer::do_process_bulk_data_float64(state &state,
167167 out.exceptions (out.exceptions () | std::ios::failbit | std::ios::badbit);
168168 out.open (result_path, std::ios::binary | std::ios::out);
169169
170- // FIXME: SLOW -- Writing the binary file, 4 bytes at a time...
171- // There are a lot of optimizations that could be done here if this ends
172- // up being a noticeable bottleneck.
170+ // Stage values in a 4096-byte buffer (a common SSD block size) so that writes are batched.
171+ static constexpr std::size_t buffer_nbytes = 4096 ;
172+ static constexpr std::size_t value_nbytes = sizeof (nvbench::float32_t );
173+ static_assert (buffer_nbytes % value_nbytes == 0 );
174+
175+ alignas (alignof (nvbench::float32_t )) char buffer[buffer_nbytes];
176+ std::size_t bytes_in_buffer = 0 ;
177+
173178 for (auto value64 : data)
174179 {
175- const auto value32 = static_cast <nvbench::float32_t >(value64);
176- char buffer[4 ];
177- std::memcpy (buffer, &value32, 4 );
180+ const auto value32 = static_cast <nvbench::float32_t >(value64);
181+ auto value_subbuffer = &buffer[bytes_in_buffer];
182+ std::memcpy (value_subbuffer, &value32, value_nbytes);
183+
178184 // The pre-C++20 implementation of is_little_endian isn't constexpr, but
179185 // all supported compilers optimize this branch as if it were.
180186 if (!is_little_endian ())
181187 {
182188 using std::swap;
183- swap (buffer[0 ], buffer[3 ]);
184- swap (buffer[1 ], buffer[2 ]);
189+ swap (value_subbuffer[0 ], value_subbuffer[3 ]);
190+ swap (value_subbuffer[1 ], value_subbuffer[2 ]);
191+ }
192+ bytes_in_buffer += value_nbytes;
193+
193+ // Once the buffer is full, write it out and start refilling from the beginning.
195+ if (bytes_in_buffer == buffer_nbytes)
196+ {
197+ out.write (buffer, buffer_nbytes);
198+ bytes_in_buffer = 0 ;
185199 }
186- out.write (buffer, 4 );
200+ } // end of foreach value64 in data
201+
202+ if (bytes_in_buffer)
203+ {
204+ out.write (buffer, bytes_in_buffer);
205+ bytes_in_buffer = 0 ;
187206 }
188207 }
189208 catch (std::exception &e)
0 commit comments