@@ -106,6 +106,45 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values)
106106 } // end foreach value name
107107}
108108
109+ template <std::size_t buffer_nbytes>
110+ void write_out_values (std::ofstream &out, const std::vector<nvbench::float64_t > &data)
111+ {
112+ static constexpr std::size_t value_nbytes = sizeof (nvbench::float32_t );
113+ static_assert (buffer_nbytes % value_nbytes == 0 );
114+
115+ alignas (alignof (nvbench::float32_t )) char buffer[buffer_nbytes];
116+ std::size_t bytes_in_buffer = 0 ;
117+
118+ for (auto value64 : data)
119+ {
120+ const auto value32 = static_cast <nvbench::float32_t >(value64);
121+ auto value_subbuffer = &buffer[bytes_in_buffer];
122+ std::memcpy (value_subbuffer, &value32, value_nbytes);
123+
124+ // the c++17 implementation of is_little_endian isn't constexpr, but
125+ // all supported compilers optimize this branch as if it were.
126+ if (!is_little_endian ())
127+ {
128+ std::swap (value_subbuffer[0 ], value_subbuffer[3 ]);
129+ std::swap (value_subbuffer[1 ], value_subbuffer[2 ]);
130+ }
131+ bytes_in_buffer += value_nbytes;
132+
133+ // if buffer is full, write it out and wrap around
134+ if (bytes_in_buffer == buffer_nbytes)
135+ {
136+ out.write (buffer, static_cast <std::streamsize>(buffer_nbytes));
137+ bytes_in_buffer = 0 ;
138+ }
139+ } // end of foreach value64 in data
140+
141+ if (bytes_in_buffer)
142+ {
143+ out.write (buffer, static_cast <std::streamsize>(bytes_in_buffer));
144+ bytes_in_buffer = 0 ;
145+ }
146+ }
147+
109148} // end namespace
110149
111150namespace nvbench
@@ -168,42 +207,9 @@ void json_printer::do_process_bulk_data_float64(state &state,
168207 out.open (result_path, std::ios::binary | std::ios::out);
169208
170209 // choose buffer to be block size of modern SSD
171- static constexpr std::size_t buffer_nbytes = 4096 ;
172- static constexpr std::size_t value_nbytes = sizeof (nvbench::float32_t );
173- static_assert (buffer_nbytes % value_nbytes == 0 );
174-
175- alignas (alignof (nvbench::float32_t )) char buffer[buffer_nbytes];
176- std::size_t bytes_in_buffer = 0 ;
177-
178- for (auto value64 : data)
179- {
180- const auto value32 = static_cast <nvbench::float32_t >(value64);
181- auto value_subbuffer = &buffer[bytes_in_buffer];
182- std::memcpy (value_subbuffer, &value32, value_nbytes);
183-
184- // the c++17 implementation of is_little_endian isn't constexpr, but
185- // all supported compilers optimize this branch as if it were.
186- if (!is_little_endian ())
187- {
188- using std::swap;
189- swap (value_subbuffer[0 ], value_subbuffer[3 ]);
190- swap (value_subbuffer[1 ], value_subbuffer[2 ]);
191- }
192- bytes_in_buffer += value_nbytes;
193-
194- // if buffer is full, write it out and wrap around
195- if (bytes_in_buffer == buffer_nbytes)
196- {
197- out.write (buffer, static_cast <std::streamsize>(buffer_nbytes));
198- bytes_in_buffer = 0 ;
199- }
200- } // end of foreach value64 in data
201-
202- if (bytes_in_buffer)
203- {
204- out.write (buffer, static_cast <std::streamsize>(bytes_in_buffer));
205- bytes_in_buffer = 0 ;
206- }
210+ // see: https://github.com/NVIDIA/nvbench/issues/255
211+ constexpr std::size_t buffer_nbytes = 4096 ;
212+ write_out_values<buffer_nbytes>(out, data);
207213 }
208214 catch (std::exception &e)
209215 {
0 commit comments