4242namespace lbann {
4343namespace callback {
4444
// These helpers are used only inside this translation unit, so they live in
// an unnamed namespace and get internal linkage.
namespace {

/// Append @p text to @p os as a JSON string literal.
///
/// The text is wrapped in double quotes; embedded double quotes, backslashes
/// and control characters (code points below 0x20) are escaped so that the
/// surrounding log record stays parseable even when the text comes from user
/// input.
void print_json_string(std::ostringstream& os, std::string const& text)
{
  static char const hex_digits[] = "0123456789abcdef";

  os << '"';
  for (char const ch : text) {
    switch (ch) {
    case '"':
      os << "\\\"";
      break;
    case '\\':
      os << "\\\\";
      break;
    case '\n':
      os << "\\n";
      break;
    case '\r':
      os << "\\r";
      break;
    case '\t':
      os << "\\t";
      break;
    default: {
      // Go through unsigned char so bytes >= 0x80 are never seen as negative.
      auto const code = static_cast<unsigned char>(ch);
      if (code < 0x20) {
        os << "\\u00" << hex_digits[code >> 4] << hex_digits[code & 0x0F];
      }
      else {
        os << ch;
      }
      break;
    }
    }
  }
  os << '"';
}

/// Write a floating-point value as a bare JSON number.
void print_value(std::ostringstream& os, double value)
{
  os << value;
}

/// Write a signed integer value as a bare JSON number.
void print_value(std::ostringstream& os, long value)
{
  os << value;
}

/// Write an unsigned size/count value as a bare JSON number.
void print_value(std::ostringstream& os, size_t value)
{
  os << value;
}

/// Write a string value as a quoted, escaped JSON string.
void print_value(std::ostringstream& os, std::string const& value)
{
  print_json_string(os, value);
}

/// Write a C string value as a quoted, escaped JSON string.
///
/// A null pointer is written as the JSON literal `null` instead of being
/// streamed, which would otherwise put the stream into a failed state.
void print_value(std::ostringstream& os, char const* value)
{
  if (value == nullptr) {
    os << "null";
    return;
  }
  print_json_string(os, value);
}

} // namespace
/// Fallback for value types that have no dedicated print_value() overload.
///
/// Overload resolution prefers the non-template overloads on an exact match,
/// so this template is selected only for types they do not cover exactly
/// (for example a plain `int`). The value itself is not serialized; a quoted
/// placeholder is written instead so that the "value" field of the record
/// remains a valid JSON string.
///
/// @param os     Stream that accumulates the log record.
/// @param value  Ignored; its type is what routed the call here.
template <typename T>
void print_value(std::ostringstream& os, T value)
{
  static_cast<void>(value); // Intentionally unused.
  os << "\"UNKNOWN_DATA_TYPE\"";
}
72+
73+ template <typename T>
74+ void mlperf_logging::print (std::ostringstream& os, mlperf_logging::event_type et,
4775 std::string key, T value, char const * file,
4876 size_t line, double epoch) const
4977{
@@ -54,19 +82,22 @@ void mlperf_logging::print(std::ostream& os, mlperf_logging::event_type et,
5482 print_event_type (os, et);
5583
5684 os << " \" , "
57- << " \" key\" : " << key << " \" , "
85+ << " \" key\" : \" " << key << " \" , "
5886 << " \" value\" : " ;
5987 print_value (os, value);
6088 os << " , "
6189 << " \" metadata\" : {\" file\" : \" " << file << " \" , "
6290 << " \" lineno\" : " << line;
6391 if (epoch < 0 )
64- os << " }}\n " ;
92+ os << " }}" ;
6593 else
66- os << " , " << " \" epoch_num\" : " << epoch << " }}\n " ;
94+ os << " , " << " \" epoch_num\" : " << epoch << " }}" ;
95+
96+ H2_INFO (os.str ());
97+ os.flush ();
6798}
6899
69- void mlperf_logging::print_event_type (std::ostream & os, mlperf_logging::event_type et) const
100+ void mlperf_logging::print_event_type (std::ostringstream & os, mlperf_logging::event_type et) const
70101{
71102 switch (et) {
72103 case mlperf_logging::event_type::TIME_POINT: os << " POINT_IN_TIME" ; break ;
@@ -76,30 +107,6 @@ void mlperf_logging::print_event_type(std::ostream& os, mlperf_logging::event_ty
76107 }
77108}
78109
79- void mlperf_logging::print_value (std::ostream& os, double value) const
80- {
81- os << value;
82- }
83- void mlperf_logging::print_value (std::ostream& os, long value) const
84- {
85- os << value;
86- }
87- void mlperf_logging::print_value (std::ostream& os, size_t value) const
88- {
89- os << value;
90- }
91- void mlperf_logging::print_value (std::ostream& os, std::string value) const
92- {
93- os << value;
94- }
95- /* template <typename T>
96- void mlperf_logging::print_value(std::ostream& os, T value) const
97- {
98- //FIXME: Should I push the value anyway?
99- os << "UNKNOWN_DATA_TYPE";
100- }
101- */
102-
103110size_t mlperf_logging::get_ms_since_epoch ()
104111{
105112 using namespace std ::chrono;
@@ -117,35 +124,24 @@ void mlperf_logging::setup(model *m)
117124 print (os, mlperf_logging::event_type::TIME_POINT, " cache_clear" , value,
118125 __FILE__, __LINE__);
119126
120- // FIXME: Make these user input vars
121- value = " oc20" ;
122127 print (os, mlperf_logging::event_type::TIME_POINT, " submission_benchmark" ,
123- value , __FILE__, __LINE__);
128+ m_sub_benchmark , __FILE__, __LINE__);
124129
125- value = " LBANN" ;
126130 print (os, mlperf_logging::event_type::TIME_POINT, " submission_org" ,
127- value , __FILE__, __LINE__);
131+ m_sub_org , __FILE__, __LINE__);
128132
129- // FIXME: value = closed?
130- value = " closed" ;
131133 print (os, mlperf_logging::event_type::TIME_POINT, " submission_division" ,
132- value , __FILE__, __LINE__);
134+ m_sub_division , __FILE__, __LINE__);
133135
134- // FIXME: value = onprem?
135- value = " onprem" ;
136136 print (os, mlperf_logging::event_type::TIME_POINT, " submission_status" ,
137- value , __FILE__, __LINE__);
137+ m_sub_status , __FILE__, __LINE__);
138138
139- // FIXME: value = SUBMISSION_PLATFORM_PLACEHOLDER?
140- value = " ?" ;
141139 print (os, mlperf_logging::event_type::TIME_POINT, " submission_platform" ,
142- value , __FILE__, __LINE__);
140+ m_sub_platform , __FILE__, __LINE__);
143141
144142 value = " null" ;
145143 print (os, mlperf_logging::event_type::TIME_POINT, " init_start" , value,
146144 __FILE__, __LINE__);
147-
148- H2_INFO (os.str ());
149145}
150146void mlperf_logging::on_setup_end (model *m)
151147{
@@ -227,8 +223,6 @@ void mlperf_logging::on_setup_end(model *m)
227223
228224 print (os, mlperf_logging::event_type::TIME_POINT, " init_stop" , " null" ,
229225 __FILE__, __LINE__);
230-
231- H2_INFO (os.str ());
232226}
233227
234228void mlperf_logging::on_epoch_begin (model *m)
@@ -239,8 +233,6 @@ void mlperf_logging::on_epoch_begin(model *m)
239233
240234 print (os, mlperf_logging::event_type::INT_START, " epoch_start" , " null" ,
241235 __FILE__, __LINE__, epoch);
242-
243- H2_INFO (os.str ());
244236}
245237
246238void mlperf_logging::on_epoch_end (model *m)
@@ -251,8 +243,6 @@ void mlperf_logging::on_epoch_end(model *m)
251243
252244 print (os, mlperf_logging::event_type::INT_START, " epoch_stop" , " null" ,
253245 __FILE__, __LINE__, epoch);
254-
255- H2_INFO (os.str ());
256246}
257247
258248void mlperf_logging::on_train_begin (model *m)
@@ -264,8 +254,6 @@ void mlperf_logging::on_train_begin(model *m)
264254 // FIXME: run_start? Same time stamp as epoch 1 in results
265255 print (os, mlperf_logging::event_type::INT_START, " run_start" , " null" ,
266256 __FILE__, __LINE__, epoch);
267-
268- H2_INFO (os.str ());
269257}
270258
271259void mlperf_logging::on_train_end (model *m)
@@ -277,8 +265,6 @@ void mlperf_logging::on_train_end(model *m)
277265 // FIXME: run_stop? End of training?
278266 print (os, mlperf_logging::event_type::INT_START, " run_stop" , " null" ,
279267 __FILE__, __LINE__, epoch);
280-
281- H2_INFO (os.str ());
282268}
283269
284270void mlperf_logging::on_batch_evaluate_begin (model *m)
@@ -289,8 +275,6 @@ void mlperf_logging::on_batch_evaluate_begin(model *m)
289275
290276 print (os, mlperf_logging::event_type::INT_START, " eval_start" , " null" ,
291277 __FILE__, __LINE__, epoch);
292-
293- H2_INFO (os.str ());
294278}
295279
296280void mlperf_logging::on_batch_evaluate_end (model *m)
@@ -307,8 +291,6 @@ void mlperf_logging::on_batch_evaluate_end(model *m)
307291 print (os, mlperf_logging::event_type::TIME_POINT, " eval_error" ,
308292 static_cast <double >(eval_error), __FILE__,
309293 __LINE__, epoch);
310-
311- H2_INFO (os.str ());
312294}
313295
314296std::unique_ptr<callback_base>
@@ -318,7 +300,12 @@ build_mlperf_logging_callback_from_pbuf(
318300{
319301 const auto & params =
320302 dynamic_cast <const lbann_data::Callback::CallbackMlperfLogging&>(proto_msg);
321- return std::make_unique<mlperf_logging>(params.output_filename ());
303+ return std::make_unique<mlperf_logging>(params.sub_benchmark (),
304+ params.sub_org (),
305+ params.sub_division (),
306+ params.sub_status (),
307+ params.sub_platform (),
308+ params.output_filename ());
322309}
323310} // namespace callback
324311} // namespace lbann
0 commit comments