Skip to content

Commit 5495bb6

Browse files
committed
Added callback for mlperf logs
1 parent 5eb080a commit 5495bb6

File tree

7 files changed

+477
-2
lines changed

7 files changed

+477
-2
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,15 +224,19 @@ if (LBANN_WITH_DISTCONV)
224224
find_package(DiHydrogen CONFIG REQUIRED COMPONENTS Meta Patterns DistConv)
225225
set(LBANN_HAS_DISTCONV TRUE)
226226
set(LBANN_H2_LIBS
227+
H2::H2Core
227228
H2::H2Meta
228229
H2::H2Patterns
229230
H2::H2DistConv)
230231
else ()
231232
find_package(DiHydrogen CONFIG REQUIRED COMPONENTS Meta Patterns)
232233
set(LBANN_H2_LIBS
234+
H2::H2Core
233235
H2::H2Meta
234236
H2::H2Patterns)
235237
endif ()
238+
#FIXME(KLG): There is no H2CoreConfig.cmake in H2
239+
#find_package(H2Core REQUIRED)
236240
set(LBANN_HAS_DIHYDROGEN TRUE)
237241
message(STATUS "Found DiHydrogen: ${DiHydrogen_DIR}")
238242

@@ -657,6 +661,7 @@ target_link_libraries(lbann PUBLIC
657661
${CLARA_LIBRARIES}
658662
${LBANN_PYTHON_LIBS}
659663
protobuf::libprotobuf
664+
spdlog::spdlog
660665
${CEREAL_LIBRARIES}
661666
ZSTR::ZSTR)
662667

include/lbann/callbacks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ set_full_path(THIS_DIR_HEADERS
5050
learning_rate.hpp
5151
ltfb.hpp
5252
mixup.hpp
53+
mlperf_logging.hpp
5354
monitor_io.hpp
5455
perturb_adam.hpp
5556
perturb_dropout.hpp
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
////////////////////////////////////////////////////////////////////////////////
2+
// Copyright (c) 2014-2022, Lawrence Livermore National Security, LLC.
3+
// Produced at the Lawrence Livermore National Laboratory.
4+
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5+
// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6+
//
7+
// LLNL-CODE-697807.
8+
// All rights reserved.
9+
//
10+
// This file is part of LBANN: Livermore Big Artificial Neural Network
11+
// Toolkit. For details, see http://software.llnl.gov/LBANN or
12+
// https://github.com/LLNL/LBANN.
13+
//
14+
// Licensed under the Apache License, Version 2.0 (the "License"); you
15+
// may not use this file except in compliance with the License. You may
16+
// obtain a copy of the License at:
17+
//
18+
// http://www.apache.org/licenses/LICENSE-2.0
19+
//
20+
// Unless required by applicable law or agreed to in writing, software
21+
// distributed under the License is distributed on an "AS IS" BASIS,
22+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23+
// implied. See the License for the specific language governing
24+
// permissions and limitations under the License.
25+
//
26+
// mlperf_logging .hpp .cpp - Prints mlperf compliant benchmark logs
27+
////////////////////////////////////////////////////////////////////////////////
28+
29+
#ifndef LBANN_CALLBACKS_MLPERF_LOGGING_HPP_INCLUDED
30+
#define LBANN_CALLBACKS_MLPERF_LOGGING_HPP_INCLUDED
31+
32+
#include "lbann/callbacks/callback.hpp"
33+
#include <h2/utils/Logger.hpp>
34+
35+
namespace lbann {
36+
namespace callback {
37+
38+
/** @class mlperf_logging
39+
* @brief Callback to print mlperf compliant benchmark logs
40+
*/
41+
class mlperf_logging : public callback_base {
42+
43+
public:
44+
45+
enum class event_type {
46+
TIME_POINT,
47+
INT_START,
48+
INT_END,
49+
};
50+
51+
public:
52+
53+
/** @brief mlperf_logging Constructor.
54+
* @param output_filename Output filename. If empty, "results.txt" is used.
55+
*/
56+
mlperf_logging(std::string output_filename)
57+
: callback_base(/*batch_interval=*/1),
58+
m_output_filename{output_filename.size() ?
59+
std::move(output_filename) :
60+
std::string("results.txt")}
61+
{}
62+
63+
/** @brief Copy interface */
64+
mlperf_logging* copy() const override {
65+
return new mlperf_logging(*this);
66+
}
67+
68+
/** @brief Return name of callback */
69+
std::string name() const override { return "mlperf_logging"; }
70+
71+
/** @brief Push mlperf formatted log string to stream object.
72+
* @param os Stream that stores the log strings.
73+
* @param et Type of mlperf style event.
74+
* @param key Mlperf log key.
75+
* @param value Mlperf log value.
76+
* @param file Current file name.
77+
* @param line File line number.
78+
* @param epoch Current epoch number (default = -1).
79+
*/
80+
template <typename T>
81+
void print(std::ostream& os, mlperf_logging::event_type et, std::string key,
82+
T value, char const* file, size_t line, double epoch = -1) const;
83+
84+
void setup(model *m) override;
85+
void on_setup_end(model *m) override;
86+
void on_epoch_begin(model *m) override;
87+
void on_epoch_end(model *m) override;
88+
void on_train_begin(model *m) override;
89+
void on_train_end(model *m) override;
90+
void on_batch_evaluate_begin(model *m) override;
91+
void on_batch_evaluate_end(model *m) override;
92+
93+
private:
94+
95+
/** @brief Populate log with mlperf event type.
96+
* @param os Stream that stores the log string.
97+
* @param et Type of mlperf style event.
98+
*/
99+
void print_event_type(std::ostream& os, mlperf_logging::event_type et) const;
100+
101+
/** @brief Populate log with value.
102+
* @param os Stream that stores the log string.
103+
* @param value Mlperf log value.
104+
*/
105+
void print_value(std::ostream& os, double value) const;
106+
void print_value(std::ostream& os, long value) const;
107+
void print_value(std::ostream& os, size_t value) const;
108+
void print_value(std::ostream& os, std::string value) const;
109+
//FIXME: The generic template overload below is always picked first, so it is disabled
110+
//template <typename T>
111+
//void print_value(std::ostream& os, T value) const;
112+
113+
static size_t get_ms_since_epoch();
114+
115+
private:
116+
117+
//FIXME: get logger to output file
118+
/** @brief Name of output file. Default = results.txt */
119+
std::string m_output_filename;
120+
121+
//FIXME: Add custom logging tag
122+
/** @brief DiHydrogen logger */
123+
h2::Logger m_logger;
124+
125+
}; // class mlperf_logging
126+
127+
std::unique_ptr<callback_base>
128+
build_mlperf_logging_callback_from_pbuf(
129+
const google::protobuf::Message& proto_msg,
130+
const std::shared_ptr<lbann_summary>&);
131+
132+
} // namespace callback
133+
} // namespace lbann
134+
135+
#endif // LBANN_CALLBACKS_MLPERF_LOGGING_HPP_INCLUDED

src/callbacks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ set_full_path(THIS_DIR_SOURCES
5151
load_model.cpp
5252
ltfb.cpp
5353
mixup.cpp
54+
mlperf_logging.cpp
5455
monitor_io.cpp
5556
perturb_adam.cpp
5657
perturb_dropout.cpp

0 commit comments

Comments
 (0)