Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,7 @@ add_subdirectory(applications/CANDLE/pilot2/tools)
add_subdirectory(applications/ATOM/utils)
add_subdirectory(tests)
add_subdirectory(scripts)
add_subdirectory(core-driver)

################################################################
# Install LBANN
Expand Down
2 changes: 1 addition & 1 deletion cmake/configure_files/LBANNConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ set(LBANN_HAS_DIHYDROGEN @LBANN_HAS_DIHYDROGEN@)
set(LBANN_HAS_DISTCONV @LBANN_HAS_DISTCONV@)
set(LBANN_HAS_DOXYGEN @LBANN_HAS_DOXYGEN@)
set(LBANN_HAS_EMBEDDED_PYTHON @LBANN_HAS_EMBEDDED_PYTHON@)
set(LBANN_HAS_FFTW @LBANN_HAS_FFTW@
set(LBANN_HAS_FFTW @LBANN_HAS_FFTW@)
set(LBANN_HAS_FFTW_FLOAT @LBANN_HAS_FFTW_FLOAT@)
set(LBANN_HAS_FFTW_DOUBLE @LBANN_HAS_FFTW_DOUBLE@)
set(LBANN_HAS_GPU_FP16 @LBANN_HAS_GPU_FP16@)
Expand Down
21 changes: 17 additions & 4 deletions core-driver/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Build script for the core-driver executable, which links against LBANN.
#
# NOTE(review): cmake_minimum_required()/project() and the
# add_executable()/target_link_libraries() pair each appear twice below
# (Main and lbann-core are both built from main.cpp) — confirm whether the
# first of each pair is meant to remain.
cmake_minimum_required(VERSION 3.18.0)
project(my_lbann_test C CXX)
cmake_minimum_required(VERSION 3.21.0)
project(my_lbann_test CXX)
# LBANN is mandatory; the version argument requests 0.102.0.
find_package(LBANN 0.102.0 REQUIRED)
add_executable(Main main.cpp)
target_link_libraries(Main PRIVATE LBANN::lbann)
# Conduit is located through its CMake package config file (CONFIG mode).
find_package(Conduit CONFIG REQUIRED)
add_executable(lbann-core main.cpp)
target_link_libraries(lbann-core PRIVATE LBANN::lbann)

#target_link_libraries(lbann-bin lbann)
# The lbann-core target produces a binary named lbann-core-driver, written to
# <top-level build dir>/bin.
set_target_properties(lbann-core
PROPERTIES
OUTPUT_NAME lbann-core-driver
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

#list(APPEND LBANN_EXE_TGTS lbann-core)

# Install the executable into the binary directory and add it to the
# LBANNTargets export set.
install(TARGETS lbann-core
EXPORT LBANNTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
107 changes: 93 additions & 14 deletions core-driver/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@
#include <mpi.h>
#include <stdio.h>

// Add test-specific options
void construct_opts(int argc, char **argv) {
auto& arg_parser = lbann::global_argument_parser();
lbann::construct_std_options();
lbann::construct_datastore_options();
arg_parser.add_option("samples",
{"-n"},
"Number of samples to run inference on",
Expand All @@ -52,20 +55,76 @@ void construct_opts(int argc, char **argv) {
"Number of labels in dataset",
10);
arg_parser.add_option("minibatchsize",
{"-mbs"},
{"--mbs"},
"Number of samples in a mini-batch",
16);
arg_parser.add_flag("use_conduit",
{"--conduit"},
"Use Conduit node samples (Default is non-distributed matrix)");
arg_parser.add_flag("use_dist_matrix",
{"--dist"},
"Use Hydrogen distributed matrix (Default is non-distributed matrix)");
arg_parser.add_required_argument<std::string>
("model",
"Directory containing checkpointed model");
arg_parser.parse(argc, argv);
}

El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>
random_samples(El::Grid const& g, int n, int c, int h, int w) {
// Generates random samples and labels for mnist data in Hydrogen matrix
std::map<
std::string,
El::Matrix<float, El::Device::CPU>>
mat_mnist_samples(int n, int c, int h, int w)
{
El::Matrix<float, El::Device::CPU>
samples(c * h * w, n);
El::MakeUniform(samples);
El::Matrix<float, El::Device::CPU>
labels(1, n);
El::MakeUniform(labels);
std::map<
std::string,
El::Matrix<float, El::Device::CPU>>
samples_map = {{"data/samples", samples}, {"data/labels", labels}};
return samples_map;
}

// Generates random samples and labels for mnist data in Hydrogen distributed
// matrices.
//
// g        grid the matrices are constructed on
// n        number of samples; each sample occupies one column
// c, h, w  channels, height and width; a sample column holds c * h * w values
//
// Returns a map keyed by field name: "data/samples" is (c*h*w) x n and
// "data/labels" is 1 x n. Both matrices are filled by El::MakeUniform called
// with its default arguments.
// NOTE(review): labels receive the same uniform fill as the samples rather
// than class indices — confirm this is what the inference path expects.
std::map<
  std::string,
  El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>>
distmat_mnist_samples(El::Grid const& g, int n, int c, int h, int w)
{
  // One column per sample: (c*h*w) x n, the same orientation used by
  // mat_mnist_samples and by the 1 x n labels below. The superseded
  // n x (c*h*w) constructor line has been dropped; keeping both made the
  // second one an invalid call expression on `samples`.
  El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>
    samples(c * h * w, n, g);
  El::MakeUniform(samples);
  El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>
    labels(1, n, g);
  El::MakeUniform(labels);
  std::map<
    std::string,
    El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>>
    samples_map = {{"data/samples", samples}, {"data/labels", labels}};
  return samples_map;
}

// Fills array with random values
//
// Writes `size` floats into `arr`, each computed as
// (std::rand() % max_val) / max_val, which lies in [0, 1) for the default
// max_val. Draws from the global std::rand() stream and does not seed it.
//
// arr      destination buffer with room for at least `size` floats
// size     number of elements to write; nothing is written when size <= 0
// max_val  modulus and divisor for each draw; a non-positive value would
//          divide by zero, so the buffer is zero-filled instead
void random_fill(float *arr, int size, int max_val=255) {
  if (max_val <= 0) {
    for (int i = 0; i < size; ++i) {
      arr[i] = 0.0f;
    }
    return;
  }
  const float scale = static_cast<float>(max_val);
  // The index must start at 0; it was previously left uninitialized, which
  // made the loop bounds (and every write) undefined.
  for (int i = 0; i < size; ++i) {
    arr[i] = static_cast<float>(std::rand() % max_val) / scale;
  }
}

// Generates random samples and labels for mnist data in vector of Conduit nodes
//
// n        number of nodes to produce, one per sample
// c, h, w  channels, height and width; each sample holds c * h * w floats
//
// Returns a vector of n nodes. Each node gets "data/samples" set from a
// buffer filled by random_fill() and "data/labels" set to std::rand() % 10.
std::vector<conduit::Node> conduit_mnist_samples(int n, int c, int h, int w) {
  std::vector<conduit::Node> samples(n);
  const int sample_size = c * h * w;
  // Heap-backed scratch buffer reused for every sample; a runtime-sized
  // stack array is not standard C++.
  std::vector<float> this_sample(sample_size);
  // The index must start at 0; it was previously left uninitialized.
  for (int i = 0; i < n; ++i) {
    random_fill(this_sample.data(), sample_size);
    // NOTE(review): the scratch buffer is overwritten on the next iteration,
    // so this relies on set() copying the data — confirm against Conduit docs.
    samples[i]["data/samples"].set(this_sample.data(), sample_size);
    samples[i]["data/labels"] = std::rand() % 10;
  }
  return samples;
}

Expand All @@ -79,10 +138,13 @@ int main(int argc, char **argv) {
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

// Get input arguments and print values
// Get input arguments, check and print values
construct_opts(argc, argv);
auto& arg_parser = lbann::global_argument_parser();
if (rank == 0) {
if (arg_parser.get<bool>("use_conduit") && arg_parser.get<bool>("use_dist_matrix")) {
LBANN_ERROR("Cannot use conduit node and distributed matrix together, choose one: --conduit --dist");
}
std::stringstream msg;
msg << "Model: " << arg_parser.get<std::string>("model") << std::endl;
msg << "{ N, c, h, w } = { " << arg_parser.get<int>("samples") << ", ";
Expand All @@ -94,8 +156,8 @@ int main(int argc, char **argv) {
std::cout << msg.str();
}

// Load model and run inference on samples
auto lbann_comm = lbann::initialize_lbann(MPI_COMM_WORLD);

auto m = lbann::load_inference_model(lbann_comm.get(),
arg_parser.get<std::string>("model"),
arg_parser.get<int>("minibatchsize"),
Expand All @@ -105,14 +167,31 @@ int main(int argc, char **argv) {
arg_parser.get<int>("width")
},
{arg_parser.get<int>("labels")});
auto samples = random_samples(lbann_comm->get_trainer_grid(),
arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
auto labels = lbann::infer(m.get(),
samples,
arg_parser.get<int>("minibatchsize"));

// three options for data generation
if (arg_parser.get<bool>("use_conduit")) {
auto samples = conduit_mnist_samples(arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
lbann::set_inference_samples(samples);
} else if (arg_parser.get<bool>("use_dist_matrix")) {
auto samples = distmat_mnist_samples(lbann_comm->get_trainer_grid(),
arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
lbann::set_inference_samples(samples);
} else {
auto samples = mat_mnist_samples(
arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
lbann::set_inference_samples(samples);
}

auto labels = lbann::inference(m.get());

// Print inference results
if (lbann_comm->am_world_master()) {
Expand Down
10 changes: 10 additions & 0 deletions core-driver/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Runs the core driver once per input mode (default matrix, --dist, --conduit)
# against a checkpointed model.

# Environment settings exported for the runs below.
export AL_PROGRESS_RANKS_PER_NUMA_NODE=2
export OMP_NUM_THREADS=8
export MV2_USE_RDMA_CM=0

# This should be a checkpointed lenet model
MODEL_LOC="path/to/checkpointed/model"

# core-driver/CMakeLists.txt names the built binary lbann-core-driver (no
# longer "Main") and places it in <build>/bin. By default it is looked up on
# PATH; set LBANN_CORE_DRIVER to point at a specific build.
DRIVER="${LBANN_CORE_DRIVER:-lbann-core-driver}"

"$DRIVER" "$MODEL_LOC"
"$DRIVER" "$MODEL_LOC" --dist
"$DRIVER" "$MODEL_LOC" --conduit
1 change: 1 addition & 0 deletions include/lbann/data_ingestion/readers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set_full_path(THIS_DIR_HEADERS
metadata.hpp
# Data readers
data_reader_cifar10.hpp
data_reader_conduit.hpp
data_reader_csv.hpp
data_reader_image.hpp
data_reader_HDF5.hpp
Expand Down
72 changes: 72 additions & 0 deletions include/lbann/data_ingestion/readers/data_reader_conduit.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory.
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
// the CONTRIBUTORS file. <[email protected]>
//
// LLNL-CODE-697807.
// All rights reserved.
//
// This file is part of LBANN: Livermore Big Artificial Neural Network
// Toolkit. For details, see http://software.llnl.gov/LBANN or
// https://github.com/LLNL/LBANN.
//
// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the license.
////////////////////////////////////////////////////////////////////////////////

#ifndef LBANN_DATA_READER_CONDUIT_HPP
#define LBANN_DATA_READER_CONDUIT_HPP

#include "lbann/data_readers/data_reader.hpp"
#include "lbann/data_store/data_store_conduit.hpp"

namespace lbann {
/**
 * Data reader for samples that are handed in as conduit nodes.
 *
 * Data and label dimensions are stored via set_data_dims() and
 * set_label_dims(); the linearized sizes reported below are the products
 * of those stored dimensions.
 */
class conduit_data_reader : public generic_data_reader
{
public:
  /** Returns a newly allocated copy of this reader. */
  conduit_data_reader* copy() const override
  {
    return new conduit_data_reader(*this);
  }

  /** Identifies this reader type by name. */
  std::string get_type() const override { return "conduit_data_reader"; }

  /** This reader always reports that it produces conduit output. */
  bool has_conduit_output() override { return true; }

  /** Declared here; defined out of line. */
  void load() override;

  /** Declared here; defined out of line. */
  bool fetch_conduit_node(conduit::Node& sample, int data_id) override;

  /** Declared here; defined out of line. */
  void set_data_dims(std::vector<int> dims);

  /** Declared here; defined out of line. */
  void set_label_dims(std::vector<int> dims);

  /** Product of all entries in m_data_dims (1 when it is empty). */
  int get_linearized_data_size() const override
  {
    int total = 1;
    for (int extent : m_data_dims) {
      total = total * extent;
    }
    return total;
  }

  /** Product of all entries in m_label_dims (1 when it is empty). */
  int get_linearized_label_size() const override
  {
    int total = 1;
    for (int extent : m_label_dims) {
      total = total * extent;
    }
    return total;
  }

protected:
  /** Per-sample data dimensions. */
  std::vector<int> m_data_dims;
  /** Per-sample label dimensions. */
  std::vector<int> m_label_dims;

}; // END: class conduit_data_reader

} // namespace lbann

#endif // LBANN_DATA_READER_CONDUIT_HPP
Loading