Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/mnist-learn/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ constexpr size_t active_steps = 10;
constexpr size_t steps_per_image = 15;
constexpr float state_increment_factor = 1.f / 255;
constexpr size_t images_amount_to_train = 60000;
constexpr float dataset_split = 0.8;
constexpr size_t images_amount_to_test = 10000;
constexpr size_t classes_amount = 10;

namespace data_processing = knp::framework::data_processing::classification::images;
Expand Down Expand Up @@ -67,7 +67,7 @@ int main(int argc, char** argv)
dataset.process_labels_and_images(
images_stream, labels_stream, images_amount_to_train, classes_amount, input_size, steps_per_image,
dataset.make_incrementing_image_to_spikes_converter(active_steps, state_increment_factor));
dataset.split(dataset_split);
dataset.split(images_amount_to_train, images_amount_to_test);

std::cout << "Processed dataset, training will last " << dataset.get_steps_required_for_training()
<< " steps, inference " << dataset.get_steps_required_for_inference() << " steps" << std::endl;
Expand Down
39 changes: 17 additions & 22 deletions knp/base-framework/impl/data_processing/classification/dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,35 +21,30 @@

#include <knp/framework/data_processing/classification/dataset.h>

#include <spdlog/spdlog.h>


namespace knp::framework::data_processing::classification
{

void Dataset::split(float split_percent)
void Dataset::split(size_t frames_for_training, size_t frames_for_inference)
{
size_t split_beginning = static_cast<float>(data_for_training_.size()) * split_percent + 0.5F;
for (size_t i = split_beginning; i < data_for_training_.size(); ++i)
data_for_inference_.emplace_back(std::move(data_for_training_[i]));
data_for_training_.erase(data_for_training_.begin() + split_beginning, data_for_training_.end());

/*
* The idea is that, if is too big for required training amount, then inference will be bigger than
* training, so to compensate we make inference smaller, according to split.
*/
if (required_training_amount_ < data_for_training_.size())
{
data_for_training_.resize(required_training_amount_);
data_for_inference_.resize(
static_cast<size_t>(static_cast<float>(data_for_training_.size()) / split_percent) -
data_for_training_.size());
steps_required_for_training_ = steps_per_frame_ * data_for_training_.size();
steps_required_for_inference_ = steps_per_frame_ * data_for_inference_.size();
}
else
if (data_for_training_.size() < frames_for_inference + frames_for_training)
{
steps_required_for_training_ = steps_per_frame_ * required_training_amount_;
steps_required_for_inference_ = steps_per_frame_ * data_for_inference_.size();
// Log both requested amounts (training first, then inference) followed by the actual dataset size.
SPDLOG_ERROR(
"Incorrect split size. Dataset is too small. Required {} frames for training, and {} frames for inference, "
"while dataset only have {} frames.",
frames_for_training, frames_for_inference, data_for_training_.size());
throw std::runtime_error("Dataset too small.");
}

data_for_inference_.insert(
data_for_inference_.begin(), data_for_training_.begin() + frames_for_training,
data_for_training_.begin() + frames_for_training + frames_for_inference);
data_for_training_.resize(frames_for_training);

steps_required_for_training_ = steps_per_frame_ * data_for_training_.size();
steps_required_for_inference_ = steps_per_frame_ * data_for_inference_.size();
}

} // namespace knp::framework::data_processing::classification
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ namespace classification

/**
* @brief The `Dataset` class is the base class for datasets.
* @details A dataset is supposed to be abstracted from its actual processing and characteristics, such as size.
* The size of the dataset is not a crucial factor, as it is handled during the splitting process. The correct
* workflow would be to first process a dataset, then split it, and finally use it for your purposes.
* @details A dataset is supposed to be abstracted from its actual processing and characteristics, such as size.
* The size of the dataset is not a crucial factor, as it is handled during the splitting process. The correct
* workflow would be to first process a dataset, then split it, and finally use it for your purposes.
* Splitting the dataset is important because it calculates the number of steps required for inference and/or training.
*/
class KNP_DECLSPEC Dataset
Expand Down Expand Up @@ -89,19 +89,12 @@ class KNP_DECLSPEC Dataset

public:
/**
* @brief Split the dataset into training and inference sets based on a given ratio.
* @pre The @p split_percent must be within the range [0, 1].
* @param split_percent The proportion of the dataset to be used for training, between 0 and 1.
* @details The dataset is split such that @p split_percent of the data is allocated for training and the remaining
* is allocated for inference. The function also calculates the number of steps required for training and inference.
* If the dataset is too large and only a subset of it is required for training (as specified
* by @ref required_training_amount_), the function adjusts the inference set size accordingly to maintain the specified
* split ratio.
* For example, if @p split_percent is 0.8 and @ref required_training_amount_ is 100, the training set
* will contain 100 records and the inference set will contain 25 records (100 / 0.8 - 100), regardless of the actual
* size of the dataset.
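* @brief Split the dataset into training and inference sets of the requested sizes.
* @pre The sum of @p frames_for_training and @p frames_for_inference must not exceed the size of the whole dataset.
* @param frames_for_training Number of frames to use for training.
* @param frames_for_inference Number of frames to use for inference.
* @throws std::runtime_error if the dataset contains fewer frames than requested in total.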
*/
virtual void split(float split_percent);
void split(size_t frames_for_training, size_t frames_for_inference);

/**
* @brief Get training data, consisting of pairs of labels and frames.
Expand Down Expand Up @@ -149,17 +142,19 @@ class KNP_DECLSPEC Dataset

/**
* @brief The structure represents a class instance in the form of spikes, distributed over multiple steps.
* @details This structure encapsulates the spike data for a class instance, which is transmitted over a series of steps.
* For example, an image might be sent over 20 steps, with each step representing a subset of the image data.
* The structure stores a vector of boolean values, where each value indicates whether a spike should be sent at a particular step.
* The length of this vector is determined by the product of the steps per frame and the size of the class instance data.
* @details This structure encapsulates the spike data for a class instance, which is transmitted over a series of
* steps. For example, an image might be sent over 20 steps, with each step representing a subset of the image data.
* The structure stores a vector of boolean values, where each value indicates whether a spike should be sent at a
* particular step. The length of this vector is determined by the product of the steps per frame and the size of
* the class instance data.
*/
struct Frame
{
// cppcheck-suppress unusedStructMember
/**
* @brief A vector of boolean values representing the spike pattern for this frame.
* @note The length of this vector is equal to the number of steps per frame multiplied by the size of the class instance data.
* @note The length of this vector is equal to the number of steps per frame multiplied by the size of the class
* instance data.
*/
std::vector<bool> spikes_;
};
Expand All @@ -183,18 +178,21 @@ class KNP_DECLSPEC Dataset
size_t steps_per_frame_ = 0;

/**
* @brief Total number of steps required for training, calculated based on @ref data_for_training_ and @ref steps_per_frame_.
* @brief Total number of steps required for training, calculated based on @ref data_for_training_ and @ref
* steps_per_frame_.
*/
size_t steps_required_for_training_ = 0;

/**
* @brief Total number of steps required for inference, calculated based on @ref data_for_inference_ and @ref steps_per_frame_.
* @brief Total number of steps required for inference, calculated based on @ref data_for_inference_ and @ref
* steps_per_frame_.
*/
size_t steps_required_for_inference_ = 0;

/**
* @brief User-specified amount of training data required.
* @note If this value is less than the actual size of @ref data_for_training_, the @ref split function adjusts the inference data accordingly.
* @note The size-based @ref split function takes the training and inference amounts as explicit arguments and
* does not read this value.
*/
size_t required_training_amount_ = 0;

Expand All @@ -206,4 +204,4 @@ class KNP_DECLSPEC Dataset

} // namespace classification

} // knp::framework::data_processing
} // namespace knp::framework::data_processing
Loading