Added dataset max size. Removed redundant code: KasperskyLab#123

DavidIkov · DavidIkov · commit 13593489c2cd · 2025-12-19T11:31:53.000+03:00
diff --git a/examples/mnist-learn/altai/construct_network.cpp b/examples/mnist-learn/altai/construct_network.cpp
@@ -108,9 +108,9 @@ auto add_subnetwork_populations(AnnotatedNetwork &result)
     };
     //
     std::vector<PopulationRole> pop_data{
-        {{10 * neurons_per_column, l_neuron}, true, false, "L"},
-        {{10, default_neuron}, true, true, "OUT"},
-        {{10, default_neuron}, false, false, "BIAS"}};
+        {{classes_amount * neurons_per_column, l_neuron}, true, false, "L"},
+        {{classes_amount, default_neuron}, true, true, "OUT"},
+        {{classes_amount, default_neuron}, false, false, "BIAS"}};
 
     std::vector<knp::core::UID> population_uids;
     for (auto &pop_init_data : pop_data)
@@ -163,7 +163,7 @@ AnnotatedNetwork create_example_network(int num_compound_networks)
         TARGET_to_L_synapse.delay_ = 3;
 
         DeltaProjection TARGET_to_L_projection = knp::framework::projection::creators::aligned<DeltaSynapse>(
-            knp::core::UID(false), population_uids[L], 10, pop_data[L].pd_.size_,
+            knp::core::UID(false), population_uids[L], classes_amount, pop_data[L].pd_.size_,
             [&TARGET_to_L_synapse](size_t, size_t) { return TARGET_to_L_synapse; });
         result.network_.add_projection(TARGET_to_L_projection);
         result.data_.projections_from_classes_.push_back(TARGET_to_L_projection.get_uid());
@@ -175,7 +175,7 @@ AnnotatedNetwork create_example_network(int num_compound_networks)
         TARGET_to_L_synapse2.delay_ = 4;
 
         DeltaProjection TARGET_to_L_projection2 = knp::framework::projection::creators::all_to_all<DeltaSynapse>(
-            knp::core::UID(false), population_uids[L], 10, pop_data[L].pd_.size_,
+            knp::core::UID(false), population_uids[L], classes_amount, pop_data[L].pd_.size_,
             [&TARGET_to_L_synapse2](size_t, size_t) { return TARGET_to_L_synapse2; });
         result.network_.add_projection(TARGET_to_L_projection2);
         result.data_.projections_from_classes_.push_back(TARGET_to_L_projection2.get_uid());
@@ -186,7 +186,7 @@ AnnotatedNetwork create_example_network(int num_compound_networks)
         TARGET_to_BIAS_synapse.weight_ = 10 * scale;
 
         DeltaProjection TARGET_to_BIAS_projection = knp::framework::projection::creators::aligned<DeltaSynapse>(
-            knp::core::UID(false), population_uids[BIAS], 10, pop_data[BIAS].pd_.size_,
+            knp::core::UID(false), population_uids[BIAS], classes_amount, pop_data[BIAS].pd_.size_,
             [&TARGET_to_BIAS_synapse](size_t, size_t) { return TARGET_to_BIAS_synapse; });
         result.network_.add_projection(TARGET_to_BIAS_projection);
         result.data_.projections_from_classes_.push_back(TARGET_to_BIAS_projection.get_uid());
diff --git a/examples/mnist-learn/altai/inference.cpp b/examples/mnist-learn/altai/inference.cpp
@@ -83,7 +83,7 @@ std::vector<knp::core::messaging::SpikeMessage> run_mnist_inference(
     std::vector<knp::core::UID> wta_uids;
     {
         std::vector<size_t> wta_borders;
-        for (size_t i = 0; i < num_possible_labels; ++i) wta_borders.push_back(neurons_per_column * (i + 1));
+        for (size_t i = 0; i < classes_amount; ++i) wta_borders.push_back(neurons_per_column * (i + 1));
         wta_uids = knp::framework::projection::add_wta_handlers(
             model_executor, wta_winners_amount, wta_borders, described_network.data_.wta_data_);
     }
diff --git a/examples/mnist-learn/altai/main.cpp b/examples/mnist-learn/altai/main.cpp
@@ -32,9 +32,6 @@
 #include "time_string.h"
 #include "train.h"
 
-constexpr float state_increment_factor = 1.f / 255;
-constexpr size_t classes_amount = 10;
-
 namespace data_processing = knp::framework::data_processing::classification::images;
 namespace inference_evaluation = knp::framework::inference_evaluation::classification;
 
@@ -63,8 +60,8 @@ int main(int argc, char** argv)
 
     data_processing::Dataset dataset;
     dataset.process_labels_and_images(
-        images_stream, labels_stream, images_amount_to_train, classes_amount, input_size, steps_per_image,
-        dataset.make_incrementing_image_to_spikes_converter(active_steps, state_increment_factor));
+        images_stream, labels_stream, images_amount_to_train + images_amount_for_inference, classes_amount, input_size,
+        steps_per_image, dataset.make_incrementing_image_to_spikes_converter(active_steps, state_increment_factor));
     dataset.split(images_amount_to_train, images_amount_for_inference);
 
     std::cout << "Processed dataset, training will last " << dataset.get_steps_required_for_training()
diff --git a/examples/mnist-learn/altai/shared_network.h b/examples/mnist-learn/altai/shared_network.h
@@ -26,31 +26,14 @@
 #include <knp/synapse-traits/all_traits.h>
 
 // Network hyperparameters. You may want to fine-tune these.
-/*
-constexpr float default_threshold = 8.571F;
-constexpr float min_synaptic_weight = -0.7;
-constexpr float max_synaptic_weight = 0.864249F;
-constexpr float base_weight_value = 0.000F;
-constexpr int neuron_dopamine_period = 10;
-constexpr int synapse_dopamine_period = 10;
-constexpr float l_neuron_potential_leak = 1.0 - 1.0 / 3.0;
-constexpr float dopamine_parameter = 0.042F;
-constexpr float dopamine_value = dopamine_parameter;
-constexpr float threshold_weight_coeff = 0.023817F;
-*/
-
-//
-// Network geometry.
-//
 
 // Number of neurons reserved per a single digit.
-// constexpr size_t neurons_per_column = 15;
+constexpr size_t neurons_per_column = 20;
 
-// Ten possible digits, one column per each.
-constexpr size_t num_possible_labels = 10;
+// Number of classes: ten possible digits, one column per class.
+constexpr size_t classes_amount = 10;
 
-// All columns are a part of the same population.
-// constexpr size_t num_input_neurons = neurons_per_column * num_possible_labels;
+constexpr float state_increment_factor = 1.f / 255;
 
 // Number of pixels in width for a single MNIST image.
 constexpr size_t input_size_width = 28;
@@ -67,10 +50,5 @@ constexpr size_t steps_per_image = 15;
 /// How many subnetworks to use.
 constexpr size_t num_subnetworks = 1;
 
-constexpr size_t neurons_per_column = 20;
-
 // Number of pixels for a single MNIST image.
 constexpr size_t input_size = input_size_width * input_size_height;
-
-// Dense input projection from 28 * 28 image to population of 150 neurons.
-// constexpr size_t input_projection_size = input_size * num_input_neurons;
diff --git a/examples/mnist-learn/altai/train.cpp b/examples/mnist-learn/altai/train.cpp
@@ -131,7 +131,7 @@ AnnotatedNetwork train_mnist_network(
     std::vector<knp::core::UID> wta_uids;
     {
         std::vector<size_t> wta_borders;
-        for (size_t i = 0; i < num_possible_labels; ++i) wta_borders.push_back(neurons_per_column * (i + 1));
+        for (size_t i = 0; i < classes_amount; ++i) wta_borders.push_back(neurons_per_column * (i + 1));
         wta_uids = knp::framework::projection::add_wta_handlers(
             model_executor, wta_winners_amount, wta_borders, example_network.data_.wta_data_);
     }
diff --git a/knp/base-framework/impl/data_processing/classification/dataset.cpp b/knp/base-framework/impl/data_processing/classification/dataset.cpp
@@ -34,7 +34,7 @@ void Dataset::split(size_t frames_for_training, size_t frames_for_inference)
         SPDLOG_ERROR(
             "Incorrect split size. Dataset is too small. Required {} frames for training, and {} frames for inference, "
             "while dataset only have {} frames.",
-            frames_for_training, frames_for_training, data_for_training_.size());
+            frames_for_training, frames_for_inference, data_for_training_.size());
         throw std::runtime_error("Dataset too small.");
     }
 
diff --git a/knp/base-framework/impl/data_processing/classification/image.cpp b/knp/base-framework/impl/data_processing/classification/image.cpp
@@ -26,17 +26,18 @@ namespace knp::framework::data_processing::classification::images
 {
 
 void Dataset::process_labels_and_images(
-    std::istream &images_stream, std::istream &labels_stream, size_t training_amount, size_t classes_amount,
+    std::istream &images_stream, std::istream &labels_stream, size_t max_images_amount, size_t classes_amount,
     size_t image_size, size_t steps_per_image,
     std::function<Frame(std::vector<uint8_t> const &)> const &image_to_spikes)
 {
     image_size_ = image_size;
     steps_per_frame_ = steps_per_image;
-    required_training_amount_ = training_amount;
     classes_amount_ = classes_amount;
 
     std::vector<uint8_t> image_reading_buffer(image_size, 0);
 
+    data_for_training_.reserve(max_images_amount);
+
     while (images_stream.good() && labels_stream.good())
     {
         images_stream.read(reinterpret_cast<char *>(image_reading_buffer.data()), image_size);
@@ -48,6 +49,8 @@ void Dataset::process_labels_and_images(
 
         // Push to training data set because we dont know dataset size yet for a split
         data_for_training_.push_back({label, std::move(spikes_frame)});
+
+        if (data_for_training_.size() == max_images_amount) break;
     }
 }
 
diff --git a/knp/base-framework/include/knp/framework/data_processing/classification/dataset.h b/knp/base-framework/include/knp/framework/data_processing/classification/dataset.h
@@ -129,12 +129,6 @@ class KNP_DECLSPEC Dataset
      */
     [[nodiscard]] inline size_t get_steps_required_for_inference() const { return steps_required_for_inference_; }
 
-    /**
-     * @brief Get the user-specified amount of training data required.
-     * @return required training amount, which may affect the allocation of data for inference.
-     */
-    [[nodiscard]] inline size_t get_required_training_amount() const { return required_training_amount_; }
-
     /**
      * @brief Get the number of classes in the dataset.
      * @return number of classes.
@@ -190,13 +184,6 @@ class KNP_DECLSPEC Dataset
      */
     size_t steps_required_for_inference_ = 0;
 
-    /**
-     * @brief User-specified amount of training data required.
-     * @note If this value is less than the actual size of @ref data_for_training_, the @ref split function adjusts the
-     * inference data accordingly.
-     */
-    size_t required_training_amount_ = 0;
-
     /**
      * @brief Number of classes in the dataset.
      */
diff --git a/knp/base-framework/include/knp/framework/data_processing/classification/image.h b/knp/base-framework/include/knp/framework/data_processing/classification/image.h
@@ -46,52 +46,54 @@ class KNP_DECLSPEC Dataset final : public classification::Dataset
      * @brief Process labels and images, converting the images to spike form and creating data pairs.
      * @param images_stream stream containing the raw image data.
      * @param labels_stream stream containing the corresponding labels.
-     * @param training_amount desired number of images to use for training.
+     * @param max_images_amount maximum number of images to process; reading stops once this many are loaded.
      * @param classes_amount total number of classes in the dataset.
      * @param image_size size of each image in bytes.
      * @param steps_per_image number of steps required to transmit an image in spike form to a model.
      * @param image_to_spikes function that converts raw image data to spike form, returning a `Frame` object.
-     * @details This method reads images and labels from the provided streams, converts each image to spike form 
-     * using the provided converter function, and creates data pairs consisting of the label and spike frame. 
+     * @details This method reads images and labels from the provided streams, converts each image to spike form
+     * using the provided converter function, and creates data pairs consisting of the label and spike frame.
      * The data pairs are added to the training dataset.
      */
     void process_labels_and_images(
-        std::istream &images_stream, std::istream &labels_stream, size_t training_amount, size_t classes_amount,
+        std::istream &images_stream, std::istream &labels_stream, size_t max_images_amount, size_t classes_amount,
         size_t image_size, size_t steps_per_image,
         std::function<Frame(std::vector<uint8_t> const &)> const &image_to_spikes);
 
     /**
      * @brief Create a generator that produces spike data from training labels.
      * @return functor for generating spikes from training labels.
-     * @details The generated spike data is created by iterating over the training labels in a loop, with each label repeated at regular intervals.
+     * @details The generated spike data is created by iterating over the training labels in a loop, with each label
+     * repeated at regular intervals.
      */
     [[nodiscard]] std::function<knp::core::messaging::SpikeData(knp::core::Step)> make_training_labels_generator()
         const;
 
     /**
      * @brief Create a generator that produces spike data from training images.
      * @return functor for generating spikes from training images.
-     * @details The spike data is generated by iterating over the training images in a looped manner, where each image is divided into
-     * frames. For each frame, the corresponding spike data is extracted and returned.
+     * @details The spike data is generated by iterating over the training images in a looped manner, where each image
+     * is divided into frames. For each frame, the corresponding spike data is extracted and returned.
      */
     [[nodiscard]] std::function<knp::core::messaging::SpikeData(knp::core::Step)>
     make_training_images_spikes_generator() const;
 
     /**
      * @brief Create a generator that produces spike data from inference images.
      * @return functor for generating spikes from inference images.
-     * @details The spike data is generated by iterating over the inference images in a looped manner, where each image is divided into
-     * frames. For each frame, the corresponding spike data is extracted and returned.
+     * @details The spike data is generated by iterating over the inference images in a looped manner, where each image
+     * is divided into frames. For each frame, the corresponding spike data is extracted and returned.
      */
     [[nodiscard]] std::function<knp::core::messaging::SpikeData(knp::core::Step)>
     make_inference_images_spikes_generator() const;
 
     /**
      * @brief Create an incrementing image to spikes converter.
-     * @details This converter generates spikes based on the input image data, considering the specified number of active steps
-     * and the state increment factor. Spikes are sent for the active steps, and no spikes are sent for the remaining steps
-     * until the total steps per image (@ref steps_per_frame_) are reached.
-     * @param active_steps number of active steps, which must be less than the total steps per image (@ref steps_per_frame_).
+     * @details This converter generates spikes based on the input image data, considering the specified number of
+     * active steps and the state increment factor. Spikes are sent for the active steps, and no spikes are sent for the
+     * remaining steps until the total steps per image (@ref steps_per_frame_) are reached.
+     * @param active_steps number of active steps, which must be less than the total steps per image (@ref
+     * steps_per_frame_).
      * @param state_increment_factor factor by which the state is incremented for each input value.
      * @return functor that converts raw image data to spikes.
      */
diff --git a/knp/tests/framework/data_processing_test.cpp b/knp/tests/framework/data_processing_test.cpp
@@ -38,7 +38,6 @@ TEST(DataProcessing, ImageClassification)
 
     ASSERT_EQ(dataset.get_image_size(), image_size);
     ASSERT_EQ(dataset.get_amount_of_classes(), classes_amount);
-    ASSERT_EQ(dataset.get_required_training_amount(), training_amount);
     ASSERT_EQ(dataset.get_steps_per_frame(), steps_per_image);
     ASSERT_EQ(dataset.get_steps_required_for_training(), training_amount);
     ASSERT_EQ(dataset.get_steps_required_for_inference(), inference_amount);

Original file line number	Diff line number	Diff line change
`@@ -83,7 +83,7 @@ std::vector<knp::core::messaging::SpikeMessage> run_mnist_inference(`
`83`	`83`	`std::vector<knp::core::UID> wta_uids;`
`84`	`84`	`{`
`85`	`85`	`std::vector<size_t> wta_borders;`
`86`		`- for (size_t i = 0; i < num_possible_labels; ++i) wta_borders.push_back(neurons_per_column * (i + 1));`
	`86`	`+ for (size_t i = 0; i < classes_amount; ++i) wta_borders.push_back(neurons_per_column * (i + 1));`
`87`	`87`	`wta_uids = knp::framework::projection::add_wta_handlers(`
`88`	`88`	`model_executor, wta_winners_amount, wta_borders, described_network.data_.wta_data_);`
`89`	`89`	`}`
Original file line number	Diff line number	Diff line change
`@@ -131,7 +131,7 @@ AnnotatedNetwork train_mnist_network(`
`131`	`131`	`std::vector<knp::core::UID> wta_uids;`
`132`	`132`	`{`
`133`	`133`	`std::vector<size_t> wta_borders;`
`134`		`- for (size_t i = 0; i < num_possible_labels; ++i) wta_borders.push_back(neurons_per_column * (i + 1));`
	`134`	`+ for (size_t i = 0; i < classes_amount; ++i) wta_borders.push_back(neurons_per_column * (i + 1));`
`135`	`135`	`wta_uids = knp::framework::projection::add_wta_handlers(`
`136`	`136`	`model_executor, wta_winners_amount, wta_borders, example_network.data_.wta_data_);`
`137`	`137`	`}`
Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,7 @@ void Dataset::split(size_t frames_for_training, size_t frames_for_inference)`
`34`	`34`	`SPDLOG_ERROR(`
`35`	`35`	`"Incorrect split size. Dataset is too small. Required {} frames for training, and {} frames for inference, "`
`36`	`36`	`"while dataset only have {} frames.",`
`37`		`- frames_for_training, frames_for_training, data_for_training_.size());`
	`37`	`+ frames_for_training, frames_for_inference, data_for_training_.size());`
`38`	`38`	`throw std::runtime_error("Dataset too small.");`
`39`	`39`	`}`
`40`	`40`
Original file line number	Diff line number	Diff line change
`@@ -26,17 +26,18 @@ namespace knp::framework::data_processing::classification::images`
`26`	`26`	`{`
`27`	`27`
`28`	`28`	`void Dataset::process_labels_and_images(`
`29`		`- std::istream &images_stream, std::istream &labels_stream, size_t training_amount, size_t classes_amount,`
	`29`	`+ std::istream &images_stream, std::istream &labels_stream, size_t max_images_amount, size_t classes_amount,`
`30`	`30`	`size_t image_size, size_t steps_per_image,`
`31`	`31`	`std::function<Frame(std::vector<uint8_t> const &)> const &image_to_spikes)`
`32`	`32`	`{`
`33`	`33`	`image_size_ = image_size;`
`34`	`34`	`steps_per_frame_ = steps_per_image;`
`35`		`- required_training_amount_ = training_amount;`
`36`	`35`	`classes_amount_ = classes_amount;`
`37`	`36`
`38`	`37`	`std::vector<uint8_t> image_reading_buffer(image_size, 0);`
`39`	`38`
	`39`	`+ data_for_training_.reserve(max_images_amount);`
	`40`	`+`
`40`	`41`	`while (images_stream.good() && labels_stream.good())`
`41`	`42`	`{`
`42`	`43`	`images_stream.read(reinterpret_cast<char *>(image_reading_buffer.data()), image_size);`
`@@ -48,6 +49,8 @@ void Dataset::process_labels_and_images(`
`48`	`49`
`49`	`50`	`// Push to training data set because we dont know dataset size yet for a split`
`50`	`51`	`data_for_training_.push_back({label, std::move(spikes_frame)});`
	`52`	`+`
	`53`	`+ if (data_for_training_.size() == max_images_amount) break;`
`51`	`54`	`}`
`52`	`55`	`}`
`53`	`56`