diff --git a/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp b/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp index 1613c8ba7b..27dfe5f29c 100644 --- a/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp +++ b/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp @@ -138,11 +138,11 @@ Example Usage: # Give the predictor partial information, and make predictions # about the future. pred.reset() - A = pred.infer( 0, sequence[0] ) + A = pred.infer( sequence[0] ) numpy.argmax( A[1] ) -> labels[1] numpy.argmax( A[2] ) -> labels[2] - B = pred.infer( 1, sequence[1] ) + B = pred.infer( sequence[1] ) numpy.argmax( B[1] ) -> labels[2] numpy.argmax( B[2] ) -> labels[3] )"); @@ -162,14 +162,11 @@ R"(For use with time series datasets.)"); py_Predictor.def("infer", &Predictor::infer, R"(Compute the likelihoods. -Argument recordNum is an incrementing integer for each record. -Gaps in numbers correspond to missing records. - Argument pattern is the SDR containing the active input bits. Returns a dictionary whos keys are prediction steps, and values are PDFs. See help(Classifier.infer) for details about PDFs.)", - py::arg("recordNum"), py::arg("pattern")); + py::arg("pattern")); py_Predictor.def("learn", &Predictor::learn, diff --git a/bindings/py/tests/algorithms/sdr_classifier_test.py b/bindings/py/tests/algorithms/sdr_classifier_test.py index 3519793e75..15225629ed 100644 --- a/bindings/py/tests/algorithms/sdr_classifier_test.py +++ b/bindings/py/tests/algorithms/sdr_classifier_test.py @@ -69,11 +69,11 @@ def testExampleUsage(self): # Give the predictor partial information, and make predictions # about the future. 
pred.reset() - A = pred.infer( 0, sequence[0] ) + A = pred.infer( sequence[0] ) assert( numpy.argmax( A[1] ) == labels[1] ) assert( numpy.argmax( A[2] ) == labels[2] ) - B = pred.infer( 1, sequence[1] ) + B = pred.infer( sequence[1] ) assert( numpy.argmax( B[1] ) == labels[2] ) assert( numpy.argmax( B[2] ) == labels[3] ) @@ -121,7 +121,7 @@ def testSingleValue0Steps(self): for recordNum in range(10): pred.learn(recordNum, inp, 2) - retval = pred.infer( 10, inp ) + retval = pred.infer( inp ) self.assertGreater(retval[0][2], 0.9) @@ -131,15 +131,18 @@ def testComputeInferOrLearnOnly(self): inp.randomize( .3 ) # learn only - c.infer(recordNum=0, pattern=inp) # Don't crash with not enough training data. + with self.assertRaises(RuntimeError): + c.infer(pattern=inp) # crash with not enough training data. c.learn(recordNum=0, pattern=inp, classification=4) - c.infer(recordNum=1, pattern=inp) # Don't crash with not enough training data. + with self.assertRaises(RuntimeError): + c.infer(pattern=inp) # crash with not enough training data. c.learn(recordNum=2, pattern=inp, classification=4) c.learn(recordNum=3, pattern=inp, classification=4) + c.infer(pattern=inp) # Don't crash with not enough training data. # infer only - retval1 = c.infer(recordNum=5, pattern=inp) - retval2 = c.infer(recordNum=6, pattern=inp) + retval1 = c.infer(pattern=inp) + retval2 = c.infer(pattern=inp) self.assertSequenceEqual(list(retval1[1]), list(retval2[1])) @@ -164,7 +167,7 @@ def testComputeComplex(self): classification=4,) inp.sparse = [1, 5, 9] - result = c.infer(recordNum=4, pattern=inp) + result = c.infer(pattern=inp) self.assertSetEqual(set(result.keys()), set([1])) self.assertEqual(len(result[1]), 6) @@ -206,7 +209,7 @@ def testMultistepSingleValue(self): for recordNum in range(10): classifier.learn(recordNum, inp, 0) - retval = classifier.infer(10, inp) + retval = classifier.infer(inp) # Should have a probability of 100% for that bucket. 
self.assertEqual(retval[1], [1.]) @@ -221,7 +224,7 @@ def testMultistepSimple(self): inp.sparse = [i % 10] classifier.learn(recordNum=i, pattern=inp, classification=(i % 10)) - retval = classifier.infer(99, inp) + retval = classifier.infer(inp) self.assertGreater(retval[1][0], 0.99) for i in range(1, 10): @@ -267,7 +270,7 @@ def testMissingRecords(self): # At this point, we should have learned [1,3,5] => bucket 1 # [2,4,6] => bucket 2 inp.sparse = [1, 3, 5] - result = c.infer(recordNum=recordNum, pattern=inp) + result = c.infer(pattern=inp) c.learn(recordNum=recordNum, pattern=inp, classification=2) recordNum += 1 self.assertLess(result[1][0], 0.1) @@ -275,7 +278,7 @@ def testMissingRecords(self): self.assertLess(result[1][2], 0.1) inp.sparse = [2, 4, 6] - result = c.infer(recordNum=recordNum, pattern=inp) + result = c.infer(pattern=inp) c.learn(recordNum=recordNum, pattern=inp, classification=1) recordNum += 1 self.assertLess(result[1][0], 0.1) @@ -289,7 +292,7 @@ def testMissingRecords(self): # the previous learn associates with bucket 0 recordNum += 1 inp.sparse = [1, 3, 5] - result = c.infer(recordNum=recordNum, pattern=inp) + result = c.infer(pattern=inp) c.learn(recordNum=recordNum, pattern=inp, classification=0) recordNum += 1 self.assertLess(result[1][0], 0.1) @@ -300,7 +303,7 @@ def testMissingRecords(self): # the previous learn associates with bucket 0 recordNum += 1 inp.sparse = [2, 4, 6] - result = c.infer(recordNum=recordNum, pattern=inp) + result = c.infer(pattern=inp) c.learn(recordNum=recordNum, pattern=inp, classification=0) recordNum += 1 self.assertLess(result[1][0], 0.1) @@ -311,7 +314,7 @@ def testMissingRecords(self): # the previous learn associates with bucket 0 recordNum += 1 inp.sparse = [1, 3, 5] - result = c.infer(recordNum=recordNum, pattern=inp) + result = c.infer(pattern=inp) c.learn(recordNum=recordNum, pattern=inp, classification=0) recordNum += 1 self.assertLess(result[1][0], 0.1) @@ -548,8 +551,8 @@ def 
testMultiStepPredictions(self): c.learn(recordNum, pattern=SDR2, classification=1) recordNum += 1 - result1 = c.infer(recordNum, SDR1) - result2 = c.infer(recordNum, SDR2) + result1 = c.infer(SDR1) + result2 = c.infer(SDR2) self.assertAlmostEqual(result1[0][0], 1.0, places=1) self.assertAlmostEqual(result1[0][1], 0.0, places=1) diff --git a/src/htm/algorithms/SDRClassifier.cpp b/src/htm/algorithms/SDRClassifier.cpp index 9904735c84..45d6573b6d 100644 --- a/src/htm/algorithms/SDRClassifier.cpp +++ b/src/htm/algorithms/SDRClassifier.cpp @@ -38,32 +38,18 @@ void Classifier::initialize(const Real alpha) { NTA_CHECK(alpha > 0.0f); alpha_ = alpha; - dimensions_.clear(); + dimensions_ = 0; numCategories_ = 0u; weights_.clear(); } -PDF Classifier::infer(const SDR & pattern) -{ - // Check input dimensions, or if this is the first time the Classifier has - // been used then initialize it with the given SDR's dimensions. - if( dimensions_.empty() ) { - dimensions_ = pattern.dimensions; - while( weights_.size() < pattern.size ) { - weights_.push_back( vector( numCategories_, 0.0f )); - } - } else if( pattern.dimensions != dimensions_ ) { - stringstream err_msg; - err_msg << "Classifier input SDR.dimensions mismatch: previously given SDR with dimensions ( "; - for( auto dim : dimensions_ ) - { err_msg << dim << " "; } - err_msg << "), now given SDR with dimensions ( "; - for( auto dim : pattern.dimensions ) - { err_msg << dim << " "; } - err_msg << ")."; - NTA_THROW << err_msg.str(); - } +PDF Classifier::infer(const SDR & pattern) const { + // Check input dimensions; if this is the first time the Classifier is used and dimensions + // are still unset (no learning has happened yet), throw an error. + NTA_CHECK( dimensions_ != 0 ) + << "Classifier: must call `learn` before `infer`."; + NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!"; // Accumulate feed forward input. 
PDF probabilities( numCategories_, 0.0f ); @@ -81,8 +67,19 @@ PDF Classifier::infer(const SDR & pattern) void Classifier::learn(const SDR &pattern, const vector &categoryIdxList) { + // If this is the first time the Classifier is being used, weights are empty, + // so we set the dimensions to that of the input `pattern` + if( dimensions_ == 0 ) { + dimensions_ = pattern.size; + while( weights_.size() < pattern.size ) { + const auto initialEmptyWeights = PDF( numCategories_, 0.0f ); + weights_.push_back( initialEmptyWeights ); + } + } + NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!"; + // Check if this is a new category & resize the weights table to hold it. - const auto maxCategoryIdx = *max_element(categoryIdxList.begin(), categoryIdxList.end()); + const auto maxCategoryIdx = *max_element(categoryIdxList.cbegin(), categoryIdxList.cend()); if( maxCategoryIdx >= numCategories_ ) { numCategories_ = maxCategoryIdx + 1; for( auto & vec : weights_ ) { @@ -93,7 +90,7 @@ void Classifier::learn(const SDR &pattern, const vector &categoryIdxList) } // Compute errors and update weights. - const vector error = calculateError_(categoryIdxList, pattern); + const auto& error = calculateError_(categoryIdxList, pattern); for( const auto& bit : pattern.getSparse() ) { for(size_t i = 0u; i < numCategories_; i++) { weights_[bit][i] += alpha_ * error[i]; @@ -103,9 +100,8 @@ void Classifier::learn(const SDR &pattern, const vector &categoryIdxList) // Helper function to compute the error signal in learning. 
-std::vector Classifier::calculateError_( - const std::vector &categoryIdxList, const SDR &pattern) -{ +std::vector Classifier::calculateError_(const std::vector &categoryIdxList, + const SDR &pattern) const { // compute predicted likelihoods auto likelihoods = infer(pattern); @@ -165,56 +161,49 @@ void Predictor::reset() { } -Predictions Predictor::infer(const UInt recordNum, const SDR &pattern) -{ - updateHistory_( recordNum, pattern ); - +Predictions Predictor::infer(const SDR &pattern) const { Predictions result; for( const auto step : steps_ ) { - result[step] = classifiers_[step].infer( pattern ); + result.insert({step, classifiers_.at(step).infer( pattern )}); } return result; } -void Predictor::learn(const UInt recordNum, const SDR &pattern, +void Predictor::learn(const UInt recordNum, //TODO make recordNum optional, autoincrement as steps + const SDR &pattern, const std::vector &bucketIdxList) { - updateHistory_( recordNum, pattern ); + checkMonotonic_(recordNum); + + // Update pattern history if this is a new record. + const UInt lastRecordNum = recordNumHistory_.empty() ? 0 : recordNumHistory_.back(); + if (recordNumHistory_.size() == 0u || recordNum > lastRecordNum) { + patternHistory_.emplace_back( pattern ); + recordNumHistory_.push_back(recordNum); + if (patternHistory_.size() > steps_.back() + 1u) { //steps_ are sorted, so steps_.back() is the "oldest/deepest" N-th step (ie 10 of [1,2,10]) + patternHistory_.pop_front(); + recordNumHistory_.pop_front(); + } + } // Iterate through all recently given inputs, starting from the furthest in the past. auto pastPattern = patternHistory_.begin(); auto pastRecordNum = recordNumHistory_.begin(); - for( ; pastRecordNum != recordNumHistory_.end(); pastPattern++, pastRecordNum++ ) + for( ; pastRecordNum != recordNumHistory_.cend(); pastPattern++, pastRecordNum++ ) { const UInt nSteps = recordNum - *pastRecordNum; // Update weights. 
if( binary_search( steps_.begin(), steps_.end(), nSteps )) { - classifiers_[nSteps].learn( *pastPattern, bucketIdxList ); + classifiers_.at(nSteps).learn( *pastPattern, bucketIdxList ); } } } -void Predictor::updateHistory_(const UInt recordNum, const SDR & pattern) -{ +void Predictor::checkMonotonic_(const UInt recordNum) const { // Ensure that recordNum increases monotonically. - UInt lastRecordNum = -1; - if( not recordNumHistory_.empty() ) { - lastRecordNum = recordNumHistory_.back(); - if (recordNum < lastRecordNum) { - NTA_THROW << "The record number must increase monotonically."; - } - } - - // Update pattern history if this is a new record. - if (recordNumHistory_.size() == 0u || recordNum > lastRecordNum) { - patternHistory_.emplace_back( pattern ); - recordNumHistory_.push_back(recordNum); - if (patternHistory_.size() > steps_.back() + 1u) { - patternHistory_.pop_front(); - recordNumHistory_.pop_front(); - } - } + const UInt lastRecordNum = recordNumHistory_.empty() ? 0 : recordNumHistory_.back(); + NTA_CHECK(recordNum >= lastRecordNum) << "The record number must increase monotonically."; } diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp index 63f2221ef1..7b662925e3 100644 --- a/src/htm/algorithms/SDRClassifier.hpp +++ b/src/htm/algorithms/SDRClassifier.hpp @@ -21,13 +21,23 @@ /** @file * Definitions for the SDR Classifier & Predictor. + * + * `Classifier` learns mapping from SDR->input value (encoder's output). + * This is used when you need to "explain" the HTM network back to real-world, + * ie. mapping SDRs back to digits in MNIST digit classification task. + * + * `Predictor` has similar functionality for time-sequences + * where you want to "predict" N-steps ahead and then return real-world value. + * Internally it uses (several) Classifiers, and in nupic.core this used to be + * a part for SDRClassifier, for `htm.core` this is a separate class `Predictor`. 
+ * */ #ifndef NTA_SDR_CLASSIFIER_HPP #define NTA_SDR_CLASSIFIER_HPP #include -#include +#include #include #include @@ -43,7 +53,8 @@ namespace htm { * * See also: https://en.wikipedia.org/wiki/Probability_distribution */ -using PDF = std::vector; +using PDF = std::vector; //Real64 (not Real/float) must be used here, +// ... otherwise precision is lost and Predictor never reaches sufficient results. /** * Returns the category with the greatest probablility. @@ -98,8 +109,11 @@ class Classifier : public Serializable /** * Constructor. * - * @param alpha - The alpha used to adapt the weight matrix during learning. A - * larger alpha results in faster adaptation to the data. + * @param alpha - The alpha used to adapt the weight matrix during learning. + * A larger alpha results in faster adaptation to the data. + * Note: when SDRs are formed correctly, the classification task + * for this class is quite easy, so you likely will never need to + * optimize this parameter. */ Classifier(Real alpha = 0.001f ); @@ -115,7 +129,7 @@ class Classifier : public Serializable * @returns: The Probablility Distribution Function (PDF) of the categories. * This is indexed by the category label. */ - PDF infer(const SDR & pattern); + PDF infer(const SDR & pattern) const; /** * Learn from example data. @@ -141,18 +155,19 @@ class Classifier : public Serializable private: Real alpha_; - std::vector dimensions_; + UInt dimensions_; UInt numCategories_; /** * 2D map used to store the data. * Use as: weights_[ input-bit ][ category-index ] + * Real64 (not just Real) so the computations do not lose precision. */ - std::vector> weights_; + std::vector> weights_; // Helper function to compute the error signal for learning. 
- std::vector calculateError_(const std::vector &bucketIdxList, - const SDR &pattern); + std::vector calculateError_(const std::vector &bucketIdxList, + const SDR &pattern) const; }; /** @@ -170,7 +185,7 @@ void softmax(PDF::iterator begin, PDF::iterator end); * The value is a PDF (probability distribution function, of the result being in * each bucket or category). */ -using Predictions = std::map; +using Predictions = std::unordered_map; /** * The Predictor class does N-Step ahead predictions. @@ -179,9 +194,11 @@ using Predictions = std::map; * This class handles missing datapoints. * * Compatibility Note: This class is the replacement for the old SDRClassifier. - * It no longer provides estimates of the actual value. + * It no longer provides estimates of the actual value. Instead, users can get a rough estimate + * from bucket-index. If more precision is needed, use more buckets in the encoder. * * Example Usage: + * ``` * // Predict 1 and 2 time steps into the future. * // Make a sequence of 4 random SDRs. Each SDR has 1000 bits and 2% sparsity. * vector sequence( 4, { 1000 } ); @@ -201,13 +218,14 @@ using Predictions = std::map; * // Give the predictor partial information, and make predictions * // about the future. * pred.reset(); - * Predictions A = pred.infer( 0, sequence[0] ); + * Predictions A = pred.infer( sequence[0] ); * argmax( A[1] ) -> labels[1] * argmax( A[2] ) -> labels[2] * - * Predictions B = pred.infer( 1, sequence[1] ); + * Predictions B = pred.infer( sequence[1] ); * argmax( B[1] ) -> labels[2] * argmax( B[2] ) -> labels[3] + * ``` */ class Predictor : public Serializable { @@ -216,8 +234,9 @@ class Predictor : public Serializable * Constructor. * * @param steps - The number of steps into the future to learn and predict. - * @param alpha - The alpha used to adapt the weight matrix during learning. A - * larger alpha results in faster adaptation to the data. + * @param alpha - The alpha used to adapt the weight matrix during learning. 
+ * A larger alpha results in faster adaptation to the data. + * (The default value will likely be OK in most cases.) */ Predictor(const std::vector &steps, Real alpha = 0.001f ); @@ -235,14 +254,11 @@ class Predictor : public Serializable /** * Compute the likelihoods. * - * @param recordNum: An incrementing integer for each record. Gaps in - * numbers correspond to missing records. - * * @param pattern: The active input SDR. * * @returns: A mapping from prediction step to PDF. */ - Predictions infer(UInt recordNum, const SDR &pattern); + Predictions infer(const SDR &pattern) const; /** * Learn from example data. @@ -252,7 +268,8 @@ class Predictor : public Serializable * @param pattern: The active input SDR. * @param bucketIdxList: Vector of the current value bucket indices or categories. */ - void learn(UInt recordNum, const SDR &pattern, + void learn(const UInt recordNum, + const SDR &pattern, const std::vector &bucketIdxList); CerealAdapter; @@ -276,10 +293,10 @@ class Predictor : public Serializable // Stores the input pattern history, starting with the previous input. std::deque patternHistory_; std::deque recordNumHistory_; - void updateHistory_(UInt recordNum, const SDR & pattern); + void checkMonotonic_(UInt recordNum) const; // One per prediction step - std::map classifiers_; + std::unordered_map classifiers_; }; // End of Predictor class diff --git a/src/test/unit/algorithms/SDRClassifierTest.cpp b/src/test/unit/algorithms/SDRClassifierTest.cpp index 735b2ea03e..6f935d940a 100644 --- a/src/test/unit/algorithms/SDRClassifierTest.cpp +++ b/src/test/unit/algorithms/SDRClassifierTest.cpp @@ -81,11 +81,11 @@ TEST(SDRClassifierTest, ExampleUsagePredictor) // Give the predictor partial information, and make predictions // about the future. 
pred.reset(); - Predictions A = pred.infer( 0, sequence[0] ); + Predictions A = pred.infer( sequence[0] ); ASSERT_EQ( argmax( A[1] ), labels[1] ); ASSERT_EQ( argmax( A[2] ), labels[2] ); - Predictions B = pred.infer( 1, sequence[1] ); + Predictions B = pred.infer( sequence[1] ); ASSERT_EQ( argmax( B[1] ), labels[2] ); ASSERT_EQ( argmax( B[2] ), labels[3] ); } @@ -103,7 +103,7 @@ TEST(SDRClassifierTest, SingleValue) { for (UInt i = 0u; i < 10u; ++i) { c.learn( i, input1, bucketIdxList ); } - Predictions result1 = c.infer( 10u, input1 ); + Predictions result1 = c.infer( input1 ); ASSERT_EQ( argmax( result1[1u] ), 4u ) << "Incorrect prediction for bucket 4"; @@ -138,7 +138,7 @@ TEST(SDRClassifierTest, ComputeComplex) { c.learn(1, input2, bucketIdxList2); c.learn(2, input3, bucketIdxList3); c.learn(3, input1, bucketIdxList4); - auto result = c.infer(4, input1); + auto result = c.infer(input1); // Check the one-step prediction ASSERT_EQ(result.size(), 1u) @@ -211,7 +211,7 @@ TEST(SDRClassifierTest, SaveLoad) { // Measure and save some output. A.addNoise( 0.20f ); // Change two bits. c1.reset(); - const auto c1_out = c1.infer( 0u, A ); + const auto c1_out = c1.infer( A ); // Save and load. stringstream ss; @@ -220,7 +220,7 @@ TEST(SDRClassifierTest, SaveLoad) { EXPECT_NO_THROW(c2.load(ss)); // Expect identical results. - const auto c2_out = c2.infer( 0u, A ); + const auto c2_out = c2.infer( A ); ASSERT_EQ(c1_out, c2_out); }