Skip to content

Commit 41f6e08

Browse files
authored
Merge pull request #406 from htm-community/tm_anomaly
TM anomaly additional fixes
2 parents a3a8ea8 + e73b54b commit 41f6e08

File tree

18 files changed

+194
-435
lines changed

18 files changed

+194
-435
lines changed

API_CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,12 @@ longer accept a synapse permanence threshold argument. PR #305
7676

7777
* SDRClassifier class is replaced by `Classifier` and `Predictor` classes.
7878

79+
* Anomaly class removed as obsolete, use `TM.anomaly` which is simpler to use, and `MovingAverage` when you need to emulate
80+
running averages. Internally the code still uses `computeRawAnomalyScore()` but there's no need to call it directly. `AnomalyLikelihood`
81+
is still available and can be used in addition to `TM.anomaly`. PR #406
82+
7983
* TemporalMemory::getPredictiveCells() now returns an SDR. This ensures a more convenient API and that the SDR object has correct
80-
dimensions matching TM. use TM.getPredictiveCells().getSparse() to obtain the sparse vector as before.
84+
dimensions matching TM. Use TM.getPredictiveCells().getSparse() to obtain the sparse vector as before. PR #437, #442
8185

8286
* TemporalMemory `compute()` and `activateCells()` now use only SDR variants, old overloads with C-style arrays removed. Bindings and
8387
tests also updated.

bindings/py/cpp_src/bindings/algorithms/py_TemporalMemory.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,9 @@ using namespace nupic::algorithms::connections;
180180

181181
py_HTM.def_property_readonly("extra", [](const HTM_t &self) { return self.extra; } );
182182

183+
py_HTM.def_property_readonly("anomaly", [](const HTM_t &self) { return self.anomaly; },
184+
"Anomaly score updated with each TM::compute() call. "
185+
);
183186
}
184187

185188
} // namespace nupic_ext

src/examples/hotgym/HelloSPTP.cpp

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626

2727
#include "HelloSPTP.hpp"
2828

29-
#include "nupic/algorithms/Anomaly.hpp"
3029
#include "nupic/algorithms/TemporalMemory.hpp"
3130
#include "nupic/algorithms/SpatialPooler.hpp"
3231
#include "nupic/encoders/RandomDistributedScalarEncoder.hpp"
32+
#include "nupic/algorithms/AnomalyLikelihood.hpp"
3333

3434
#include "nupic/types/Sdr.hpp"
3535
#include "nupic/utils/Random.hpp"
@@ -44,8 +44,7 @@ using Encoder = nupic::encoders::RandomDistributedScalarEncoder;
4444
using EncoderParameters = nupic::encoders::RDSE_Parameters;
4545
using nupic::algorithms::spatial_pooler::SpatialPooler;
4646
using TM = nupic::algorithms::temporal_memory::TemporalMemory;
47-
using nupic::algorithms::anomaly::Anomaly;
48-
using nupic::algorithms::anomaly::AnomalyMode;
47+
using nupic::algorithms::anomaly::AnomalyLikelihood;
4948

5049

5150
// work-load
@@ -63,7 +62,7 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
6362
std::cout << "EPOCHS = " << EPOCHS << std::endl;
6463

6564

66-
// initialize SP, TM, Anomaly, AnomalyLikelihood
65+
// initialize SP, TM, AnomalyLikelihood
6766
tInit.start();
6867
EncoderParameters encParams;
6968
encParams.sparsity = 0.2f; //20% of the encoding are active bits (1's)
@@ -80,8 +79,7 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
8079

8180
TM tm(vector<UInt>{COLS}, CELLS);
8281

83-
Anomaly an(5, AnomalyMode::PURE);
84-
Anomaly anLikelihood(5, AnomalyMode::LIKELIHOOD);
82+
AnomalyLikelihood anLikelihood;
8583
tInit.stop();
8684

8785
// data for processing input
@@ -90,8 +88,7 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
9088
SDR outSPlocal(spLocal.getColumnDimensions()); //for SPlocal
9189
SDR outSP(vector<UInt>{COLS});
9290
SDR outTM(spGlobal.getColumnDimensions());
93-
Real res = 0.0; //for anomaly:
94-
SDR prevPred_(outTM.dimensions); //holds T-1 TM.predictive cells
91+
Real an = 0.0f, anLikely = 0.0f; //for anomaly:
9592

9693
// Start a stopwatch timer
9794
printf("starting: %d iterations.", EPOCHS);
@@ -125,7 +122,6 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
125122
tSPglob.stop();
126123
}
127124
outSP = outSPglobal; //toggle if local/global SP is used further down the chain (TM, Anomaly)
128-
NTA_CHECK(outSP == outSPglobal);
129125

130126
// TM
131127
if(useTM) {
@@ -138,22 +134,19 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
138134

139135

140136
//Anomaly (pure x likelihood)
141-
tAn.start();
142-
res = an.compute(outSP /*active*/, prevPred_ /*prev predicted*/);
143-
tAn.stop();
144-
137+
an = tm.anomaly;
145138
tAnLikelihood.start();
146-
anLikelihood.compute(outSP /*active*/, prevPred_ /*prev predicted*/);
139+
anLikelihood.anomalyProbability(an); //FIXME AnLikelihood is 0.0, probably not working correctly
147140
tAnLikelihood.stop();
148141

149-
prevPred_ = outTM; //to be used as predicted T-1
150142

151143
// print
152144
if (e == EPOCHS - 1) {
153145
tAll.stop();
154146

155147
cout << "Epoch = " << e << endl;
156-
cout << "Anomaly = " << res << endl;
148+
cout << "Anomaly = " << an << endl;
149+
cout << "Anomaly (Likelihood) = " << anLikely << endl;
157150
cout << "SP (g)= " << outSP << endl;
158151
cout << "SP (l)= " << outSPlocal <<endl;
159152
cout << "TM= " << outTM << endl;
@@ -164,7 +157,6 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
164157
if(useSPlocal) cout << "SP (l):\t" << tSPloc.getElapsed()*1.0f << endl;
165158
if(useSPglobal) cout << "SP (g):\t" << tSPglob.getElapsed() << endl;
166159
if(useTM) cout << "TM:\t" << tTM.getElapsed() << endl;
167-
cout << "AN:\t" << tAn.getElapsed() << endl;
168160
cout << "AN:\t" << tAnLikelihood.getElapsed() << endl;
169161

170162
// check deterministic SP, TM output
@@ -192,16 +184,16 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
192184
};
193185
goldTM.setSparse(deterministicTM);
194186

195-
const float goldAn = 0.920001f;
187+
const float goldAn = 0.8f;
196188

197189
if(EPOCHS == 5000) { //these hand-written values are only valid for EPOCHS = 5000 (default), but not for debug and custom runs.
198190
NTA_CHECK(input == goldEnc) << "Deterministic output of Encoder failed!\n" << input << "should be:\n" << goldEnc;
199191
NTA_CHECK(outSPglobal == goldSP) << "Deterministic output of SP (g) failed!\n" << outSP << "should be:\n" << goldSP;
200192
NTA_CHECK(outSPlocal == goldSPlocal) << "Deterministic output of SP (l) failed!\n" << outSPlocal << "should be:\n" << goldSPlocal;
201193
#ifndef _MSC_VER //FIXME deterministic checks fail on Windows
202194
NTA_CHECK(outTM == goldTM) << "Deterministic output of TM failed!\n" << outTM << "should be:\n" << goldTM;
203-
NTA_CHECK(static_cast<UInt>(res *10000) == static_cast<UInt>(goldAn *10000)) //compare to 4 decimal places
204-
<< "Deterministic output of Anomaly failed! " << res << "should be: " << goldAn;
195+
NTA_CHECK(static_cast<UInt>(an *10000.0f) == static_cast<UInt>(goldAn *10000.0f)) //compare to 4 decimal places
196+
<< "Deterministic output of Anomaly failed! " << an << "should be: " << goldAn;
205197
#endif
206198
}
207199

src/examples/hotgym/HelloSPTP.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ class BenchmarkHotgym {
2525
);
2626

2727
//timers
28-
Timer tInit, tAll, tRng, tEnc, tSPloc, tSPglob, tTM,
29-
tAn, tAnLikelihood;
30-
28+
Timer tInit, tAll, tRng, tEnc, tSPloc, tSPglob, tTM, tAnLikelihood;
3129
};
3230

3331
} //-ns

src/nupic/algorithms/Anomaly.cpp

Lines changed: 2 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,8 @@
2020
* ---------------------------------------------------------------------
2121
*/
2222

23-
#include <algorithm>
24-
#include <iterator>
25-
#include <numeric>
26-
#include <set>
27-
#include <vector>
28-
2923
#include "nupic/algorithms/Anomaly.hpp"
3024
#include "nupic/utils/Log.hpp"
31-
#include "nupic/types/Sdr.hpp"
3225

3326
using namespace std;
3427
using namespace nupic;
@@ -42,92 +35,18 @@ namespace anomaly {
4235
Real computeRawAnomalyScore(const SDR& active,
4336
const SDR& predicted) {
4437

38+
NTA_ASSERT(active.dimensions == predicted.dimensions);
39+
4540
// Return 0 if no active columns are present
4641
if (active.getSum() == 0) {
4742
return 0.0f;
4843
}
4944

50-
NTA_CHECK(active.dimensions == predicted.dimensions);
51-
5245
// Calculate and return percent of active columns that were not predicted.
5346
SDR both(active.dimensions);
5447
both.intersection(active, predicted);
5548

5649
return (active.getSum() - both.getSum()) / Real(active.getSum());
5750
}
5851

59-
Real computeRawAnomalyScore(vector<UInt>& active,
60-
vector<UInt>& predicted)
61-
{
62-
// Don't divide by zero. Return 0 if no active columns are present.
63-
if (active.size() == 0) {
64-
return 0.0f;
65-
}
66-
67-
vector<UInt> correctPredictions;
68-
sort( active.begin(), active.end());
69-
sort( predicted.begin(), predicted.end());
70-
set_intersection(active.begin(), active.end(),
71-
predicted.begin(), predicted.end(),
72-
back_inserter( correctPredictions ));
73-
74-
return (Real) (active.size() - correctPredictions.size()) / active.size();
75-
}
76-
7752
}}} // End namespace
78-
79-
Anomaly::Anomaly(UInt slidingWindowSize, AnomalyMode mode, Real binaryAnomalyThreshold)
80-
: binaryThreshold_(binaryAnomalyThreshold)
81-
{
82-
NTA_CHECK(binaryAnomalyThreshold >= 0 && binaryAnomalyThreshold <= 1) << "binaryAnomalyThreshold must be within [0.0,1.0]";
83-
mode_ = mode;
84-
if (slidingWindowSize > 0) {
85-
movingAverage_.reset(new nupic::util::MovingAverage(slidingWindowSize));
86-
}
87-
}
88-
89-
90-
Real Anomaly::compute(const SDR& active, const SDR& predicted, int timestamp)
91-
{ return compute(active.getSparse(), predicted.getSparse(), timestamp); }
92-
93-
Real Anomaly::compute(vector<UInt>& active, vector<UInt>& predicted, int timestamp)
94-
{
95-
Real anomalyScore = computeRawAnomalyScore(active, predicted);
96-
Real likelihood = 0.5;
97-
Real score = anomalyScore;
98-
switch(mode_)
99-
{
100-
case AnomalyMode::PURE:
101-
score = anomalyScore;
102-
break;
103-
case AnomalyMode::LIKELIHOOD:
104-
likelihood = likelihood_.anomalyProbability(anomalyScore, timestamp);
105-
score = 1 - likelihood;
106-
break;
107-
case AnomalyMode::WEIGHTED:
108-
likelihood = likelihood_.anomalyProbability(anomalyScore, timestamp);
109-
score = anomalyScore * (1 - likelihood);
110-
break;
111-
}
112-
113-
if (movingAverage_) {
114-
score = movingAverage_->compute(score);
115-
}
116-
117-
if (binaryThreshold_) {
118-
score = (score >= binaryThreshold_) ? 1.0f : 0.0f;
119-
}
120-
121-
return score;
122-
}
123-
124-
bool Anomaly::operator==(const Anomaly &a) const {
125-
if (mode_ != a.mode_) return false;
126-
if (binaryThreshold_ != a.binaryThreshold_) return false;
127-
if (movingAverage_ != nullptr && a.movingAverage_ == nullptr) return false;
128-
if (movingAverage_ == nullptr && a.movingAverage_ != nullptr) return false;
129-
if (movingAverage_ != nullptr && *(movingAverage_.get()) != *(a.movingAverage_.get())) return false;
130-
if (likelihood_ != a.likelihood_) return false;
131-
return true;
132-
}
133-

src/nupic/algorithms/Anomaly.hpp

Lines changed: 6 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,7 @@
2323
#ifndef NUPIC_ALGORITHMS_ANOMALY_HPP
2424
#define NUPIC_ALGORITHMS_ANOMALY_HPP
2525

26-
#include <memory> // Needed for smart pointer templates
27-
#include <vector>
28-
#include <nupic/types/Serializable.hpp>
29-
#include <nupic/algorithms/AnomalyLikelihood.hpp>
3026
#include <nupic/types/Types.hpp>
31-
#include <nupic/utils/MovingAverage.hpp> // Needed for for smart pointer templates
3227
#include <nupic/types/Sdr.hpp> // sdr::SDR
3328

3429
namespace nupic {
@@ -40,104 +35,16 @@ namespace anomaly {
4035
* Computes the raw anomaly score.
4136
*
4237
* The raw anomaly score is the fraction of active columns not predicted.
38+
* Do not use this function directly; it is intended for testing and internal implementation.
39+
* Use `TM.anomaly` (+AnomalyLikelihood, MovingAverage for more specific needs).
4340
*
44-
* @param activeColumns: array of active column indices
45-
* @param prevPredictedColumns: array of columns indices predicted in
46-
* prev step
41+
* @param active: SDR with active columns (not cells) from current step (T)
42+
* @param predicted: SDR of predictive column indices from prev step (T-1)
4743
* @return anomaly score 0..1 (Real32)
4844
*/
49-
Real32 computeRawAnomalyScore(std::vector<UInt> &active,
50-
std::vector<UInt> &predicted);
45+
Real32 computeRawAnomalyScore(const sdr::SDR& active,
46+
const sdr::SDR& predicted);
5147

52-
Real32 computeRawAnomalyScore(const sdr::SDR& active, const sdr::SDR& predicted);
53-
54-
enum class AnomalyMode { PURE, LIKELIHOOD, WEIGHTED };
55-
56-
class Anomaly : public Serializable {
57-
public:
58-
/**
59-
* Utility class for generating anomaly scores in different ways.
60-
*
61-
* Supported modes:
62-
* PURE - the raw anomaly score as computed by computeRawAnomalyScore
63-
* LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
64-
* anomaly scores (not implemented in C++)
65-
* WEIGHTED - multiplies the likelihood result with the raw anomaly
66-
* score that was used to generate the likelihood (not
67-
* implemented in C++)
68-
*
69-
* @param slidingWindowSize (optional) - how many elements are
70-
* summed up; enables moving average on final anomaly score;
71-
* int >= 0
72-
* @param mode (optional) - (enum) how to compute anomaly;
73-
* possible values are AnomalyMode::
74-
* - PURE - the default, how much anomal the value is;
75-
* Real32 0..1 where 1=totally unexpected
76-
* - LIKELIHOOD - uses the anomaly_likelihood code;
77-
* models probability of receiving this value and
78-
* anomalyScore
79-
* - WEIGHTED - "pure" anomaly weighted by "likelihood"
80-
* (anomaly * likelihood)
81-
* @param binaryAnomalyThreshold (optional) - if set [0,1] anomaly
82-
* score will be discretized to 1/0
83-
* (1 iff >= binaryAnomalyThreshold). The transformation is
84-
* applied after moving average is computed.
85-
*/
86-
Anomaly(UInt slidingWindowSize = 0, AnomalyMode mode = AnomalyMode::PURE,
87-
Real32 binaryAnomalyThreshold = 0);
88-
89-
/**
90-
* Compute the anomaly score as the percent of active columns not
91-
* predicted.
92-
*
93-
* @param active: array of active column indices
94-
* @param predicted: array of columns indices predicted in prev step (T-1)
95-
* @param timestamp: (optional) date timestamp when the sample occured
96-
* (used in anomaly-likelihood), -1 defaults to using iteration step
97-
* @return the computed anomaly score; Real32 0..1
98-
*/
99-
Real compute(std::vector<UInt> &active,
100-
std::vector<UInt> &predicted,
101-
int timestamp = -1);
102-
103-
Real compute(const sdr::SDR &active,
104-
const sdr::SDR &predicted,
105-
int timestamp = -1);
106-
107-
108-
CerealAdapter;
109-
110-
template<class Archive>
111-
void save_ar(Archive & ar) const {
112-
std::string name("Anomaly");
113-
ar(CEREAL_NVP(name),
114-
CEREAL_NVP(mode_),
115-
CEREAL_NVP(binaryThreshold_),
116-
CEREAL_NVP(likelihood_),
117-
CEREAL_NVP(movingAverage_));
118-
}
119-
template<class Archive>
120-
void load_ar(Archive & ar) {
121-
std::string name;
122-
ar(CEREAL_NVP(name),
123-
CEREAL_NVP(mode_),
124-
CEREAL_NVP(binaryThreshold_));
125-
ar(CEREAL_NVP(likelihood_));
126-
ar(CEREAL_NVP(movingAverage_));
127-
}
128-
129-
130-
131-
bool operator==(const Anomaly &a) const;
132-
inline bool operator!=(const Anomaly &a) const
133-
{ return not ((*this) == a); }
134-
135-
private:
136-
AnomalyMode mode_;
137-
Real32 binaryThreshold_;
138-
std::unique_ptr<nupic::util::MovingAverage> movingAverage_;
139-
AnomalyLikelihood likelihood_; //TODO which params/how pass them to constructor?
140-
};
14148
}}} //end-ns
14249

14350
#endif // NUPIC_ALGORITHMS_ANOMALY_HPP

src/nupic/algorithms/AnomalyLikelihood.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,13 @@ There are 3 ways to use the code:
3333
- using the raw individual functions.
3434
**/
3535

36-
using namespace std;
3736

3837
namespace nupic {
3938
namespace algorithms {
4039
namespace anomaly {
4140

41+
using namespace std;
42+
4243
struct DistributionParams {
4344
DistributionParams(std::string name, Real mean, Real variance, Real stdev) :
4445
name(name),mean(mean), variance(variance), stdev(stdev) {}

0 commit comments

Comments
 (0)