Skip to content

Commit eb0aae1

Browse files
committed
add new testing utilities for random dataset generation and add randomized stress test
1 parent 57af6a8 commit eb0aae1

File tree

3 files changed

+121
-0
lines changed

3 files changed

+121
-0
lines changed

shared/libebm/tests/boosting_unusual_inputs.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,3 +2060,59 @@ TEST_CASE("lossguide, boosting, regression") {
20602060
termScore = test.GetCurrentTermScore(0, {0}, 0);
20612061
CHECK_APPROX(termScore, 0.40592050000000002);
20622062
}
2063+
2064+
// Randomized end-to-end stress test for regression boosting. It covers every
// combination of the missing/unseen/nominal feature flags, boosts all main
// terms plus a few low-dimensional interaction terms, and requests randomly
// sized leaf counts (including one more leaf than the feature has bins). The
// final validation metric is pinned so any behavioral drift is detected.
TEST_CASE("stress test, boosting, regression") {
   const IntEbm cClasses = Task_Regression;
   auto rng = MakeRng(0);
   const IntEbm cTrainSamples = 200;
   const IntEbm cValidationSamples = 100;

   // all 8 combinations of (bMissing, bUnseen, bNominal) with 10 bins each,
   // generated in the same order the original explicit list enumerated them
   std::vector<FeatureTest> features;
   for(const bool bMissing : {false, true}) {
      for(const bool bUnseen : {false, true}) {
         for(const bool bNominal : {false, true}) {
            features.emplace_back(10, bMissing, bUnseen, bNominal);
         }
      }
   }

   auto terms = MakeMains(features);
   terms.push_back({0, 0}); // a pair term that references the same feature twice
   if(2 <= features.size()) {
      terms.push_back({0, 1});
      terms.push_back({1, 0});
   }
   if(3 <= features.size()) {
      // terms.push_back({0, 1, 2}); // TODO: enable when fast enough
   }
   if(4 <= features.size()) {
      // terms.push_back({0, 1, 2, 3}); // TODO: enable when fast enough
   }

   const auto train = MakeRandomDataset(rng, cClasses, cTrainSamples, features);
   const auto validation = MakeRandomDataset(rng, cClasses, cValidationSamples, features);
   const size_t cRounds = 1000;
   const IntEbm boostFlags = TermBoostFlags_Default;

   TestBoost test = TestBoost(cClasses,
         features,
         terms,
         train,
         validation,
         k_countInnerBagsDefault,
         k_testCreateBoosterFlags_Default,
         AccelerationFlags_NONE);

   double validationMetric = 0;
   for(size_t iBoostRound = 0; iBoostRound < cRounds; ++iBoostRound) {
      for(IntEbm indexTerm = 0; indexTerm < static_cast<IntEbm>(terms.size()); ++indexTerm) {
         // we allow 1 cut more than the number of bins to test excessive leaves
         const IntEbm countLeaves = 1 + TestRand(rng, features[terms[indexTerm][0]].CountRealBins() + 1);
         const std::vector<IntEbm> leaves(terms[indexTerm].size(), countLeaves);
         validationMetric =
               test.Boost(indexTerm, boostFlags, k_learningRateDefault, 1, 0.0, 0.0, 0.0, 0.0, 10.0, 6, 0.75, leaves)
                     .validationMetric;
      }
   }

   // pinned result: a change here means the boosting algorithm's behavior changed
   CHECK(validationMetric == 15092.795219174546);
}

shared/libebm/tests/libebm_test.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,43 @@ extern void DisplayCuts(IntEbm countSamples,
10041004
std::cout << std::endl << std::endl;
10051005
}
10061006

1007+
extern std::vector<TestSample> MakeRandomDataset(std::vector<unsigned char>& rng,
1008+
const IntEbm cClasses,
1009+
const size_t cSamples,
1010+
const std::vector<FeatureTest>& features) {
1011+
std::vector<TestSample> samples;
1012+
1013+
for(size_t iSample = 0; iSample < cSamples; ++iSample) {
1014+
std::vector<IntEbm> sampleBinIndexes;
1015+
for(const FeatureTest& feature : features) {
1016+
IntEbm iBin = TestRand(rng, feature.CountRealBins());
1017+
if(!feature.m_bMissing) {
1018+
++iBin;
1019+
}
1020+
sampleBinIndexes.push_back(iBin);
1021+
}
1022+
1023+
double target;
1024+
if(Task_GeneralClassification <= cClasses) {
1025+
target = static_cast<double>(TestRand(rng, cClasses));
1026+
} else {
1027+
target = TestRand(rng);
1028+
}
1029+
1030+
samples.push_back(TestSample(sampleBinIndexes, target));
1031+
}
1032+
return samples;
1033+
}
1034+
1035+
extern std::vector<std::vector<IntEbm>> MakeMains(const std::vector<FeatureTest>& features) {
1036+
const IntEbm cFeatures = static_cast<IntEbm>(features.size());
1037+
std::vector<std::vector<IntEbm>> termFeatures;
1038+
for(IntEbm iFeature = 0; iFeature < cFeatures; ++iFeature) {
1039+
termFeatures.push_back({iFeature});
1040+
}
1041+
return termFeatures;
1042+
}
1043+
10071044
int main() {
10081045
SetLogCallback(&LogCallback);
10091046
SetTraceLevel(Trace_Verbose);

shared/libebm/tests/libebm_test.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ class FeatureTest final {
179179
const bool m_bUnseen;
180180
const bool m_bNominal;
181181

182+
inline IntEbm CountRealBins() const { return m_countBins - (m_bMissing ? 0 : 1) - (m_bUnseen ? 0 : 1); }
183+
182184
inline FeatureTest(
183185
const IntEbm countBins, const bool bMissing = true, const bool bUnseen = true, const bool bNominal = false) :
184186
m_countBins(countBins), m_bMissing(bMissing), m_bUnseen(bUnseen), m_bNominal(bNominal) {}
@@ -536,4 +538,30 @@ void DisplayCuts(IntEbm countSamples,
536538
double minFeatureVal,
537539
double maxFeatureVal);
538540

541+
std::vector<TestSample> MakeRandomDataset(std::vector<unsigned char>& rng,
542+
const IntEbm cClasses,
543+
const size_t cSamples,
544+
const std::vector<FeatureTest>& features);
545+
546+
std::vector<std::vector<IntEbm>> MakeMains(const std::vector<FeatureTest>& features);
547+
548+
inline static std::vector<unsigned char> MakeRng(const SeedEbm seed) {
549+
std::vector<unsigned char> rng(static_cast<size_t>(MeasureRNG()));
550+
InitRNG(seed, &rng[0]);
551+
return rng;
552+
}
553+
554+
// Draws a pseudo-random integer in [0, count) by advancing the opaque RNG
// state. The modulo reduction is slightly biased, which is acceptable here.
inline IntEbm TestRand(std::vector<unsigned char>& rng, const IntEbm count) {
   SeedEbm draw;
   GenerateSeed(&rng[0], &draw);
   const USeedEbm unsignedDraw = static_cast<USeedEbm>(draw);
   return static_cast<IntEbm>(unsignedDraw % static_cast<USeedEbm>(count));
}
560+
561+
// Draws one gaussian-distributed double (stddev 100.0), advancing the RNG state.
inline double TestRand(std::vector<unsigned char>& rng) {
   double val;
   GenerateGaussianRandom(&rng[0], 100.0, 1, &val);
   return val;
}
566+
539567
#endif // LIBEBM_TEST_HPP

0 commit comments

Comments
 (0)