@@ -2060,3 +2060,114 @@ TEST_CASE("lossguide, boosting, regression") {
2060
2060
termScore = test.GetCurrentTermScore (0 , {0 }, 0 );
2061
2061
CHECK_APPROX (termScore, 0.40592050000000002 );
2062
2062
}
2063
+
2064
+ TEST_CASE (" stress test, boosting" ) {
2065
+ auto rng = MakeRng (0 );
2066
+ const IntEbm cTrainSamples = 200 ;
2067
+ const IntEbm cValidationSamples = 100 ;
2068
+ const std::vector<FeatureTest> features = {
2069
+ FeatureTest (10 , false , false , false ),
2070
+ FeatureTest (10 , false , false , true ),
2071
+ FeatureTest (10 , false , true , false ),
2072
+ FeatureTest (10 , false , true , true ),
2073
+ FeatureTest (10 , true , false , false ),
2074
+ FeatureTest (10 , true , false , true ),
2075
+ FeatureTest (10 , true , true , false ),
2076
+ FeatureTest (10 , true , true , true ),
2077
+ };
2078
+ auto terms = MakeMains (features);
2079
+ terms.push_back ({0 , 0 });
2080
+ if (2 <= features.size ()) {
2081
+ terms.push_back ({0 , 1 });
2082
+ terms.push_back ({1 , 0 });
2083
+ }
2084
+ if (3 <= features.size ()) {
2085
+ // terms.push_back({0, 1, 2}); // TODO: enable when fast enough
2086
+ }
2087
+ if (4 <= features.size ()) {
2088
+ // terms.push_back({0, 1, 2, 3}); // TODO: enable when fast enough
2089
+ }
2090
+ const size_t cRounds = 200 ;
2091
+ std::vector<IntEbm> boostFlagsAny{// TermBoostFlags_PurifyGain,
2092
+ TermBoostFlags_DisableNewtonGain,
2093
+ TermBoostFlags_DisableCategorical,
2094
+ // TermBoostFlags_PurifyUpdate,
2095
+ // TermBoostFlags_GradientSums, // does not return a metric
2096
+ TermBoostFlags_DisableNewtonUpdate,
2097
+ TermBoostFlags_RandomSplits};
2098
+ std::vector<IntEbm> boostFlagsChoose{TermBoostFlags_Default,
2099
+ TermBoostFlags_MissingLow,
2100
+ TermBoostFlags_MissingHigh,
2101
+ TermBoostFlags_MissingSeparate,
2102
+ TermBoostFlags_MissingDrop};
2103
+
2104
+ double validationMetric = 1.0 ;
2105
+
2106
+ for (IntEbm classesCount = Task_Regression; classesCount < 5 ; ++classesCount) {
2107
+ if (classesCount != Task_Regression && classesCount < 1 ) {
2108
+ continue ;
2109
+ }
2110
+ const auto train = MakeRandomDataset (rng, classesCount, cTrainSamples, features);
2111
+ const auto validation = MakeRandomDataset (rng, classesCount, cValidationSamples, features);
2112
+ for (IntEbm innerBagCount = 0 ; innerBagCount < 3 ; ++innerBagCount) {
2113
+ TestBoost test = TestBoost (classesCount,
2114
+ features,
2115
+ terms,
2116
+ train,
2117
+ validation,
2118
+ innerBagCount,
2119
+ k_testCreateBoosterFlags_Default,
2120
+ AccelerationFlags_NONE);
2121
+
2122
+ double validationMetricIteration = 0.0 ;
2123
+ for (size_t iRound = 0 ; iRound < cRounds; ++iRound) {
2124
+ for (IntEbm iTerm = 0 ; iTerm < static_cast <IntEbm>(terms.size ()); ++iTerm) {
2125
+ const IntEbm cRealBins = features[terms[iTerm][0 ]].CountRealBins ();
2126
+ const IntEbm cDimensions = terms[iTerm].size ();
2127
+
2128
+ const TermBoostFlags boostFlags =
2129
+ static_cast <TermBoostFlags>(ChooseAny (rng, boostFlagsAny) | ChooseFrom (rng, boostFlagsChoose));
2130
+
2131
+ const double learningRate = 0.015625 ;
2132
+ const IntEbm minSamplesLeaf = TestRand (rng, 5 ) + 1 ;
2133
+ const double minHessian = 0 == TestRand (rng, 5 ) ? 0.015625 : 0.0 ;
2134
+ const double regAlpha = 0 == TestRand (rng, 5 ) ? 0.015625 : 0.0 ;
2135
+ const double regLambda = 0 == TestRand (rng, 5 ) ? 0.015625 : 0.0 ;
2136
+ const double maxDeltaStep = 0 == TestRand (rng, 5 ) ? 1.0 : 0.0 ;
2137
+ const double categoricalSmoothing = 10.0 ;
2138
+ const IntEbm maxCategoricalThreshold = 1 + TestRand (rng, cRealBins + 1 );
2139
+ const double categoricalInclusionPercent = 0 == TestRand (rng, 2 ) ? 0.75 : 1.0 ;
2140
+
2141
+ // we allow 1 cut more than the number of bins to test excessive leaves.
2142
+ const IntEbm cLeaves = 1 + TestRand (rng, cRealBins + 1 );
2143
+ const std::vector<IntEbm> leaves (cDimensions, cLeaves);
2144
+ const MonotoneDirection direction =
2145
+ 0 == TestRand (rng, 5 ) ? static_cast <MonotoneDirection>(TestRand (rng, 2 ) * 2 - 1 ) : 0 ;
2146
+ const std::vector<MonotoneDirection> monotonicity (cDimensions, direction);
2147
+
2148
+ validationMetricIteration = test.Boost (iTerm,
2149
+ boostFlags,
2150
+ learningRate,
2151
+ minSamplesLeaf,
2152
+ minHessian,
2153
+ regAlpha,
2154
+ regLambda,
2155
+ maxDeltaStep,
2156
+ categoricalSmoothing,
2157
+ maxCategoricalThreshold,
2158
+ categoricalInclusionPercent,
2159
+ leaves,
2160
+ monotonicity)
2161
+ .validationMetric ;
2162
+ }
2163
+ }
2164
+ if (classesCount == 1 ) {
2165
+ CHECK (std::numeric_limits<double >::infinity () == validationMetricIteration);
2166
+ } else {
2167
+ validationMetric *= validationMetricIteration;
2168
+ }
2169
+ }
2170
+ }
2171
+
2172
+ CHECK (validationMetric == 62013566170252.117 );
2173
+ }
0 commit comments