@@ -2061,8 +2061,7 @@ TEST_CASE("lossguide, boosting, regression") {
    CHECK_APPROX(termScore, 0.40592050000000002);
 }
 
-TEST_CASE("stress test, boosting, regression") {
-   const IntEbm cClasses = Task_Regression;
+TEST_CASE("stress test, boosting") {
    auto rng = MakeRng(0);
    const IntEbm cTrainSamples = 200;
    const IntEbm cValidationSamples = 100;
@@ -2088,31 +2087,81 @@ TEST_CASE("stress test, boosting, regression") {
    if(4 <= features.size()) {
       // terms.push_back({0, 1, 2, 3}); // TODO: enable when fast enough
    }
-   const auto train = MakeRandomDataset(rng, cClasses, cTrainSamples, features);
-   const auto validation = MakeRandomDataset(rng, cClasses, cValidationSamples, features);
-   const size_t cRounds = 1000;
-   const IntEbm boostFlags = TermBoostFlags_Default;
-
-   TestBoost test = TestBoost(cClasses,
-         features,
-         terms,
-         train,
-         validation,
-         k_countInnerBagsDefault,
-         k_testCreateBoosterFlags_Default,
-         AccelerationFlags_NONE);
-
-   double validationMetric = 0;
-   for(size_t iRound = 0; iRound < cRounds; ++iRound) {
-      for(IntEbm iTerm = 0; iTerm < static_cast<IntEbm>(terms.size()); ++iTerm) {
-         // we allow 1 cut more than the number of bins to test excessive leaves.
-         const IntEbm cLeaves = 1 + TestRand(rng, features[terms[iTerm][0]].CountRealBins() + 1);
-         const std::vector<IntEbm> leaves(terms[iTerm].size(), cLeaves);
-         validationMetric =
-               test.Boost(iTerm, boostFlags, k_learningRateDefault, 1, 0.0, 0.0, 0.0, 0.0, 10.0, 6, 0.75, leaves)
-                     .validationMetric;
+   const size_t cRounds = 200;
+   std::vector<IntEbm> boostFlagsAny{TermBoostFlags_PurifyGain,
+         TermBoostFlags_DisableNewtonGain,
+         TermBoostFlags_DisableCategorical,
+         TermBoostFlags_DisableNewtonUpdate,
+         TermBoostFlags_RandomSplits};
+   std::vector<IntEbm> boostFlagsChoose{TermBoostFlags_Default,
+         TermBoostFlags_MissingLow,
+         TermBoostFlags_MissingHigh,
+         TermBoostFlags_MissingSeparate,
+         TermBoostFlags_MissingDrop};
+
+   double validationMetric = 0.0;
+
+   for(IntEbm classesCount = Task_Regression; classesCount < 5; ++classesCount) {
+      if(classesCount != Task_Regression && classesCount < 2) {
+         continue;
+      }
+      const auto train = MakeRandomDataset(rng, classesCount, cTrainSamples, features);
+      const auto validation = MakeRandomDataset(rng, classesCount, cValidationSamples, features);
+      for(IntEbm innerBagCount = 0; innerBagCount < 3; ++innerBagCount) {
+         TestBoost test = TestBoost(classesCount,
+               features,
+               terms,
+               train,
+               validation,
+               innerBagCount,
+               k_testCreateBoosterFlags_Default,
+               AccelerationFlags_NONE);
+
+         double validationMetricIteration = 0.0;
+         for(size_t iRound = 0; iRound < cRounds; ++iRound) {
+            for(IntEbm iTerm = 0; iTerm < static_cast<IntEbm>(terms.size()); ++iTerm) {
+               const IntEbm cRealBins = features[terms[iTerm][0]].CountRealBins();
+               const IntEbm cDimensions = terms[iTerm].size();
+
+               const TermBoostFlags boostFlags =
+                     static_cast<TermBoostFlags>(ChooseAny(rng, boostFlagsAny) | ChooseFrom(rng, boostFlagsChoose));
+
+               const double learningRate = 0.015625;
+               const IntEbm minSamplesLeaf = TestRand(rng, 5) + 1;
+               const double minHessian = 0 == TestRand(rng, 5) ? 0.015625 : 0.0;
+               const double regAlpha = 0 == TestRand(rng, 5) ? 0.015625 : 0.0;
+               const double regLambda = 0 == TestRand(rng, 5) ? 0.015625 : 0.0;
+               const double maxDeltaStep = 0 == TestRand(rng, 5) ? 1.0 : 0.0;
+               const double categoricalSmoothing = 10.0;
+               const IntEbm maxCategoricalThreshold = 1 + TestRand(rng, cRealBins + 1);
+               const double categoricalInclusionPercent = 0 == TestRand(rng, 2) ? 0.75 : 1.0;
+
+               // we allow 1 cut more than the number of bins to test excessive leaves.
+               const IntEbm cLeaves = 1 + TestRand(rng, cRealBins + 1);
+               const std::vector<IntEbm> leaves(cDimensions, cLeaves);
+               const MonotoneDirection direction =
+                     0 == TestRand(rng, 5) ? static_cast<MonotoneDirection>(TestRand(rng, 2) * 2 - 1) : 0;
+               const std::vector<MonotoneDirection> monotonicity(cDimensions, direction);
+
+               validationMetricIteration = test.Boost(iTerm,
+                     boostFlags,
+                     learningRate,
+                     minSamplesLeaf,
+                     minHessian,
+                     regAlpha,
+                     regLambda,
+                     maxDeltaStep,
+                     categoricalSmoothing,
+                     maxCategoricalThreshold,
+                     categoricalInclusionPercent,
+                     leaves,
+                     monotonicity)
+                     .validationMetric;
+            }
+         }
+         validationMetric += validationMetricIteration;
       }
    }
 
-   CHECK(validationMetric == 15092.795219174546);
+   CHECK(validationMetric == 42031.143270308334);
 }