Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/xgboost/predictor.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ class Predictor {
*/
virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
gbm::GBTreeModel const& model, bst_tree_t tree_begin,
bst_tree_t tree_end = 0) const = 0;
bst_tree_t tree_end = 0,
std::vector<float> const* tree_weights = nullptr) const = 0;

/**
* \brief Inplace prediction.
Expand Down
171 changes: 71 additions & 100 deletions src/gbm/gbtree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,34 @@ void CopyGradient(Context const* ctx, linalg::Matrix<GradientPair> const* in_gpa
}
}

/** Increment the prediction on GPU with one tree's weighted output (DART).
 *
 * Declaration/definition split on XGBOOST_USE_CUDA: with CUDA enabled this is only a
 * declaration here (the definition lives in a CUDA translation unit); without CUDA the
 * inline stub below fires the "GPU support" assertion.
 *
 * \param out_predts Prediction for the whole model (accumulated in place).
 * \param predts Prediction for current tree.
 * \param tree_w Tree weight.
 *
 * The remaining unnamed parameters are, in order: number of rows, number of output
 * groups, and the output group this tree belongs to (see the caller, which passes
 * n_rows, n_groups, grp_idx).
 */
void GPUDartPredictInc(common::Span<float>, common::Span<float>, float, size_t, bst_group_t,
bst_group_t)
#if defined(XGBOOST_USE_CUDA)
; // NOLINT
#else
{
// Built without CUDA: reaching the GPU DART path is an error.
common::AssertGPUSupport();
}
#endif

/** Increment the in-place prediction on GPU with one tree's weighted output (DART).
 *
 * Same XGBOOST_USE_CUDA declaration/stub split as GPUDartPredictInc: the CUDA
 * definition is compiled separately; without CUDA the stub asserts.
 * Unlike GPUDartPredictInc this variant also receives the model's base_score —
 * presumably folded into the accumulated output by the CUDA definition (not visible
 * here; confirm against the .cu implementation).
 */
void GPUDartInplacePredictInc(common::Span<float> /*out_predts*/, common::Span<float> /*predts*/,
float /*tree_w*/, size_t /*n_rows*/,
linalg::TensorView<float const, 1> /*base_score*/,
bst_group_t /*n_groups*/, bst_group_t /*group*/)
#if defined(XGBOOST_USE_CUDA)
; // NOLINT
#else
{
// Built without CUDA: reaching the GPU DART in-place path is an error.
common::AssertGPUSupport();
}
#endif

void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const& predictions,
ObjFunction const* obj, std::int32_t group_idx,
std::vector<HostDeviceVector<bst_node_t>> const& node_position,
Expand Down Expand Up @@ -501,49 +529,63 @@ void GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, Gradien
}

void GBTree::PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
bst_layer_t layer_begin, bst_layer_t layer_end) const {
bst_layer_t layer_begin, bst_layer_t layer_end,
std::vector<float> const* tree_weights) const {
// Unweighted prediction can reuse a cached prefix of the model output by tracking how many
// boosting iterations have already been accumulated in `out_preds->version`.
//
// Weighted prediction is used by DART and does not participate in this cache, since tree
// weights can change the accumulated output independently of the cached unweighted prefix.
if (layer_end == 0) {
layer_end = this->BoostedRounds();
}
if (layer_begin != 0 || layer_end < static_cast<bst_layer_t>(out_preds->version)) {
// cache is dropped.

auto cache_version = out_preds->version;
// We can preserve the cache only when:
// - prediction is unweighted
// - prediction starts from iteration 0, so a cached prefix is usable
// - the requested range does not move backwards past the cached version
auto preserve_cache = tree_weights == nullptr && layer_begin == 0 &&
layer_end >= static_cast<bst_layer_t>(cache_version);
// Initialize output when:
// - the cache cannot be reused, or
// - the cache is valid but still empty
auto initialize_output = !preserve_cache || cache_version == 0;
auto prediction_begin = preserve_cache ? cache_version : layer_begin;

if (!preserve_cache) {
out_preds->version = 0;
cache_version = 0;
}
bool reset = false;
if (layer_begin == 0) {
layer_begin = out_preds->version;
} else {
// When begin layer is not 0, the cache is not useful.
reset = true;
}

if (out_preds->predictions.Size() == 0 && p_fmat->Info().num_row_ != 0) {
CHECK_EQ(out_preds->version, 0);
}

auto const& predictor = GetPredictor(is_training, &out_preds->predictions, p_fmat);
if (out_preds->version == 0) {
if (initialize_output) {
// out_preds->Size() can be non-zero as it's initialized here before any
// tree is built at the 0^th iterator.
predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, model_);
}

auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
auto [tree_begin, tree_end] = detail::LayerToTree(model_, prediction_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (tree_end > tree_begin) {
predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end);
predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end, tree_weights);
}
if (reset) {

if (!preserve_cache) {
out_preds->version = 0;
} else {
std::uint32_t delta = layer_end - out_preds->version;
out_preds->Update(delta);
out_preds->Update(layer_end - cache_version);
}
}

void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
bst_layer_t layer_begin, bst_layer_t layer_end) {
// dispatch to const function.
this->PredictBatchImpl(p_fmat, out_preds, is_training, layer_begin, layer_end);
this->PredictBatchImpl(p_fmat, out_preds, is_training, layer_begin, layer_end, nullptr);
}

void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
Expand Down Expand Up @@ -646,34 +688,6 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
return cpu_predictor_;
}

/** Increment the prediction on GPU with one tree's weighted output (DART).
 *
 * NOTE(review): this is the pre-move copy of the helper (the PR relocates it earlier
 * in the file). Declaration only when XGBOOST_USE_CUDA is defined (CUDA definition is
 * compiled separately); otherwise the inline stub asserts GPU support.
 *
 * \param out_predts Prediction for the whole model (accumulated in place).
 * \param predts Prediction for current tree.
 * \param tree_w Tree weight.
 */
void GPUDartPredictInc(common::Span<float>, common::Span<float>, float, size_t, bst_group_t,
bst_group_t)
#if defined(XGBOOST_USE_CUDA)
; // NOLINT
#else
{
// Built without CUDA: reaching the GPU DART path is an error.
common::AssertGPUSupport();
}
#endif

/** Increment the in-place prediction on GPU with one tree's weighted output (DART).
 *
 * NOTE(review): pre-move copy (the PR relocates this helper earlier in the file).
 * Same CUDA declaration / non-CUDA asserting-stub split as GPUDartPredictInc; the
 * extra base_score view is consumed by the CUDA definition, which is not visible here.
 */
void GPUDartInplacePredictInc(common::Span<float> /*out_predts*/, common::Span<float> /*predts*/,
float /*tree_w*/, size_t /*n_rows*/,
linalg::TensorView<float const, 1> /*base_score*/,
bst_group_t /*n_groups*/, bst_group_t /*group*/)
#if defined(XGBOOST_USE_CUDA)
; // NOLINT
#else
{
// Built without CUDA: reaching the GPU DART in-place path is an error.
common::AssertGPUSupport();
}
#endif

class Dart : public GBTree {
public:
explicit Dart(LearnerModelParam const* booster_config, Context const* ctx)
Expand Down Expand Up @@ -737,63 +751,19 @@ class Dart : public GBTree {
out["dart_train_param"] = ToJson(dparam_);
}

// An independent const function to make sure it's thread safe.
//
// Old DART batch prediction (this PR replaces it with a tree_weights parameter on the
// shared GBTree::PredictBatchImpl): predicts one tree at a time into a scratch buffer,
// then accumulates each tree's output into p_out_preds scaled by its DART weight.
// During training, trees selected for dropout (idx_drop_) are skipped entirely.
void PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* p_out_preds, bool training,
bst_layer_t layer_begin, bst_layer_t layer_end) const {
// Multi-target (vector-leaf) models are not supported by DART.
CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented();
auto& predictor = this->GetPredictor(training, &p_out_preds->predictions, p_fmat);
CHECK(predictor);
// Always start from the base score; the weighted sum cannot reuse a cached prefix.
predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, model_);
p_out_preds->version = 0;
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
auto n_groups = model_.learner_model_param->num_output_group;

PredictionCacheEntry predts; // temporary storage for prediction
if (!ctx_->IsCPU()) {
predts.predictions.SetDevice(ctx_->Device());
}
predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);
// multi-target is not yet supported.
// Number of trees that make up one boosting layer (used to convert a tree index
// back into a layer/version number).
auto layer_trees = [&]() {
return model_.param.num_parallel_tree * model_.learner_model_param->OutputLength();
};
auto const& h_tree_info = this->model_.tree_info.ConstHostVector();
for (bst_tree_t i = tree_begin; i < tree_end; i += 1) {
// idx_drop_ is kept sorted, so membership can be tested with binary search.
if (training && std::binary_search(idx_drop_.cbegin(), idx_drop_.cend(), i)) {
continue;
}

CHECK_GE(i, p_out_preds->version);
auto version = i / layer_trees();
p_out_preds->version = version;
// Reset the scratch buffer, then predict with just this one tree.
predts.predictions.Fill(0);
predictor->PredictBatch(p_fmat, &predts, model_, i, i + 1);

// Multiply the weight to output prediction.
auto w = this->weight_drop_.at(i);
auto grp_idx = h_tree_info.at(i); // output group this tree contributes to
CHECK_EQ(p_out_preds->predictions.Size(), predts.predictions.Size());

size_t n_rows = p_fmat->Info().num_row_;
if (predts.predictions.Device().IsCUDA()) {
// Accumulate on the GPU to avoid a device->host round trip.
p_out_preds->predictions.SetDevice(predts.predictions.Device());
GPUDartPredictInc(p_out_preds->predictions.DeviceSpan(), predts.predictions.DeviceSpan(), w,
n_rows, n_groups, grp_idx);
} else {
auto& h_out_predts = p_out_preds->predictions.HostVector();
auto& h_predts = predts.predictions.ConstHostVector();
// Row-parallel accumulation; each row touches only its own group slot.
common::ParallelFor(p_fmat->Info().num_row_, ctx_->Threads(), [&](auto ridx) {
const size_t offset = ridx * n_groups + grp_idx;
h_out_predts[offset] += (h_predts[offset] * w);
});
}
}
}

void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* p_out_preds, bool training,
bst_layer_t layer_begin, bst_layer_t layer_end) override {
DropTrees(training);
this->PredictBatchImpl(p_fmat, p_out_preds, training, layer_begin, layer_end);
auto const* tree_weights = &weight_drop_;
std::vector<float> dropped_weights;
if (training && !idx_drop_.empty()) {
dropped_weights = weight_drop_;
for (auto idx : idx_drop_) {
dropped_weights.at(idx) = 0.0f;
}
tree_weights = &dropped_weights;
}
this->PredictBatchImpl(p_fmat, p_out_preds, training, layer_begin, layer_end, tree_weights);
}

void InplacePredict(std::shared_ptr<DMatrix> p_fmat, float missing,
Expand All @@ -808,7 +778,8 @@ class Dart : public GBTree {
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_fmat);
CHECK(proxy) << error::InplacePredictProxy();
auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);
this->PredictBatchImpl(p_fmat.get(), p_out_preds, false, layer_begin, layer_end);
this->PredictBatchImpl(p_fmat.get(), p_out_preds, false, layer_begin, layer_end,
&weight_drop_);
return;
}

Expand Down
3 changes: 2 additions & 1 deletion src/gbm/gbtree.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ class GBTree : public GradientBooster {
}

void PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
bst_layer_t layer_begin, bst_layer_t layer_end) const;
bst_layer_t layer_begin, bst_layer_t layer_end,
std::vector<float> const* tree_weights = nullptr) const;

void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool training,
bst_layer_t layer_begin, bst_layer_t layer_end) override;
Expand Down
Loading
Loading