Skip to content

Commit d8a549e

Browse files
authored
Avoid thread block with sparse data. (#7255)
1 parent ca17f8a commit d8a549e

File tree

6 files changed

+97
-16
lines changed

6 files changed

+97
-16
lines changed

src/predictor/cpu_predictor.cc

+30-15
Original file line numberDiff line numberDiff line change
@@ -253,17 +253,32 @@ class CPUPredictor : public Predictor {
253253
gbm::GBTreeModel const &model, int32_t tree_begin,
254254
int32_t tree_end) const {
255255
const int threads = omp_get_max_threads();
256+
constexpr double kDensityThresh = .5;
257+
size_t total = std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_,
258+
static_cast<uint64_t>(1));
259+
double density = static_cast<double>(p_fmat->Info().num_nonzero_) /
260+
static_cast<double>(total);
261+
bool blocked = density > kDensityThresh;
262+
256263
std::vector<RegTree::FVec> feat_vecs;
257-
InitThreadTemp(threads * kBlockOfRowsSize,
264+
InitThreadTemp(threads * (blocked ? kBlockOfRowsSize : 1),
258265
model.learner_model_param->num_feature, &feat_vecs);
259-
for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
266+
for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
260267
CHECK_EQ(out_preds->size(),
261-
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group);
268+
p_fmat->Info().num_row_ *
269+
model.learner_model_param->num_output_group);
262270
size_t constexpr kUnroll = 8;
263-
PredictBatchByBlockOfRowsKernel<SparsePageView<kUnroll>,
264-
kBlockOfRowsSize>(SparsePageView<kUnroll>{&batch},
265-
out_preds, model, tree_begin,
266-
tree_end, &feat_vecs);
271+
if (blocked) {
272+
PredictBatchByBlockOfRowsKernel<SparsePageView<kUnroll>,
273+
kBlockOfRowsSize>(
274+
SparsePageView<kUnroll>{&batch}, out_preds, model, tree_begin,
275+
tree_end, &feat_vecs);
276+
277+
} else {
278+
PredictBatchByBlockOfRowsKernel<SparsePageView<kUnroll>, 1>(
279+
SparsePageView<kUnroll>{&batch}, out_preds, model, tree_begin,
280+
tree_end, &feat_vecs);
281+
}
267282
}
268283
}
269284

@@ -316,7 +331,7 @@ class CPUPredictor : public Predictor {
316331
tree_end);
317332
}
318333

319-
template <typename Adapter>
334+
template <typename Adapter, size_t kBlockSize>
320335
void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
321336
const gbm::GBTreeModel &model, float missing,
322337
PredictionCacheEntry *out_preds,
@@ -336,9 +351,9 @@ class CPUPredictor : public Predictor {
336351
std::vector<Entry> workspace(m->NumColumns() * 8 * threads);
337352
auto &predictions = out_preds->predictions.HostVector();
338353
std::vector<RegTree::FVec> thread_temp;
339-
InitThreadTemp(threads * kBlockOfRowsSize,
340-
model.learner_model_param->num_feature, &thread_temp);
341-
PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockOfRowsSize>(
354+
InitThreadTemp(threads * kBlockSize, model.learner_model_param->num_feature,
355+
&thread_temp);
356+
PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>(
342357
AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}),
343358
&predictions, model, tree_begin, tree_end, &thread_temp);
344359
}
@@ -348,16 +363,16 @@ class CPUPredictor : public Predictor {
348363
PredictionCacheEntry *out_preds, uint32_t tree_begin,
349364
unsigned tree_end) const override {
350365
if (x.type() == typeid(std::shared_ptr<data::DenseAdapter>)) {
351-
this->DispatchedInplacePredict<data::DenseAdapter>(
366+
this->DispatchedInplacePredict<data::DenseAdapter, kBlockOfRowsSize>(
352367
x, p_m, model, missing, out_preds, tree_begin, tree_end);
353368
} else if (x.type() == typeid(std::shared_ptr<data::CSRAdapter>)) {
354-
this->DispatchedInplacePredict<data::CSRAdapter>(
369+
this->DispatchedInplacePredict<data::CSRAdapter, 1>(
355370
x, p_m, model, missing, out_preds, tree_begin, tree_end);
356371
} else if (x.type() == typeid(std::shared_ptr<data::ArrayAdapter>)) {
357-
this->DispatchedInplacePredict<data::ArrayAdapter> (
372+
this->DispatchedInplacePredict<data::ArrayAdapter, kBlockOfRowsSize> (
358373
x, p_m, model, missing, out_preds, tree_begin, tree_end);
359374
} else if (x.type() == typeid(std::shared_ptr<data::CSRArrayAdapter>)) {
360-
this->DispatchedInplacePredict<data::CSRArrayAdapter> (
375+
this->DispatchedInplacePredict<data::CSRArrayAdapter, 1> (
361376
x, p_m, model, missing, out_preds, tree_begin, tree_end);
362377
} else {
363378
return false;

tests/cpp/common/test_hist_util.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ void TestCategoricalSketch(size_t n, size_t num_categories, int32_t num_bins,
247247
ASSERT_TRUE(is_unique);
248248

249249
x.resize(n_uniques);
250-
for (size_t i = 0; i < n_uniques; ++i) {
250+
for (decltype(n_uniques) i = 0; i < n_uniques; ++i) {
251251
ASSERT_EQ(x[i], values[i]);
252252
}
253253
}

tests/cpp/predictor/test_cpu_predictor.cc

+5
Original file line numberDiff line numberDiff line change
@@ -247,4 +247,9 @@ TEST(CpuPredictor, UpdatePredictionCache) {
247247
TEST(CpuPredictor, LesserFeatures) {
248248
TestPredictionWithLesserFeatures("cpu_predictor");
249249
}
250+
251+
// Runs the shared sparse-prediction check against the CPU predictor at two
// sparsity settings (0.2 and 0.8).
// NOTE(review): presumably the two values are chosen so that one run lands on
// each side of the 0.5 density threshold CPUPredictor uses to pick its block
// size — confirm how RandomDataGenerator interprets the sparsity argument.
TEST(CpuPredictor, Sparse) {
252+
TestSparsePrediction(0.2, "cpu_predictor");
253+
TestSparsePrediction(0.8, "cpu_predictor");
254+
}
250255
} // namespace xgboost

tests/cpp/predictor/test_gpu_predictor.cu

+5
Original file line numberDiff line numberDiff line change
@@ -256,5 +256,10 @@ TEST(GPUPredictor, PredictLeafBasic) {
256256
ASSERT_EQ(v, 0);
257257
}
258258
}
259+
260+
// Runs the shared sparse-prediction check with the "gpu_predictor" setting at
// two sparsity settings (0.2 and 0.8), mirroring the CPU predictor test of the
// same name.
TEST(GPUPredictor, Sparse) {
261+
TestSparsePrediction(0.2, "gpu_predictor");
262+
TestSparsePrediction(0.8, "gpu_predictor");
263+
}
259264
} // namespace predictor
260265
} // namespace xgboost

tests/cpp/predictor/test_predictor.cc

+54
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "test_predictor.h"
1212

1313
#include "../helpers.h"
14+
#include "../../../src/data/adapter.h"
1415
#include "../../../src/common/io.h"
1516
#include "../../../src/common/categorical.h"
1617
#include "../../../src/common/bitfield.h"
@@ -355,4 +356,57 @@ void TestIterationRange(std::string name) {
355356
ASSERT_EQ(h_sliced, h_range);
356357
}
357358
}
359+
360+
// Checks that predicting on a randomly generated DMatrix with `predictor`
// yields the same values as in-place prediction with "cpu_predictor" on a
// dense, NaN-padded copy of the same data.
//
// \param sparsity  Forwarded to RandomDataGenerator; presumably the fraction
//                  of missing entries — confirm against the generator.
// \param predictor Value assigned to the "predictor" learner parameter for
//                  the DMatrix-based prediction; also selects how the two
//                  results are compared at the end.
void TestSparsePrediction(float sparsity, std::string predictor) {
361+
size_t constexpr kRows = 512, kCols = 128;
362+
auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true);
363+
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
364+
learner->Configure();
365+
// Train for four iterations so the model has something to predict with.
for (size_t i = 0; i < 4; ++i) {
366+
learner->UpdateOneIter(i, Xy);
367+
}
368+
369+
HostDeviceVector<float> sparse_predt;
370+
371+
// Round-trip the model through JSON into a freshly created learner before
// changing the predictor parameter.
Json model{Object{}};
372+
learner->SaveModel(&model);
373+
374+
learner.reset(Learner::Create({Xy}));
375+
learner->LoadModel(model);
376+
377+
// Reference result: regular prediction on the DMatrix with the requested
// predictor.
learner->SetParam("predictor", predictor);
378+
learner->Predict(Xy, false, &sparse_predt, 0, 0);
379+
380+
// Build a dense kRows x kCols row-major buffer where every entry absent from
// the sparse pages stays NaN.
std::vector<float> with_nan(kRows * kCols, std::numeric_limits<float>::quiet_NaN());
381+
for (auto const& page : Xy->GetBatches<SparsePage>()) {
382+
auto batch = page.GetView();
383+
for (size_t i = 0; i < batch.Size(); ++i) {
384+
auto row = batch[i];
385+
for (auto e : row) {
386+
// NOTE(review): `i` is the row index within this batch; no per-page row
// offset is added, so this assumes the data arrives as a single batch —
// confirm.
with_nan[i * kCols + e.index] = e.fvalue;
387+
}
388+
}
389+
}
390+
391+
// The dense comparison always goes through the CPU predictor's in-place
// path, with NaN passed as the missing value.
learner->SetParam("predictor", "cpu_predictor");
392+
// Xcode_12.4 doesn't compile with `std::make_shared`.
393+
auto dense = std::shared_ptr<data::DenseAdapter>(
394+
new data::DenseAdapter(with_nan.data(), kRows, kCols));
395+
// NOTE(review): declared without an initialiser; its address is handed to
// InplacePredict and it is dereferenced right after, so the call is
// expected to set it — confirm.
HostDeviceVector<float> *p_dense_predt;
396+
learner->InplacePredict(dmlc::any(dense), nullptr, PredictionType::kValue,
397+
std::numeric_limits<float>::quiet_NaN(), &p_dense_predt,
398+
0, 0);
399+
400+
auto const& dense_predt = *p_dense_predt;
401+
// Same predictor on both sides: require exact equality of the host vectors.
// Otherwise compare element-wise with ASSERT_FLOAT_EQ instead of exact
// equality.
if (predictor == "cpu_predictor") {
402+
ASSERT_EQ(dense_predt.HostVector(), sparse_predt.HostVector());
403+
} else {
404+
auto const &h_dense = dense_predt.HostVector();
405+
auto const &h_sparse = sparse_predt.HostVector();
406+
ASSERT_EQ(h_dense.size(), h_sparse.size());
407+
for (size_t i = 0; i < h_dense.size(); ++i) {
408+
ASSERT_FLOAT_EQ(h_dense[i], h_sparse[i]);
409+
}
410+
}
411+
}
358412
} // namespace xgboost

tests/cpp/predictor/test_predictor.h

+2
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ void TestCategoricalPrediction(std::string name);
7070
void TestCategoricalPredictLeaf(StringView name);
7171

7272
void TestIterationRange(std::string name);
73+
74+
void TestSparsePrediction(float sparsity, std::string predictor);
7375
} // namespace xgboost
7476

7577
#endif // XGBOOST_TEST_PREDICTOR_H_

0 commit comments

Comments
 (0)