1717#include " xgboost/data.h" // for SparsePage, SortedCSCPage
1818
// PREFETCH_READ_T0(addr): software-prefetch hint for a read of `addr`.
// Three build-time variants are selected below:
//   - XGBOOST_MM_PREFETCH_PRESENT: forwards to _mm_prefetch with _MM_HINT_T0.
//   - XGBOOST_BUILTIN_PREFETCH_PRESENT: forwards to __builtin_prefetch(addr, 0, 3).
//   - otherwise: expands to an empty `do { } while (0)` statement.
// NOTE(review): this text is a diff view; lines marked `-` are the old form and
// lines marked `+` the new form of the same directive.
1919#if defined(XGBOOST_MM_PREFETCH_PRESENT)
20- #include < xmmintrin.h>
21- #define PREFETCH_READ_T0 (addr ) _mm_prefetch(reinterpret_cast <const char *>(addr), _MM_HINT_T0)
20+ #include < xmmintrin.h>
21+ #define PREFETCH_READ_T0 (addr ) _mm_prefetch(reinterpret_cast <const char *>(addr), _MM_HINT_T0)
2222#elif defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
23- #define PREFETCH_READ_T0 (addr ) __builtin_prefetch(reinterpret_cast <const char *>(addr), 0 , 3 )
23+ #define PREFETCH_READ_T0 (addr ) __builtin_prefetch(reinterpret_cast <const char *>(addr), 0 , 3 )
2424#else // no SW pre-fetching available; PREFETCH_READ_T0 is no-op
25- #define PREFETCH_READ_T0 (addr ) do {} while (0 )
25+ #define PREFETCH_READ_T0 (addr ) \
26+ do { \
27+ } while (0 )
2628#endif // defined(XGBOOST_MM_PREFETCH_PRESENT)
2729
2830namespace xgboost ::common {
// HistogramCuts constructor, old (`-`) and new (`+`) forms.
// Old: default constructor that appends a single 0 to the host vector of cut_ptrs_.
// New: takes the feature count and constructs cut_ptrs_ from (n_features + 1, 0);
// presumably that is (element count, fill value) -- confirm against the cut_ptrs_ type.
29- HistogramCuts::HistogramCuts () {
30- cut_ptrs_.HostVector ().emplace_back (0 );
31- }
31+ HistogramCuts::HistogramCuts (bst_feature_t n_features)
32+ : cut_ptrs_(static_cast <std::size_t >(n_features) + 1 , 0 ) {}
3233
// Writes this object's fields to `fo` in order: Ptrs(), Values(), has_categorical_, max_cat_.
// Each write is wrapped in a CHECK that compares the field's byte size with the value
// returned by the write call (presumably the number of bytes written -- confirm).
// The lines marked `-` (writing MinValues()) are removed by this diff.
3334void HistogramCuts::Save (common::AlignedFileWriteStream *fo) const {
3435 auto const &ptrs = this ->Ptrs ();
3536 CHECK_LE (Span{ptrs}.size_bytes (), WriteVec (fo, ptrs));
3637 auto const &vals = this ->Values ();
3738 CHECK_LE (Span{vals}.size_bytes (), WriteVec (fo, vals));
38- auto const &mins = this ->MinValues ();
39- CHECK_LE (Span{mins}.size_bytes (), WriteVec (fo, mins));
4039 CHECK_GE (fo->Write (has_categorical_), sizeof (has_categorical_));
4140 CHECK_GE (fo->Write (max_cat_), sizeof (max_cat_));
4241}
4342
// Allocates a HistogramCuts with `new` and fills it from `fi` in order:
// cut_ptrs_, cut_values_, has_categorical_, max_cat_. Every read is wrapped in CHECK.
// The raw pointer is returned and never freed here, so the caller presumably takes
// ownership -- confirm at call sites.
// Diff notes: the `+` line constructs with `{ 0 }` instead of the default constructor,
// and the `-` line reading min_vals_ is removed.
4443[[nodiscard]] HistogramCuts *HistogramCuts::Load (common::AlignedResourceReadStream *fi) {
45- auto p_cuts = new HistogramCuts;
44+ auto p_cuts = new HistogramCuts{ 0 } ;
4645 CHECK (ReadVec (fi, &p_cuts->cut_ptrs_ .HostVector ()));
4746 CHECK (ReadVec (fi, &p_cuts->cut_values_ .HostVector ()));
48- CHECK (ReadVec (fi, &p_cuts->min_vals_ .HostVector ()));
4947 CHECK (fi->Read (&p_cuts->has_categorical_ ));
5048 CHECK (fi->Read (&p_cuts->max_cat_ ));
5149 return p_cuts;
5250}
5351
5452HistogramCuts SketchOnDMatrix (Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted,
5553 Span<float const > hessian) {
56- HistogramCuts out;
5754 auto const &info = m->Info ();
5855 auto n_threads = ctx->Threads ();
5956 std::vector<bst_idx_t > reduced (info.num_col_ , 0 );
@@ -73,20 +70,15 @@ HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins
7370 for (auto const &page : m->GetBatches <SparsePage>()) {
7471 container.PushRowPage (page, info, hessian);
7572 }
76- container.MakeCuts (ctx, m->Info (), &out );
73+ return container.MakeCuts (ctx, m->Info ());
7774 } else {
78- SortedSketchContainer container{ctx,
79- max_bins,
80- m->Info ().feature_types .ConstHostSpan (),
81- reduced,
75+ SortedSketchContainer container{ctx, max_bins, m->Info ().feature_types .ConstHostSpan (), reduced,
8276 HostSketchContainer::UseGroup (info)};
8377 for (auto const &page : m->GetBatches <SortedCSCPage>(ctx)) {
8478 container.PushColPage (page, info, hessian);
8579 }
86- container.MakeCuts (ctx, m->Info (), &out );
80+ return container.MakeCuts (ctx, m->Info ());
8781 }
88-
89- return out;
9082}
9183
9284/* !
@@ -118,9 +110,9 @@ void CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end) {
118110 */
119111void SubtractionHist (GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin,
120112 size_t end) {
121- double * pdst = reinterpret_cast <double *>(dst.data ());
122- const double * psrc1 = reinterpret_cast <const double *>(src1.data ());
123- const double * psrc2 = reinterpret_cast <const double *>(src2.data ());
113+ double * pdst = reinterpret_cast <double *>(dst.data ());
114+ const double * psrc1 = reinterpret_cast <const double *>(src1.data ());
115+ const double * psrc2 = reinterpret_cast <const double *>(src2.data ());
124116
125117 for (size_t i = 2 * begin; i < 2 * end; ++i) {
126118 pdst[i] = psrc1[i] - psrc2[i];
@@ -134,13 +126,10 @@ struct Prefetch {
134126
135127 private:
136128 static constexpr size_t kNoPrefetchSize =
137- kPrefetchOffset + kCacheLineSize /
138- sizeof (decltype (GHistIndexMatrix::row_ptr)::value_type);
129+ kPrefetchOffset + kCacheLineSize / sizeof (decltype (GHistIndexMatrix::row_ptr)::value_type);
139130
140131 public:
141- static size_t NoPrefetchSize (size_t rows) {
142- return std::min (rows, kNoPrefetchSize );
143- }
132+ static size_t NoPrefetchSize (size_t rows) { return std::min (rows, kNoPrefetchSize ); }
144133
145134 template <typename T>
146135 static constexpr size_t GetPrefetchStep () {
@@ -156,9 +145,7 @@ struct RuntimeFlags {
156145 const BinTypeSize bin_type_size;
157146};
158147
159- template <bool _any_missing,
160- bool _first_page = false ,
161- bool _read_by_column = false ,
148+ template <bool _any_missing, bool _first_page = false , bool _read_by_column = false ,
162149 typename BinIdxTypeName = uint8_t >
163150class GHistBuildingManager {
164151 public:
@@ -192,7 +179,7 @@ class GHistBuildingManager {
192179 * and forward the call there.
193180 */
194181 template <typename Fn>
195- static void DispatchAndExecute (const RuntimeFlags& flags, Fn&& fn) {
182+ static void DispatchAndExecute (const RuntimeFlags & flags, Fn && fn) {
196183 if (flags.first_page != kFirstPage ) {
197184 SetFirstPage<true >::Type::DispatchAndExecute (flags, std::forward<Fn>(fn));
198185 } else if (flags.read_by_column != kReadByColumn ) {
@@ -247,22 +234,19 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t cons
247234 // to work with gradient pairs as a single row FP array
248235
249236 for (std::size_t i = 0 ; i < size; ++i) {
250- const size_t icol_start =
251- kAnyMissing ? get_row_ptr (rid[i]) : get_rid (rid[i]) * n_features;
252- const size_t icol_end =
253- kAnyMissing ? get_row_ptr (rid[i] + 1 ) : icol_start + n_features;
237+ const size_t icol_start = kAnyMissing ? get_row_ptr (rid[i]) : get_rid (rid[i]) * n_features;
238+ const size_t icol_end = kAnyMissing ? get_row_ptr (rid[i] + 1 ) : icol_start + n_features;
254239
255240 const size_t row_size = icol_end - icol_start;
256241 const size_t idx_gh = two * rid[i];
257242
258243 if (do_prefetch) {
259244 const size_t icol_start_prefetch =
260- kAnyMissing
261- ? get_row_ptr (rid[i + Prefetch::kPrefetchOffset ])
262- : get_rid (rid[i + Prefetch::kPrefetchOffset ]) * n_features;
263- const size_t icol_end_prefetch =
264- kAnyMissing ? get_row_ptr (rid[i + Prefetch::kPrefetchOffset ] + 1 )
265- : icol_start_prefetch + n_features;
245+ kAnyMissing ? get_row_ptr (rid[i + Prefetch::kPrefetchOffset ])
246+ : get_rid (rid[i + Prefetch::kPrefetchOffset ]) * n_features;
247+ const size_t icol_end_prefetch = kAnyMissing
248+ ? get_row_ptr (rid[i + Prefetch::kPrefetchOffset ] + 1 )
249+ : icol_start_prefetch + n_features;
266250
267251 PREFETCH_READ_T0 (p_gpair + two * rid[i + Prefetch::kPrefetchOffset ]);
268252 for (size_t j = icol_start_prefetch; j < icol_end_prefetch;
@@ -301,7 +285,9 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t cons
301285 auto get_row_ptr = [&](bst_idx_t ridx) {
302286 return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
303287 };
304- auto get_rid = [&](bst_idx_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
288+ auto get_rid = [&](bst_idx_t ridx) {
289+ return kFirstPage ? ridx : (ridx - base_rowid);
290+ };
305291
306292 const size_t n_features = gmat.cut .Ptrs ().size () - 1 ;
307293 const size_t n_columns = n_features;
@@ -314,10 +300,8 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t cons
314300 const uint32_t offset = kAnyMissing ? 0 : offsets[cid];
315301 for (size_t i = 0 ; i < size; ++i) {
316302 const size_t row_id = rid[i];
317- const size_t icol_start =
318- kAnyMissing ? get_row_ptr (row_id) : get_rid (row_id) * n_features;
319- const size_t icol_end =
320- kAnyMissing ? get_row_ptr (rid[i] + 1 ) : icol_start + n_features;
303+ const size_t icol_start = kAnyMissing ? get_row_ptr (row_id) : get_rid (row_id) * n_features;
304+ const size_t icol_end = kAnyMissing ? get_row_ptr (rid[i] + 1 ) : icol_start + n_features;
321305
322306 if (cid < icol_end - icol_start) {
323307 const BinIdxType *gr_index_local = gradient_index + icol_start;
@@ -327,7 +311,7 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t cons
327311 const size_t idx_gh = two * row_id;
328312 // The trick with pgh_t buffer helps the compiler to generate faster binary.
329313 const float pgh_t [] = {pgh[idx_gh], pgh[idx_gh + 1 ]};
330- *(hist_local) += pgh_t [0 ];
314+ *(hist_local) += pgh_t [0 ];
331315 *(hist_local + 1 ) += pgh_t [1 ];
332316 }
333317 }
@@ -369,7 +353,7 @@ void BuildHistDispatch(Span<GradientPair const> gpair, Span<bst_idx_t const> row
369353template <bool any_missing>
370354void BuildHist (Span<GradientPair const > gpair, Span<bst_idx_t const > row_indices,
371355 const GHistIndexMatrix &gmat, GHistRow hist, bool read_by_column) {
372- bool first_page = gmat.base_rowid == 0 ;;
356+ bool first_page = gmat.base_rowid == 0 ;
373357 auto bin_type_size = gmat.index .GetBinTypeSize ();
374358
375359 GHistBuildingManager<any_missing>::DispatchAndExecute (
@@ -380,10 +364,8 @@ void BuildHist(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices
380364}
381365
// Explicit instantiations of BuildHist for any_missing = true and any_missing = false.
// The `-`/`+` lines differ only in how the parameter list is wrapped.
382366template void BuildHist<true >(Span<GradientPair const > gpair, Span<bst_idx_t const > row_indices,
383- const GHistIndexMatrix &gmat, GHistRow hist,
384- bool read_by_column);
367+ const GHistIndexMatrix &gmat, GHistRow hist, bool read_by_column);
385368
386369template void BuildHist<false >(Span<GradientPair const > gpair, Span<bst_idx_t const > row_indices,
387- const GHistIndexMatrix &gmat, GHistRow hist,
388- bool read_by_column);
370+ const GHistIndexMatrix &gmat, GHistRow hist, bool read_by_column);
389371} // namespace xgboost::common
0 commit comments