Skip to content

Commit 76c4407

Browse files
committed
fix sometimes python cachelist problem
1 parent ccde443 commit 76c4407

File tree

4 files changed

+27
-11
lines changed

4 files changed

+27
-11
lines changed

python/xgboost_python.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ namespace xgboost{
112112
private:
113113
bool init_trainer, init_model;
114114
public:
115-
Booster(const std::vector<const regrank::DMatrix *> mats){
115+
Booster(const std::vector<regrank::DMatrix *> mats){
116116
silent = 1;
117117
init_trainer = false;
118118
init_model = false;
@@ -223,7 +223,7 @@ extern "C"{
223223

224224
// xgboost implementation
225225
void *XGBoosterCreate( void *dmats[], size_t len ){
226-
std::vector<const xgboost::regrank::DMatrix*> mats;
226+
std::vector<xgboost::regrank::DMatrix*> mats;
227227
for( size_t i = 0; i < len; ++i ){
228228
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
229229
dtr->CheckInit();

regrank/xgboost_regrank.h

+17-7
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ namespace xgboost{
3131
* \brief a regression booster associated with training and evaluating data
3232
* \param mats array of pointers to matrices whose prediction results need to be cached
3333
*/
34-
RegRankBoostLearner(const std::vector<const DMatrix *>& mats){
34+
RegRankBoostLearner(const std::vector<DMatrix *>& mats){
3535
silent = 0;
3636
obj_ = NULL;
3737
name_obj_ = "reg:linear";
@@ -45,7 +45,7 @@ namespace xgboost{
4545
* data matrices to continue training otherwise it will cause error
4646
* \param mats array of pointers to matrices whose prediction results need to be cached
4747
*/
48-
inline void SetCacheData(const std::vector<const DMatrix *>& mats){
48+
inline void SetCacheData(const std::vector<DMatrix *>& mats){
4949
// estimate feature bound
5050
int num_feature = 0;
5151
// assign buffer index
@@ -58,7 +58,9 @@ namespace xgboost{
5858
if( mats[i] == mats[j] ) dupilicate = true;
5959
}
6060
if( dupilicate ) continue;
61-
cache_.push_back( CacheEntry( mats[i], buffer_size ) );
61+
// set mats[i]'s cache learner pointer to this
62+
mats[i]->cache_learner_ptr_ = this;
63+
cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) );
6264
buffer_size += static_cast<unsigned>(mats[i]->Size());
6365
num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
6466
}
@@ -342,17 +344,25 @@ namespace xgboost{
342344
private:
343345
struct CacheEntry{
344346
const DMatrix *mat_;
345-
int buffer_offset_;
346-
CacheEntry(const DMatrix *mat, int buffer_offset)
347-
:mat_(mat), buffer_offset_(buffer_offset){}
347+
int buffer_offset_;
348+
size_t num_row_;
349+
CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row)
350+
:mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){}
348351
};
349352
/*! \brief these entries indicate that we have an internal prediction cache */
350353
std::vector<CacheEntry> cache_;
351354
private:
352355
// find the internal buffer offset for a given matrix; return -1 if it does not exist
353356
inline int FindBufferOffset(const DMatrix &mat){
354357
for(size_t i = 0; i < cache_.size(); ++i){
355-
if( cache_[i].mat_ == &mat ) return cache_[i].buffer_offset_;
358+
if( cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this ) {
359+
if( cache_[i].num_row_ == mat.Size() ){
360+
return cache_[i].buffer_offset_;
361+
}else{
362+
fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" );
363+
fflush( stderr );
364+
}
365+
}
356366
}
357367
return -1;
358368
}

regrank/xgboost_regrank_data.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,15 @@ namespace xgboost{
5252
booster::FMatrixS data;
5353
/*! \brief information fields */
5454
Info info;
55+
/*!
56+
* \brief cache pointer to verify if the data structure is cached in some learner
57+
* this is a bit ugly, but we need a double-check verification, so that if one side gets deleted,
58+
* and some strange re-allocation then gets the same pointer, we will still be fine
59+
*/
60+
void *cache_learner_ptr_;
5561
public:
5662
/*! \brief default constructor */
57-
DMatrix(void){}
63+
DMatrix(void):cache_learner_ptr_(NULL){}
5864
/*! \brief get the number of instances */
5965
inline size_t Size() const{
6066
return data.NumRow();

regrank/xgboost_regrank_main.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ namespace xgboost{
126126
deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
127127
devalall.push_back(deval.back());
128128
}
129-
std::vector<const DMatrix *> dcache(1, &data);
129+
std::vector<DMatrix *> dcache(1, &data);
130130
for( size_t i = 0; i < deval.size(); ++ i){
131131
dcache.push_back( deval[i] );
132132
}

0 commit comments

Comments
 (0)