|
9 | 9 | import ray |
10 | 10 | from ray.exceptions import RayActorError, RayTaskError |
11 | 11 |
|
12 | | -from xgboost_ray import RayParams, train, RayDMatrix, predict |
| 12 | +from xgboost_ray import RayParams, train, RayDMatrix, predict, RayShardingMode |
13 | 13 | from xgboost_ray.main import RayXGBoostTrainingError |
14 | 14 | from xgboost_ray.callback import DistributedCallback |
15 | 15 | from xgboost_ray.tests.utils import get_num_trees |
@@ -134,20 +134,58 @@ def testHalfTraining(self): |
134 | 134 | pred_test = bst.predict(test_X) |
135 | 135 | self.assertSequenceEqual(test_y_second, list(pred_test)) |
136 | 136 |
|
137 | | - def testJointTraining(self): |
| 137 | + def _testJointTraining(self, |
| 138 | + sharding=RayShardingMode.INTERLEAVED, |
| 139 | + softprob=False): |
138 | 140 | """Train with Ray. The data will be split, but the trees |
139 | 141 | should be combined together and find the true model.""" |
140 | | - ray.init(num_cpus=2, num_gpus=0) |
| 142 | + params = self.params.copy() |
| 143 | + if softprob: |
| 144 | + params["objective"] = "multi:softprob" |
141 | 145 |
|
142 | 146 | bst = train( |
143 | | - self.params, |
144 | | - RayDMatrix(self.x, self.y), |
| 147 | + params, |
| 148 | + RayDMatrix(self.x, self.y, sharding=sharding), |
145 | 149 | ray_params=RayParams(num_actors=2)) |
146 | 150 |
|
147 | 151 | x_mat = xgb.DMatrix(self.x) |
148 | 152 | pred_y = bst.predict(x_mat) |
| 153 | + if softprob: |
| 154 | + pred_y = np.argmax(pred_y, axis=1) |
| 155 | + pred_y = pred_y.astype(int) |
149 | 156 | self.assertSequenceEqual(list(self.y), list(pred_y)) |
150 | 157 |
|
| 158 | + x_mat = RayDMatrix(self.x, sharding=sharding) |
| 159 | + pred_y = predict(bst, x_mat, ray_params=RayParams(num_actors=2)) |
| 160 | + if softprob: |
| 161 | + pred_y = np.argmax(pred_y, axis=1) |
| 162 | + pred_y = pred_y.astype(int) |
| 163 | + self.assertSequenceEqual(list(self.y), list(pred_y)) |
| 164 | + |
| 165 | + # try on an odd number of rows |
| 166 | + bst = train( |
| 167 | + params, |
| 168 | + RayDMatrix(self.x[:-1], self.y[:-1], sharding=sharding), |
| 169 | + ray_params=RayParams(num_actors=2)) |
| 170 | + |
| 171 | + x_mat = RayDMatrix(self.x[:-1], sharding=sharding) |
| 172 | + pred_y = predict(bst, x_mat, ray_params=RayParams(num_actors=2)) |
| 173 | + if softprob: |
| 174 | + pred_y = np.argmax(pred_y, axis=1) |
| 175 | + pred_y = pred_y.astype(int) |
| 176 | + self.assertSequenceEqual(list(self.y[:-1]), list(pred_y)) |
| 177 | + |
| 178 | + def testJointTrainingInterleaved(self): |
| 179 | + ray.init(num_cpus=2, num_gpus=0) |
| 180 | + self._testJointTraining(sharding=RayShardingMode.INTERLEAVED) |
| 181 | + self._testJointTraining( |
| 182 | + sharding=RayShardingMode.INTERLEAVED, softprob=True) |
| 183 | + |
| 184 | + def testJointTrainingBatch(self): |
| 185 | + ray.init(num_cpus=2, num_gpus=0) |
| 186 | + self._testJointTraining(sharding=RayShardingMode.BATCH) |
| 187 | + self._testJointTraining(sharding=RayShardingMode.BATCH, softprob=True) |
| 188 | + |
151 | 189 | def testTrainPredict(self, |
152 | 190 | init=True, |
153 | 191 | remote=None, |
|
0 commit comments