warpgbm 0.1.24__tar.gz → 0.1.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warpgbm-0.1.24/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO +10 -1
- {warpgbm-0.1.24 → warpgbm-0.1.26}/README.md +9 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/pyproject.toml +1 -1
- {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/test_fit_predict_corr.py +1 -0
- warpgbm-0.1.26/version.txt +1 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/core.py +35 -18
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu +20 -21
- {warpgbm-0.1.24 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO +10 -1
- warpgbm-0.1.24/version.txt +0 -1
- {warpgbm-0.1.24 → warpgbm-0.1.26}/LICENSE +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/MANIFEST.in +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/setup.cfg +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/setup.py +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/__init__.py +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/numerai_test.py +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/__init__.py +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/__init__.py +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/best_split_kernel.cu +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/binner.cu +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/histogram_kernel.cu +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/node_kernel.cpp +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/SOURCES.txt +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/dependency_links.txt +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/requires.txt +0 -0
- {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/top_level.txt +0 -0
{warpgbm-0.1.24/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.24
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
                        Version 3, 29 June 2007
@@ -877,6 +877,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features to subsample to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -888,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int) 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'. For corr, loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -918,3 +920,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fix memory bugs in prediction and `colsample_bytree` logic. Added "corr" eval metric.
{warpgbm-0.1.24 → warpgbm-0.1.26}/README.md

@@ -189,6 +189,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features to subsample to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -200,6 +201,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int) 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'. For corr, loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -230,3 +232,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fix memory bugs in prediction and `colsample_bytree` logic. Added "corr" eval metric.
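Taken together, the README changes above add two knobs: `colsample_bytree` on the estimator and `eval_metric` on `fit`. A minimal usage sketch against the documented signature; the import path, data, and hyperparameter values here are illustrative assumptions, not taken from the diff:

```python
# Hedged usage sketch of the v0.1.26 options documented above.
# Assumes WarpGBM is importable from the package root; data is synthetic.
import numpy as np
from warpgbm import WarpGBM

rng = np.random.default_rng(0)
X = rng.random((10_000, 100), dtype=np.float32)
y = rng.random(10_000, dtype=np.float32)
X_eval = rng.random((2_000, 100), dtype=np.float32)
y_eval = rng.random(2_000, dtype=np.float32)

model = WarpGBM(colsample_bytree=0.8)  # grow each tree on a random 80% of features
model.fit(
    X, y,
    X_eval=X_eval, y_eval=y_eval,
    eval_every_n_trees=10,      # score the eval set every 10 trees
    early_stopping_rounds=3,    # stop once 3 consecutive evals fail to improve
    eval_metric="corr",         # eval loss = 1 - correlation(y_true, preds)
)
preds = model.predict(X_eval)
```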
warpgbm-0.1.26/version.txt (new file)

@@ -0,0 +1 @@
+0.1.26
{warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/core.py

@@ -138,7 +138,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
     def validate_fit_params(
-        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
     ):
         # ─── Required: X and y ───
         if not isinstance(X, np.ndarray) or not isinstance(y, np.ndarray):
@@ -206,6 +206,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             # No early stopping = set to "never trigger"
             early_stopping_rounds = self.n_estimators + 1
 
+        if eval_metric not in ["mse", "corr"]:
+            raise ValueError(
+                f"Invalid eval_metric: {eval_metric}. Choose 'mse' or 'corr'."
+            )
+
         return early_stopping_rounds  # May have been defaulted here
 
     def fit(
@@ -217,9 +222,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         y_eval=None,
         eval_every_n_trees=None,
         early_stopping_rounds=None,
+        eval_metric="mse",
     ):
         early_stopping_rounds = self.validate_fit_params(
-            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
         )
 
         if era_id is None:
@@ -242,10 +248,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         # ─── Optional Eval Set ───
         if X_eval is not None and y_eval is not None:
-            self.bin_indices_eval = self.
+            self.bin_indices_eval = self.bin_inference_data(X_eval)
             self.Y_gpu_eval = torch.from_numpy(y_eval).to(torch.float32).to(self.device)
             self.eval_every_n_trees = eval_every_n_trees
             self.early_stopping_rounds = early_stopping_rounds
+            self.eval_metric = eval_metric
         else:
             self.bin_indices_eval = None
             self.Y_gpu_eval = None
@@ -379,7 +386,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             self.gradients[node_indices] += self.learning_rate * leaf_value
             return {"leaf_value": leaf_value.item(), "samples": parent_size}
 
-        split_mask = self.
+        split_mask = self.bin_indices[node_indices, self.feat_indices_tree[local_feature]] <= best_bin
         left_indices = node_indices[split_mask]
        right_indices = node_indices[~split_mask]
 
@@ -388,13 +395,15 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         if left_size <= right_size:
             grad_hist_left, hess_hist_left = self.compute_histograms(
-                self.
+                self.bin_indices.index_select(0, left_indices).index_select(1, self.feat_indices_tree),
+                self.residual[left_indices],
             )
             grad_hist_right = gradient_histogram - grad_hist_left
             hess_hist_right = hessian_histogram - hess_hist_left
         else:
             grad_hist_right, hess_hist_right = self.compute_histograms(
-                self.
+                self.bin_indices.index_select(0, right_indices).index_select(1, self.feat_indices_tree),
+                self.residual[right_indices],
             )
             grad_hist_left = gradient_histogram - grad_hist_right
             hess_hist_left = hessian_histogram - hess_hist_right
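The `index_select` chain added in the hunk above gathers the node's rows first, then the tree's feature subset, instead of slicing a cached per-tree copy of the bin matrix. A standalone sketch of that two-step gather; shapes and values are illustrative, and the variable names mirror the diff:

```python
# Two-step gather as in the hunk above: rows for this node, then this
# tree's feature subset. Sizes are illustrative.
import torch

bin_indices = torch.randint(0, 32, (1_000, 100), dtype=torch.int8)  # [N, F]
left_indices = torch.arange(500)              # rows falling in the left child
feat_indices_tree = torch.randperm(100)[:80]  # this tree's feature subset

node_bins = (
    bin_indices
    .index_select(0, left_indices)        # [500, 100]: only this node's rows
    .index_select(1, feat_indices_tree)   # [500, 80]: only this tree's features
)
print(node_bins.shape)  # torch.Size([500, 80])
```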
@@ -413,25 +422,33 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             "left": left_child,
             "right": right_child,
         }
+
+    def get_eval_metric(self, y_true, y_pred):
+        if self.eval_metric == "mse":
+            return ((y_true - y_pred) ** 2).mean().item()
+        elif self.eval_metric == "corr":
+            return 1 - torch.corrcoef(torch.vstack([y_true, y_pred]))[0, 1].item()
+        else:
+            raise ValueError(f"Invalid eval_metric: {self.eval_metric}.")
 
     def compute_eval(self, i):
         if self.eval_every_n_trees == None:
             return
+
+        train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
+        self.training_loss.append(train_loss)
 
         if i % self.eval_every_n_trees == 0:
             eval_preds = self.predict_binned(self.bin_indices_eval)
-            eval_loss = (
+            eval_loss = self.get_eval_metric(self.Y_gpu_eval, eval_preds)
             self.eval_loss.append(eval_loss)
 
-            train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
-            self.training_loss.append(train_loss)
-
             if len(self.eval_loss) > self.early_stopping_rounds:
-                if self.eval_loss[-self.early_stopping_rounds] < self.eval_loss[-1]:
+                if self.eval_loss[-(self.early_stopping_rounds + 1)] < self.eval_loss[-1]:
                     self.stop = True
 
             print(
-                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval
+                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval {self.eval_metric}: {eval_loss:.6f}"
             )
 
         del eval_preds, eval_loss, train_loss
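Two behavioral fixes land in this hunk: the train loss is now recorded for every tree rather than only on eval rounds, and the early-stopping comparison reaches back one extra slot, so exactly `early_stopping_rounds` non-improving evaluations are required before stopping. A standalone sketch of the corrected check, with made-up loss values:

```python
# Sketch of the corrected early-stopping window from the hunk above.
def should_stop(eval_loss, early_stopping_rounds):
    # Stop when the loss recorded `early_stopping_rounds` evals ago is still
    # strictly better (lower) than the most recent one.
    if len(eval_loss) > early_stopping_rounds:
        return eval_loss[-(early_stopping_rounds + 1)] < eval_loss[-1]
    return False

losses = [0.50, 0.40, 0.41, 0.42, 0.43]
print(should_stop(losses, 3))  # True: losses[-4] == 0.40 beats losses[-1] == 0.43
# The old check compared eval_loss[-early_stopping_rounds], which could
# trigger a stop one evaluation too early.
```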
@@ -445,7 +462,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             k = max(1, int(self.colsample_bytree * self.num_features))
         else:
             self.feat_indices_tree = self.feature_indices
-            self.bin_indices_tree = self.bin_indices
 
         for i in range(self.n_estimators):
             self.residual = self.Y_gpu - self.gradients
@@ -454,10 +470,9 @@ class WarpGBM(BaseEstimator, RegressorMixin):
                 self.feat_indices_tree = torch.randperm(
                     self.num_features, device=self.device
                 )[:k]
-                self.bin_indices_tree = self.bin_indices[:, self.feat_indices_tree]
 
             self.root_gradient_histogram, self.root_hessian_histogram = (
-                self.compute_histograms(self.
+                self.compute_histograms(self.bin_indices[:, self.feat_indices_tree], self.residual)
             )
 
             tree = self.grow_tree(
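Both hunks above delete the cached `self.bin_indices_tree`; v0.1.26 instead indexes `self.bin_indices` with the sampled columns at the point of use, avoiding a second full-height copy of the bin matrix per tree (one of the memory fixes named in the changelog). A sketch of the sampling pattern itself, with illustrative sizes:

```python
# Per-tree feature subsampling as in the hunk above (sizes illustrative).
import torch

num_features = 100
colsample_bytree = 0.8
k = max(1, int(colsample_bytree * num_features))  # 80 features per tree

bin_indices = torch.randint(0, 32, (1_000, num_features), dtype=torch.int8)
feat_indices_tree = torch.randperm(num_features)[:k]  # fresh subset each tree

# The column subset is materialized only where needed (e.g. root histograms)
# rather than cached for the whole tree as in v0.1.25.
root_bins = bin_indices[:, feat_indices_tree]
print(root_bins.shape)  # torch.Size([1000, 80])
```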
@@ -493,7 +508,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
     def predict_binned(self, bin_indices):
         num_samples = bin_indices.size(0)
-
         tree_tensor = torch.stack(
             [
                 self.flatten_tree(tree, max_nodes=2 ** (self.max_depth + 1))
@@ -508,8 +522,8 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
         return out
-
-    def predict(self, X_np):
+
+    def bin_inference_data(self, X_np):
         is_integer_type = np.issubdtype(X_np.dtype, np.integer)
 
         if is_integer_type and X_np.shape[1] == self.num_features:
@@ -528,7 +542,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             )
         else:
             bin_indices = self.bin_data_with_existing_edges(X_np)
+        return bin_indices
 
+    def predict(self, X_np):
+        bin_indices = self.bin_inference_data(X_np)
         preds = self.predict_binned(bin_indices).cpu().numpy()
         del bin_indices
         return preds
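The refactor above splits the old `predict` into `bin_inference_data` plus a thin `predict` wrapper. That is what lets `fit` bin the eval set once (the `bin_indices_eval` hunk earlier) and re-score it every `eval_every_n_trees` trees without re-binning. A sketch of the resulting call pattern, assuming a fitted `model` and `X_eval` as in the usage sketch after the README section:

```python
# Bin once, score repeatedly (method names from the diff; `model` and
# `X_eval` are assumed from the earlier usage sketch).
bin_indices_eval = model.bin_inference_data(X_eval)   # binning happens once
for _ in range(3):
    # predict_binned walks the trees over already-binned data; no re-binning.
    preds_gpu = model.predict_binned(bin_indices_eval)
```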
{warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu

@@ -5,23 +5,18 @@
 __global__ void predict_forest_kernel(
     const int8_t *__restrict__ bin_indices, // [N x F]
     const float *__restrict__ tree_tensor,  // [T x max_nodes x 6]
-
+    int64_t N, int64_t F, int64_t T, int64_t max_nodes,
     float learning_rate,
     float *__restrict__ out // [N]
 )
 {
-
-
+    int64_t idx = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
+    int64_t total_jobs = N * T;
     if (idx >= total_jobs)
         return;
 
-
-
-
-    // if (i == 0 && t == 0)
-    // {
-    //     printf("[DEBUG] Thread (i=%d, t=%d): starting prediction\n", i, t);
-    // }
+    int64_t i = idx % N; // sample index
+    int64_t t = idx / N; // tree index
 
     const float *tree = tree_tensor + t * max_nodes * 6;
 
@@ -35,32 +30,36 @@ __global__ void predict_forest_kernel(
             atomicAdd(&out[i], learning_rate * val);
             return;
         }
+
         int feat = static_cast<int>(tree[node_id * 6 + 0]);
         int split_bin = static_cast<int>(tree[node_id * 6 + 1]);
         int left_id = static_cast<int>(tree[node_id * 6 + 2]);
         int right_id = static_cast<int>(tree[node_id * 6 + 3]);
 
-
+        // prevent overflow
+        int64_t bin_idx = i * F + feat;
+        int8_t bin = bin_indices[bin_idx];
+
         node_id = (bin <= split_bin) ? left_id : right_id;
-        // printf("sample %d, tree %d, feat %d, bin %d, split %d → %s\n", i, t, feat, bin, split_bin, (bin <= split_bin ? "L" : "R"));
     }
 }
 
+
 void predict_with_forest(
-    const at::Tensor &bin_indices,
-    const at::Tensor &tree_tensor,
+    const at::Tensor &bin_indices,
+    const at::Tensor &tree_tensor,
     float learning_rate,
-    at::Tensor &out
+    at::Tensor &out
 )
 {
-
-
-
-
+    int64_t N = bin_indices.size(0);
+    int64_t F = bin_indices.size(1);
+    int64_t T = tree_tensor.size(0);
+    int64_t max_nodes = tree_tensor.size(1);
 
-
+    int64_t total_jobs = N * T;
     int threads_per_block = 256;
-
+    int64_t blocks = (total_jobs + threads_per_block - 1) / threads_per_block;
 
     predict_forest_kernel<<<blocks, threads_per_block>>>(
         bin_indices.data_ptr<int8_t>(),
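The kernel rewrite above is mostly about widths: the flattened job id, its `(sample, tree)` decomposition, and the `i * F + feat` address are all carried in `int64_t`, since with large `N` and `F` a 32-bit index wraps past 2^31 - 1. A small Python illustration of the job decomposition and of the overflow the `// prevent overflow` comment guards against (toy sizes, illustrative only):

```python
# Flattened (sample, tree) job indexing as in predict_forest_kernel,
# plus why the bin address needs 64 bits.
import numpy as np

N, T = 3, 2                   # samples, trees (toy sizes)
for idx in range(N * T):      # one CUDA thread per (sample, tree) pair
    i, t = idx % N, idx // N  # sample index, tree index
    assert 0 <= i < N and 0 <= t < T

# 2,000,000 samples x 2,000 features: the last element's flat index
# exceeds 2**31 - 1, so a 32-bit computation wraps around.
i, F, feat = 1_999_999, 2_000, 1_999
print(np.int64(i) * F + feat)     # 3999999999: correct in 64-bit
print(np.int32(i) * np.int32(F))  # wraps negative (with an overflow warning)
```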
{warpgbm-0.1.24 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.24
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
                        Version 3, 29 June 2007
@@ -877,6 +877,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features to subsample to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -888,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int) 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'. For corr, loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -918,3 +920,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fix memory bugs in prediction and `colsample_bytree` logic. Added "corr" eval metric.
warpgbm-0.1.24/version.txt (deleted)

@@ -1 +0,0 @@
-0.1.24
File without changes: the remaining 16 files listed above (+0 -0).