warpgbm 0.1.25__tar.gz → 0.1.26__tar.gz
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {warpgbm-0.1.25/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO +6 -1
- {warpgbm-0.1.25 → warpgbm-0.1.26}/README.md +6 -1
- {warpgbm-0.1.25 → warpgbm-0.1.26}/pyproject.toml +1 -1
- {warpgbm-0.1.25 → warpgbm-0.1.26}/tests/test_fit_predict_corr.py +1 -0
- warpgbm-0.1.26/version.txt +1 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/core.py +35 -18
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu +20 -21
- {warpgbm-0.1.25 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO +6 -1
- warpgbm-0.1.25/version.txt +0 -1
- {warpgbm-0.1.25 → warpgbm-0.1.26}/LICENSE +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/MANIFEST.in +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/setup.cfg +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/setup.py +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/tests/__init__.py +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/tests/numerai_test.py +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/__init__.py +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/cuda/__init__.py +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/cuda/best_split_kernel.cu +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/cuda/binner.cu +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/cuda/histogram_kernel.cu +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/cuda/node_kernel.cpp +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm.egg-info/SOURCES.txt +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm.egg-info/dependency_links.txt +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm.egg-info/requires.txt +0 -0
- {warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm.egg-info/top_level.txt +0 -0
{warpgbm-0.1.25/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.25
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
 Version 3, 29 June 2007
@@ -889,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int) 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'. For corr, loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -922,3 +923,7 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 ### v0.1.25
 
 - Added `colsample_bytree` parameter and new test using Numerai data.
+
+### v0.1.26
+
+- Fix Memory bugs in prediction and colsample bytree logic. Added "corr" eval metric.
````
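The hunks above document the new `eval_metric` argument. A minimal usage sketch against the documented `fit` signature (the import path and data shapes are assumptions for illustration, not taken from the diff):

```python
import numpy as np
from warpgbm import WarpGBM  # assumed import path, mirroring the package name

rng = np.random.default_rng(0)
X = rng.random((1_000, 20), dtype=np.float32)
y = rng.random(1_000, dtype=np.float32)
X_eval = rng.random((200, 20), dtype=np.float32)
y_eval = rng.random(200, dtype=np.float32)

model = WarpGBM()
model.fit(
    X, y,
    X_eval=X_eval, y_eval=y_eval,
    eval_every_n_trees=10,      # evaluate every 10 trees
    early_stopping_rounds=3,    # stop after 3 non-improving evaluations
    eval_metric="corr",         # new in 0.1.26: loss = 1 - correlation(y_true, preds)
)
```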
{warpgbm-0.1.25 → warpgbm-0.1.26}/README.md

````diff
@@ -201,6 +201,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int) 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'. For corr, loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -233,4 +234,8 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 ### v0.1.25
 
-- Added `colsample_bytree` parameter and new test using Numerai data.
+- Added `colsample_bytree` parameter and new test using Numerai data.
+
+### v0.1.26
+
+- Fix Memory bugs in prediction and colsample bytree logic. Added "corr" eval metric.
````
warpgbm-0.1.26/version.txt ADDED

````diff
@@ -0,0 +1 @@
+0.1.26
````
{warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/core.py

````diff
@@ -138,7 +138,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
     def validate_fit_params(
-        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
     ):
         # ─── Required: X and y ───
         if not isinstance(X, np.ndarray) or not isinstance(y, np.ndarray):
@@ -206,6 +206,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             # No early stopping = set to "never trigger"
             early_stopping_rounds = self.n_estimators + 1
 
+        if eval_metric not in ["mse", "corr"]:
+            raise ValueError(
+                f"Invalid eval_metric: {eval_metric}. Choose 'mse' or 'corr'."
+            )
+
         return early_stopping_rounds  # May have been defaulted here
 
     def fit(
@@ -217,9 +222,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         y_eval=None,
         eval_every_n_trees=None,
         early_stopping_rounds=None,
+        eval_metric = "mse",
     ):
         early_stopping_rounds = self.validate_fit_params(
-            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
         )
 
         if era_id is None:
@@ -242,10 +248,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         # ─── Optional Eval Set ───
         if X_eval is not None and y_eval is not None:
-            self.bin_indices_eval = self.
+            self.bin_indices_eval = self.bin_inference_data(X_eval)
             self.Y_gpu_eval = torch.from_numpy(y_eval).to(torch.float32).to(self.device)
             self.eval_every_n_trees = eval_every_n_trees
             self.early_stopping_rounds = early_stopping_rounds
+            self.eval_metric = eval_metric
         else:
            self.bin_indices_eval = None
            self.Y_gpu_eval = None
````
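With the hunks above applied, an unrecognized metric fails fast inside `validate_fit_params`, before any GPU work starts. A quick check, reusing the sketch data from earlier:

```python
try:
    model.fit(X, y, eval_metric="mae")  # not one of ["mse", "corr"]
except ValueError as e:
    print(e)  # Invalid eval_metric: mae. Choose 'mse' or 'corr'.
```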
````diff
@@ -379,7 +386,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             self.gradients[node_indices] += self.learning_rate * leaf_value
             return {"leaf_value": leaf_value.item(), "samples": parent_size}
 
-        split_mask = self.
+        split_mask = self.bin_indices[node_indices, self.feat_indices_tree[local_feature]] <= best_bin
         left_indices = node_indices[split_mask]
         right_indices = node_indices[~split_mask]
 
@@ -388,13 +395,15 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         if left_size <= right_size:
             grad_hist_left, hess_hist_left = self.compute_histograms(
-                self.
+                self.bin_indices.index_select(0, left_indices).index_select(1, self.feat_indices_tree)
+                , self.residual[left_indices]
             )
             grad_hist_right = gradient_histogram - grad_hist_left
             hess_hist_right = hessian_histogram - hess_hist_left
         else:
             grad_hist_right, hess_hist_right = self.compute_histograms(
-                self.
+                self.bin_indices.index_select(0, right_indices).index_select(1, self.feat_indices_tree)
+                , self.residual[right_indices]
             )
             grad_hist_left = gradient_histogram - grad_hist_right
             hess_hist_left = hessian_histogram - hess_hist_right
````
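These two hunks are the heart of the colsample memory fix: instead of reading from a pre-materialized `bin_indices_tree` copy (removed in the hunks further down), the node histograms now gather only the current node's rows and this tree's columns with `index_select`. A rough sense of the savings, with illustrative sizes:

```python
import torch

N, F, k = 1_000_000, 500, 250
bin_indices = torch.randint(0, 32, (N, F), dtype=torch.int8)
feat_idx = torch.randperm(F)[:k]
node_indices = torch.arange(10_000)  # e.g. a 10k-sample node deep in the tree

# Removed approach: a full [N, k] int8 copy per tree (~250 MB here).
# bin_indices_tree = bin_indices[:, feat_idx]

# New approach: copy only what the node's histogram needs (~2.5 MB here).
sub = bin_indices.index_select(0, node_indices).index_select(1, feat_idx)
print(sub.shape)  # torch.Size([10000, 250])
```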
````diff
@@ -413,25 +422,33 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             "left": left_child,
             "right": right_child,
         }
+
+    def get_eval_metric(self, y_true, y_pred):
+        if self.eval_metric == "mse":
+            return ((y_true - y_pred) ** 2).mean().item()
+        elif self.eval_metric == "corr":
+            return 1 - torch.corrcoef(torch.vstack([y_true, y_pred]))[0, 1].item()
+        else:
+            raise ValueError(f"Invalid eval_metric: {self.eval_metric}.")
 
     def compute_eval(self, i):
         if self.eval_every_n_trees == None:
             return
+
+        train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
+        self.training_loss.append(train_loss)
 
         if i % self.eval_every_n_trees == 0:
             eval_preds = self.predict_binned(self.bin_indices_eval)
-            eval_loss = (
+            eval_loss = self.get_eval_metric( self.Y_gpu_eval, eval_preds )
             self.eval_loss.append(eval_loss)
 
-            train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
-            self.training_loss.append(train_loss)
-
             if len(self.eval_loss) > self.early_stopping_rounds:
-                if self.eval_loss[-self.early_stopping_rounds] < self.eval_loss[-1]:
+                if self.eval_loss[-(self.early_stopping_rounds+1)] < self.eval_loss[-1]:
                     self.stop = True
 
             print(
-                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval
+                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval {self.eval_metric}: {eval_loss:.6f}"
             )
 
             del eval_preds, eval_loss, train_loss
````
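Two details of the new evaluation path deserve a worked check. The `'corr'` loss is `1 - Pearson correlation`, computed by stacking targets and predictions into a 2 x n matrix for `torch.corrcoef`; and the early-stopping index change fixes an off-by-one, since with patience `r` the baseline should be the evaluation `r` rounds back, i.e. index `-(r + 1)`. (The hunk also moves the train-loss bookkeeping above the `eval_every_n_trees` gate, so `training_loss` is recorded on every tree rather than only on evaluation rounds.) A sketch of both points:

```python
import torch

# 'corr' loss, as in get_eval_metric: near 0 for well-correlated predictions.
y_true = torch.tensor([1.0, 2.0, 3.0, 4.0])
y_pred = torch.tensor([1.1, 1.9, 3.2, 3.8])
corr_loss = 1 - torch.corrcoef(torch.vstack([y_true, y_pred]))[0, 1].item()

# Early-stopping off-by-one, with patience r = 2:
eval_loss = [0.50, 0.40, 0.41]  # improved once, then one non-improving round
r = 2
old = eval_loss[-r] < eval_loss[-1]        # 0.40 < 0.41 -> True: fired after 1 bad round
new = eval_loss[-(r + 1)] < eval_loss[-1]  # 0.50 < 0.41 -> False: waits the full 2 rounds
```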
````diff
@@ -445,7 +462,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             k = max(1, int(self.colsample_bytree * self.num_features))
         else:
             self.feat_indices_tree = self.feature_indices
-            self.bin_indices_tree = self.bin_indices
 
         for i in range(self.n_estimators):
             self.residual = self.Y_gpu - self.gradients
````
````diff
@@ -454,10 +470,9 @@ class WarpGBM(BaseEstimator, RegressorMixin):
                 self.feat_indices_tree = torch.randperm(
                     self.num_features, device=self.device
                 )[:k]
-                self.bin_indices_tree = self.bin_indices[:, self.feat_indices_tree]
 
             self.root_gradient_histogram, self.root_hessian_histogram = (
-                self.compute_histograms(self.
+                self.compute_histograms(self.bin_indices[:, self.feat_indices_tree], self.residual)
             )
 
             tree = self.grow_tree(
````
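For context, the retained lines draw a fresh feature subset per tree; `k` comes from the `colsample_bytree` fraction in the previous hunk, and only the selected columns feed the root histogram. A minimal sketch of the sampling step:

```python
import torch

num_features, colsample_bytree = 100, 0.6
k = max(1, int(colsample_bytree * num_features))      # as in the hunk at -445
feat_indices_tree = torch.randperm(num_features)[:k]  # new subset each tree
print(k, feat_indices_tree.shape)  # 60 torch.Size([60])
```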
````diff
@@ -493,7 +508,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
     def predict_binned(self, bin_indices):
         num_samples = bin_indices.size(0)
-
         tree_tensor = torch.stack(
             [
                 self.flatten_tree(tree, max_nodes=2 ** (self.max_depth + 1))
@@ -508,8 +522,8 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
         return out
-
-    def 
+
+    def bin_inference_data(self, X_np):
         is_integer_type = np.issubdtype(X_np.dtype, np.integer)
 
         if is_integer_type and X_np.shape[1] == self.num_features:
@@ -528,7 +542,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             )
         else:
             bin_indices = self.bin_data_with_existing_edges(X_np)
+        return bin_indices
 
+    def predict(self, X_np):
+        bin_indices = self.bin_inference_data(X_np)
         preds = self.predict_binned(bin_indices).cpu().numpy()
         del bin_indices
         return preds
````
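These last hunks split the old monolithic prediction path into `bin_inference_data` (bin once) plus a thin `predict` wrapper, which is what lets `fit` bin the eval set a single time (the `-242` hunk above) and reuse it at every evaluation. A usage sketch, assuming the fitted `model` from earlier:

```python
import numpy as np

# One-off prediction: bins X_eval internally, then runs the forest kernel.
preds = model.predict(X_eval)

# Repeated scoring can reuse the binned form, exactly as compute_eval does
# with self.bin_indices_eval during training.
bins = model.bin_inference_data(X_eval)            # int8 bin indices on GPU
preds_again = model.predict_binned(bins).cpu().numpy()
assert np.allclose(preds, preds_again)
```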
{warpgbm-0.1.25 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu

````diff
@@ -5,23 +5,18 @@
 __global__ void predict_forest_kernel(
     const int8_t *__restrict__ bin_indices, // [N x F]
     const float *__restrict__ tree_tensor,  // [T x max_nodes x 6]
-
+    int64_t N, int64_t F, int64_t T, int64_t max_nodes,
     float learning_rate,
     float *__restrict__ out // [N]
 )
 {
-
-
+    int64_t idx = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
+    int64_t total_jobs = N * T;
     if (idx >= total_jobs)
         return;
 
-
-
-
-    // if (i == 0 && t == 0)
-    // {
-    //     printf("[DEBUG] Thread (i=%d, t=%d): starting prediction\n", i, t);
-    // }
+    int64_t i = idx % N; // sample index
+    int64_t t = idx / N; // tree index
 
     const float *tree = tree_tensor + t * max_nodes * 6;
 
````
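The kernel assigns one thread per (sample, tree) pair and recovers both indices from the flattened `idx` with a mod/div, now in 64-bit arithmetic so `N * T` can exceed `2^31 - 1`. The mapping, checked in Python:

```python
N, T = 1_000_000, 3_000  # 3e9 (sample, tree) jobs: already past 2**31 - 1
for idx in (0, 1, N - 1, N, N * T - 1):
    i, t = idx % N, idx // N  # same decomposition as the kernel
    assert 0 <= i < N and 0 <= t < T
print(N * T - 1 > 2**31 - 1)  # True: a 32-bit idx would have wrapped
```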
````diff
@@ -35,32 +30,36 @@ __global__ void predict_forest_kernel(
             atomicAdd(&out[i], learning_rate * val);
             return;
         }
+
        int feat = static_cast<int>(tree[node_id * 6 + 0]);
        int split_bin = static_cast<int>(tree[node_id * 6 + 1]);
        int left_id = static_cast<int>(tree[node_id * 6 + 2]);
        int right_id = static_cast<int>(tree[node_id * 6 + 3]);
 
-
+        // prevent overflow
+        int64_t bin_idx = i * F + feat;
+        int8_t bin = bin_indices[bin_idx];
+
         node_id = (bin <= split_bin) ? left_id : right_id;
-        // printf("sample %d, tree %d, feat %d, bin %d, split %d → %s\n", i, t, feat, bin, split_bin, (bin <= split_bin ? "L" : "R"));
     }
 }
 
+
 void predict_with_forest(
-    const at::Tensor &bin_indices,
-    const at::Tensor &tree_tensor,
+    const at::Tensor &bin_indices,
+    const at::Tensor &tree_tensor,
     float learning_rate,
-    at::Tensor &out
+    at::Tensor &out
 )
 {
-
-
-
-
+    int64_t N = bin_indices.size(0);
+    int64_t F = bin_indices.size(1);
+    int64_t T = tree_tensor.size(0);
+    int64_t max_nodes = tree_tensor.size(1);
 
-
+    int64_t total_jobs = N * T;
     int threads_per_block = 256;
-
+    int64_t blocks = (total_jobs + threads_per_block - 1) / threads_per_block;
 
     predict_forest_kernel<<<blocks, threads_per_block>>>(
         bin_indices.data_ptr<int8_t>(),
````
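The `// prevent overflow` comment marks the same bug class on the per-sample lookup: `i * F + feat` computed in 32-bit wraps once the binned matrix holds more than `2^31` entries, producing a negative (out-of-bounds) offset. Illustrative arithmetic:

```python
N, F = 25_000_000, 100  # 2.5e9 int8 bin entries: more than 2**31 - 1
i, feat = N - 1, F - 1

flat64 = i * F + feat            # the kernel's int64_t arithmetic
flat32 = flat64 & 0xFFFFFFFF     # keep only 32 bits...
if flat32 >= 2**31:              # ...and reinterpret as signed int32
    flat32 -= 2**32
print(flat64, flat32)  # 2499999999  -1794967297
```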
{warpgbm-0.1.25 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.25
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
 Version 3, 29 June 2007
@@ -889,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int) 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'. For corr, loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -922,3 +923,7 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 ### v0.1.25
 
 - Added `colsample_bytree` parameter and new test using Numerai data.
+
+### v0.1.26
+
+- Fix Memory bugs in prediction and colsample bytree logic. Added "corr" eval metric.
````
warpgbm-0.1.25/version.txt DELETED

````diff
@@ -1 +0,0 @@
-0.1.25
````