warpgbm 0.1.24__tar.gz → 0.1.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {warpgbm-0.1.24/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO +10 -1
  2. {warpgbm-0.1.24 → warpgbm-0.1.26}/README.md +9 -0
  3. {warpgbm-0.1.24 → warpgbm-0.1.26}/pyproject.toml +1 -1
  4. {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/test_fit_predict_corr.py +1 -0
  5. warpgbm-0.1.26/version.txt +1 -0
  6. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/core.py +35 -18
  7. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu +20 -21
  8. {warpgbm-0.1.24 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO +10 -1
  9. warpgbm-0.1.24/version.txt +0 -1
  10. {warpgbm-0.1.24 → warpgbm-0.1.26}/LICENSE +0 -0
  11. {warpgbm-0.1.24 → warpgbm-0.1.26}/MANIFEST.in +0 -0
  12. {warpgbm-0.1.24 → warpgbm-0.1.26}/setup.cfg +0 -0
  13. {warpgbm-0.1.24 → warpgbm-0.1.26}/setup.py +0 -0
  14. {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/__init__.py +0 -0
  15. {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/numerai_test.py +0 -0
  16. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/__init__.py +0 -0
  17. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/__init__.py +0 -0
  18. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/best_split_kernel.cu +0 -0
  19. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/binner.cu +0 -0
  20. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/histogram_kernel.cu +0 -0
  21. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/node_kernel.cpp +0 -0
  22. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/SOURCES.txt +0 -0
  23. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/dependency_links.txt +0 -0
  24. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/requires.txt +0 -0
  25. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/top_level.txt +0 -0
{warpgbm-0.1.24/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.24
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
         Version 3, 29 June 2007
@@ -877,6 +877,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features randomly sampled to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -888,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int), 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'; for 'corr', loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -918,3 +920,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fixed memory bugs in prediction and in the `colsample_bytree` logic. Added the "corr" eval metric.
{warpgbm-0.1.24 → warpgbm-0.1.26}/README.md

@@ -189,6 +189,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features randomly sampled to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -200,6 +201,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int), 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'; for 'corr', loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -230,3 +232,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fixed memory bugs in prediction and in the `colsample_bytree` logic. Added the "corr" eval metric.
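Taken together, the `colsample_bytree` parameter and the `eval_metric` argument documented above change the public `fit` surface. A minimal usage sketch; the data shapes, values, and `n_estimators` setting below are illustrative assumptions, not taken from the diff:

```python
import numpy as np
from warpgbm import WarpGBM

# Hypothetical toy data; sizes are illustrative only.
X_train = np.random.rand(10_000, 100).astype(np.float32)
y_train = np.random.rand(10_000).astype(np.float32)
X_valid = np.random.rand(2_000, 100).astype(np.float32)
y_valid = np.random.rand(2_000).astype(np.float32)

# colsample_bytree < 1 grows each tree on a random subset of features (v0.1.25);
# eval_metric="corr" scores the eval set as 1 - correlation(y_true, preds) (v0.1.26).
model = WarpGBM(n_estimators=100, colsample_bytree=0.8)
model.fit(
    X_train,
    y_train,
    X_eval=X_valid,
    y_eval=y_valid,
    eval_every_n_trees=10,
    early_stopping_rounds=3,
    eval_metric="corr",
)
preds = model.predict(X_valid)
```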
{warpgbm-0.1.24 → warpgbm-0.1.26}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "warpgbm"
-version = "0.1.24"
+version = "0.1.26"
 description = "A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA"
 readme = "README.md"
 requires-python = ">=3.8"
{warpgbm-0.1.24 → warpgbm-0.1.26}/tests/test_fit_predict_corr.py

@@ -37,6 +37,7 @@ def test_fit_predict_correlation():
         y_eval=y,
         eval_every_n_trees=10,
         early_stopping_rounds=1,
+        eval_metric="corr",
     )
     fit_time = time.time() - start_fit
     print(f" Fit time: {fit_time:.3f} seconds")
warpgbm-0.1.26/version.txt

@@ -0,0 +1 @@
+0.1.26
{warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/core.py

@@ -138,7 +138,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
     def validate_fit_params(
-        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
     ):
         # ─── Required: X and y ───
         if not isinstance(X, np.ndarray) or not isinstance(y, np.ndarray):
@@ -206,6 +206,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             # No early stopping = set to "never trigger"
            early_stopping_rounds = self.n_estimators + 1
 
+        if eval_metric not in ["mse", "corr"]:
+            raise ValueError(
+                f"Invalid eval_metric: {eval_metric}. Choose 'mse' or 'corr'."
+            )
+
         return early_stopping_rounds  # May have been defaulted here
 
     def fit(
@@ -217,9 +222,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         y_eval=None,
         eval_every_n_trees=None,
         early_stopping_rounds=None,
+        eval_metric="mse",
     ):
         early_stopping_rounds = self.validate_fit_params(
-            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
         )
 
         if era_id is None:
@@ -242,10 +248,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         # ─── Optional Eval Set ───
         if X_eval is not None and y_eval is not None:
-            self.bin_indices_eval = self.bin_data_with_existing_edges(X_eval)
+            self.bin_indices_eval = self.bin_inference_data(X_eval)
             self.Y_gpu_eval = torch.from_numpy(y_eval).to(torch.float32).to(self.device)
             self.eval_every_n_trees = eval_every_n_trees
             self.early_stopping_rounds = early_stopping_rounds
+            self.eval_metric = eval_metric
         else:
             self.bin_indices_eval = None
             self.Y_gpu_eval = None
@@ -379,7 +386,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             self.gradients[node_indices] += self.learning_rate * leaf_value
             return {"leaf_value": leaf_value.item(), "samples": parent_size}
 
-        split_mask = self.bin_indices_tree[node_indices, local_feature] <= best_bin
+        split_mask = self.bin_indices[node_indices, self.feat_indices_tree[local_feature]] <= best_bin
         left_indices = node_indices[split_mask]
         right_indices = node_indices[~split_mask]
 
@@ -388,13 +395,15 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         if left_size <= right_size:
             grad_hist_left, hess_hist_left = self.compute_histograms(
-                self.bin_indices_tree[left_indices], self.residual[left_indices]
+                self.bin_indices.index_select(0, left_indices).index_select(1, self.feat_indices_tree),
+                self.residual[left_indices],
             )
             grad_hist_right = gradient_histogram - grad_hist_left
             hess_hist_right = hessian_histogram - hess_hist_left
         else:
             grad_hist_right, hess_hist_right = self.compute_histograms(
-                self.bin_indices_tree[right_indices], self.residual[right_indices]
+                self.bin_indices.index_select(0, right_indices).index_select(1, self.feat_indices_tree),
+                self.residual[right_indices],
             )
             grad_hist_left = gradient_histogram - grad_hist_right
             hess_hist_left = hessian_histogram - hess_hist_right
@@ -413,25 +422,33 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             "left": left_child,
             "right": right_child,
         }
+
+    def get_eval_metric(self, y_true, y_pred):
+        if self.eval_metric == "mse":
+            return ((y_true - y_pred) ** 2).mean().item()
+        elif self.eval_metric == "corr":
+            return 1 - torch.corrcoef(torch.vstack([y_true, y_pred]))[0, 1].item()
+        else:
+            raise ValueError(f"Invalid eval_metric: {self.eval_metric}.")
 
     def compute_eval(self, i):
         if self.eval_every_n_trees == None:
             return
+
+        train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
+        self.training_loss.append(train_loss)
 
         if i % self.eval_every_n_trees == 0:
             eval_preds = self.predict_binned(self.bin_indices_eval)
-            eval_loss = ((self.Y_gpu_eval - eval_preds) ** 2).mean().item()
+            eval_loss = self.get_eval_metric(self.Y_gpu_eval, eval_preds)
             self.eval_loss.append(eval_loss)
 
-            train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
-            self.training_loss.append(train_loss)
-
             if len(self.eval_loss) > self.early_stopping_rounds:
-                if self.eval_loss[-self.early_stopping_rounds] < self.eval_loss[-1]:
+                if self.eval_loss[-(self.early_stopping_rounds + 1)] < self.eval_loss[-1]:
                     self.stop = True
 
             print(
-                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval MSE: {eval_loss:.6f}"
+                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval {self.eval_metric}: {eval_loss:.6f}"
             )
 
             del eval_preds, eval_loss, train_loss
@@ -445,7 +462,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             k = max(1, int(self.colsample_bytree * self.num_features))
         else:
             self.feat_indices_tree = self.feature_indices
-            self.bin_indices_tree = self.bin_indices
 
         for i in range(self.n_estimators):
             self.residual = self.Y_gpu - self.gradients
@@ -454,10 +470,9 @@ class WarpGBM(BaseEstimator, RegressorMixin):
                 self.feat_indices_tree = torch.randperm(
                     self.num_features, device=self.device
                 )[:k]
-                self.bin_indices_tree = self.bin_indices[:, self.feat_indices_tree]
 
             self.root_gradient_histogram, self.root_hessian_histogram = (
-                self.compute_histograms(self.bin_indices_tree, self.residual)
+                self.compute_histograms(self.bin_indices[:, self.feat_indices_tree], self.residual)
             )
 
             tree = self.grow_tree(
@@ -493,7 +508,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
     def predict_binned(self, bin_indices):
         num_samples = bin_indices.size(0)
-
         tree_tensor = torch.stack(
             [
                 self.flatten_tree(tree, max_nodes=2 ** (self.max_depth + 1))
@@ -508,8 +522,8 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
         return out
-
-    def predict(self, X_np):
+
+    def bin_inference_data(self, X_np):
        is_integer_type = np.issubdtype(X_np.dtype, np.integer)
 
         if is_integer_type and X_np.shape[1] == self.num_features:
@@ -528,7 +542,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             )
         else:
             bin_indices = self.bin_data_with_existing_edges(X_np)
+        return bin_indices
 
+    def predict(self, X_np):
+        bin_indices = self.bin_inference_data(X_np)
         preds = self.predict_binned(bin_indices).cpu().numpy()
         del bin_indices
         return preds
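Two of the core.py changes above are behavioral rather than cosmetic. The early-stopping comparison now reaches back `early_stopping_rounds + 1` entries, so the newest eval loss is compared against the last loss recorded *before* the patience window; the old index gave a window one round too short. A standalone sketch of the corrected check, with hypothetical loss values:

```python
# Hypothetical eval losses, one entry per evaluation; values are illustrative.
eval_loss = [0.80, 0.74, 0.75]
early_stopping_rounds = 2

if len(eval_loss) > early_stopping_rounds:
    # Old check: eval_loss[-2] (0.74) < eval_loss[-1] (0.75) -> stop after a
    # single non-improving eval, i.e. an effective patience of rounds - 1.
    # Fixed check: compare against the loss just before the 2-eval window;
    # 0.80 < 0.75 is False, so training continues for the full window.
    if eval_loss[-(early_stopping_rounds + 1)] < eval_loss[-1]:
        print("stop")
```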
{warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu

@@ -5,23 +5,18 @@
 __global__ void predict_forest_kernel(
     const int8_t *__restrict__ bin_indices, // [N x F]
     const float *__restrict__ tree_tensor,  // [T x max_nodes x 6]
-    int N, int F, int T, int max_nodes,
+    int64_t N, int64_t F, int64_t T, int64_t max_nodes,
     float learning_rate,
     float *__restrict__ out // [N]
 )
 {
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    int total_jobs = N * T;
+    int64_t idx = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
+    int64_t total_jobs = N * T;
     if (idx >= total_jobs)
         return;
 
-    int i = idx % N; // sample index
-    int t = idx / N; // tree index
-
-    // if (i == 0 && t == 0)
-    // {
-    //     printf("[DEBUG] Thread (i=%d, t=%d): starting prediction\n", i, t);
-    // }
+    int64_t i = idx % N; // sample index
+    int64_t t = idx / N; // tree index
 
     const float *tree = tree_tensor + t * max_nodes * 6;
@@ -35,32 +30,36 @@ __global__ void predict_forest_kernel(
             atomicAdd(&out[i], learning_rate * val);
             return;
         }
+
         int feat = static_cast<int>(tree[node_id * 6 + 0]);
         int split_bin = static_cast<int>(tree[node_id * 6 + 1]);
         int left_id = static_cast<int>(tree[node_id * 6 + 2]);
         int right_id = static_cast<int>(tree[node_id * 6 + 3]);
 
-        int8_t bin = bin_indices[i * F + feat];
+        // prevent overflow
+        int64_t bin_idx = i * F + feat;
+        int8_t bin = bin_indices[bin_idx];
+
         node_id = (bin <= split_bin) ? left_id : right_id;
-        // printf("sample %d, tree %d, feat %d, bin %d, split %d → %s\n", i, t, feat, bin, split_bin, (bin <= split_bin ? "L" : "R"));
     }
 }
 
+
 void predict_with_forest(
-    const at::Tensor &bin_indices, // [N x F], int8
-    const at::Tensor &tree_tensor, // [T x max_nodes x 6], float32
+    const at::Tensor &bin_indices,
+    const at::Tensor &tree_tensor,
     float learning_rate,
-    at::Tensor &out // [N], float32
+    at::Tensor &out
 )
 {
-    int N = bin_indices.size(0);
-    int F = bin_indices.size(1);
-    int T = tree_tensor.size(0);
-    int max_nodes = tree_tensor.size(1);
+    int64_t N = bin_indices.size(0);
+    int64_t F = bin_indices.size(1);
+    int64_t T = tree_tensor.size(0);
+    int64_t max_nodes = tree_tensor.size(1);
 
-    int total_jobs = N * T;
+    int64_t total_jobs = N * T;
     int threads_per_block = 256;
-    int blocks = (total_jobs + threads_per_block - 1) / threads_per_block;
+    int64_t blocks = (total_jobs + threads_per_block - 1) / threads_per_block;
 
     predict_forest_kernel<<<blocks, threads_per_block>>>(
         bin_indices.data_ptr<int8_t>(),
{warpgbm-0.1.24 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.24
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
         Version 3, 29 June 2007
@@ -877,6 +877,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features randomly sampled to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -888,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int), 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'; for 'corr', loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -918,3 +920,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fixed memory bugs in prediction and in the `colsample_bytree` logic. Added the "corr" eval metric.
warpgbm-0.1.24/version.txt

@@ -1 +0,0 @@
-0.1.24