warpgbm 0.1.23__tar.gz → 0.1.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {warpgbm-0.1.23/warpgbm.egg-info → warpgbm-0.1.24}/PKG-INFO +25 -3
  2. {warpgbm-0.1.23 → warpgbm-0.1.24}/README.md +24 -2
  3. {warpgbm-0.1.23 → warpgbm-0.1.24}/pyproject.toml +1 -1
  4. warpgbm-0.1.24/tests/numerai_test.py +62 -0
  5. warpgbm-0.1.24/version.txt +1 -0
  6. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/core.py +63 -27
  7. {warpgbm-0.1.23 → warpgbm-0.1.24/warpgbm.egg-info}/PKG-INFO +25 -3
  8. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm.egg-info/SOURCES.txt +1 -0
  9. warpgbm-0.1.23/version.txt +0 -1
  10. {warpgbm-0.1.23 → warpgbm-0.1.24}/LICENSE +0 -0
  11. {warpgbm-0.1.23 → warpgbm-0.1.24}/MANIFEST.in +0 -0
  12. {warpgbm-0.1.23 → warpgbm-0.1.24}/setup.cfg +0 -0
  13. {warpgbm-0.1.23 → warpgbm-0.1.24}/setup.py +0 -0
  14. {warpgbm-0.1.23 → warpgbm-0.1.24}/tests/__init__.py +0 -0
  15. {warpgbm-0.1.23 → warpgbm-0.1.24}/tests/test_fit_predict_corr.py +0 -0
  16. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/__init__.py +0 -0
  17. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/cuda/__init__.py +0 -0
  18. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/cuda/best_split_kernel.cu +0 -0
  19. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/cuda/binner.cu +0 -0
  20. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/cuda/histogram_kernel.cu +0 -0
  21. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/cuda/node_kernel.cpp +0 -0
  22. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/cuda/predict.cu +0 -0
  23. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm.egg-info/dependency_links.txt +0 -0
  24. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm.egg-info/requires.txt +0 -0
  25. {warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm.egg-info/top_level.txt +0 -0
{warpgbm-0.1.23/warpgbm.egg-info → warpgbm-0.1.24}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: warpgbm
- Version: 0.1.23
+ Version: 0.1.24
  Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
  License: GNU GENERAL PUBLIC LICENSE
  Version 3, 29 June 2007
@@ -879,8 +879,26 @@ No installation required — just press **"Open in Playground"**, then **Run All
  - `L2_reg`: L2 regularizer (default: 1e-6)

  ### Methods:
- - `.fit(X, y, era_id=None)`: Train the model. `X` can be raw floats or pre-binned `int8` data. `era_id` is optional and used internally.
- - `.predict(X)`: Predict on new data, using parallelized CUDA kernel.
+ ```
+ .fit(
+     X,                          # numpy array (float or int), 2D: (num_samples, num_features)
+     y,                          # numpy array (float or int), 1D: (num_samples,)
+     era_id=None,                # numpy array (int), 1D: (num_samples,)
+     X_eval=None,                # numpy array (float or int), 2D: (eval_num_samples, num_features)
+     y_eval=None,                # numpy array (float or int), 1D: (eval_num_samples,)
+     eval_every_n_trees=None,    # int >= 1
+     early_stopping_rounds=None, # int >= 1
+ )
+ ```
+ Train the model, with an optional validation set and early stopping.
+
+
+ ```
+ .predict(
+     X   # numpy array (float or int), 2D: (predict_num_samples, num_features)
+ )
+ ```
+ Predict on new data using a parallelized CUDA kernel.

  ---
@@ -896,3 +914,7 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA

  - Vectorized predict function replaced with CUDA kernel (`warpgbm/cuda/predict.cu`), parallelizing per sample, per tree.

+ ### v0.1.23
+
+ - Adjusted gain in the split kernel and added support for an eval set with early stopping based on MSE.
+
{warpgbm-0.1.23 → warpgbm-0.1.24}/README.md
@@ -191,8 +191,26 @@ No installation required — just press **"Open in Playground"**, then **Run All
  - `L2_reg`: L2 regularizer (default: 1e-6)

  ### Methods:
- - `.fit(X, y, era_id=None)`: Train the model. `X` can be raw floats or pre-binned `int8` data. `era_id` is optional and used internally.
- - `.predict(X)`: Predict on new data, using parallelized CUDA kernel.
+ ```
+ .fit(
+     X,                          # numpy array (float or int), 2D: (num_samples, num_features)
+     y,                          # numpy array (float or int), 1D: (num_samples,)
+     era_id=None,                # numpy array (int), 1D: (num_samples,)
+     X_eval=None,                # numpy array (float or int), 2D: (eval_num_samples, num_features)
+     y_eval=None,                # numpy array (float or int), 1D: (eval_num_samples,)
+     eval_every_n_trees=None,    # int >= 1
+     early_stopping_rounds=None, # int >= 1
+ )
+ ```
+ Train the model, with an optional validation set and early stopping.
+
+
+ ```
+ .predict(
+     X   # numpy array (float or int), 2D: (predict_num_samples, num_features)
+ )
+ ```
+ Predict on new data using a parallelized CUDA kernel.

  ---
@@ -208,3 +226,7 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA

  - Vectorized predict function replaced with CUDA kernel (`warpgbm/cuda/predict.cu`), parallelizing per sample, per tree.

+ ### v0.1.23
+
+ - Adjusted gain in the split kernel and added support for an eval set with early stopping based on MSE.
+
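To make the new `fit` signature concrete, here is a minimal sketch on synthetic data. The keyword names follow the README excerpt above; the data shapes, model size, and early-stopping settings are illustrative assumptions, not values taken from the package.

```python
# Minimal sketch of the eval-set / early-stopping API documented above.
# Synthetic data and hyperparameters are assumptions for illustration.
import numpy as np
from warpgbm import WarpGBM

rng = np.random.default_rng(0)
X = rng.random((10_000, 50)).astype(np.float32)              # (num_samples, num_features)
y = (X[:, 0] * 0.5 + rng.normal(0, 0.1, 10_000)).astype(np.float32)
X_eval = rng.random((2_000, 50)).astype(np.float32)
y_eval = (X_eval[:, 0] * 0.5).astype(np.float32)

model = WarpGBM(n_estimators=100)                            # requires a CUDA device
model.fit(
    X, y,
    X_eval=X_eval, y_eval=y_eval,
    eval_every_n_trees=10,        # score the eval set every 10 trees
    early_stopping_rounds=3,      # stop after 3 evals without MSE improvement
)
preds = model.predict(X_eval)     # 1D numpy array of predictions
```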
{warpgbm-0.1.23 → warpgbm-0.1.24}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "warpgbm"
- version = "0.1.23"
+ version = "0.1.24"
  description = "A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA"
  readme = "README.md"
  requires-python = ">=3.8"
warpgbm-0.1.24/tests/numerai_test.py
@@ -0,0 +1,62 @@
+ from numerapi import NumerAPI
+ import pandas as pd
+ import numpy as np
+ from warpgbm import WarpGBM
+ import time
+ from sklearn.metrics import mean_squared_error
+
+
+ def predict_in_chunks(model, X, chunk_size=100_000):
+     preds = []
+     for i in range(0, X.shape[0], chunk_size):
+         X_chunk = X[i : i + chunk_size]
+         preds.append(model.predict(X_chunk))
+     return np.concatenate(preds)
+
+
+ def test_numerai_data():
+     napi = NumerAPI()
+     napi.download_dataset("v5.0/train.parquet", "numerai_train.parquet")
+
+     data = pd.read_parquet("numerai_train.parquet")
+     features = [f for f in list(data) if "feature" in f][:1000]
+     target = "target"
+
+     X = data[features].astype("int8").values[:]
+     y = data[target].values
+
+     model = WarpGBM(
+         max_depth=10,
+         num_bins=5,
+         n_estimators=100,
+         learning_rate=1,
+         threads_per_block=64,
+         rows_per_thread=4,
+         colsample_bytree=0.8,
+     )
+
+     start_fit = time.time()
+     model.fit(
+         X,
+         y,
+         # era_id=era,
+         # X_eval=X,
+         # y_eval=y,
+         # eval_every_n_trees=10,
+         # early_stopping_rounds=1,
+     )
+     fit_time = time.time() - start_fit
+     print(f" Fit time: {fit_time:.3f} seconds")
+
+     start_pred = time.time()
+     preds = predict_in_chunks(model, X, chunk_size=500_000)
+     pred_time = time.time() - start_pred
+     print(f" Predict time: {pred_time:.3f} seconds")
+
+     corr = np.corrcoef(preds, y)[0, 1]
+     mse = mean_squared_error(preds, y)
+     print(f" Correlation: {corr:.4f}")
+     print(f" MSE: {mse:.4f}")
+
+     assert corr > 0.68, f"In-sample correlation too low: {corr}"
+     assert mse < 0.03, f"In-sample mse too high: {mse}"
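The test leaves `era_id` commented out. If era-aware fitting were wanted here, the id vector could plausibly be derived from the dataset's era labels; the sketch below assumes the Numerai parquet carries an `era` column of per-row labels, which is not shown in this diff.

```python
# Hedged sketch: building the era_id vector the test leaves commented out.
# Assumes an "era" column exists in the downloaded parquet.
import pandas as pd

data = pd.read_parquet("numerai_train.parquet")
era_id = pd.factorize(data["era"])[0].astype("int32")  # contiguous codes 0..n_eras-1
# model.fit(X, y, era_id=era_id)
```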
warpgbm-0.1.24/version.txt
@@ -0,0 +1 @@
+ 0.1.24
{warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm/core.py
@@ -5,6 +5,7 @@ from warpgbm.cuda import node_kernel
  from tqdm import tqdm
  from typing import Tuple
  from torch import Tensor
+ import gc

  histogram_kernels = {
      "hist1": node_kernel.compute_histogram,
@@ -29,6 +30,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
          L2_reg=1e-6,
          L1_reg=0.0,
          device="cuda",
+         colsample_bytree=1.0,
      ):
          # Validate arguments
          self._validate_hyperparams(
@@ -43,6 +45,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
              rows_per_thread=rows_per_thread,
              L2_reg=L2_reg,
              L1_reg=L1_reg,
+             colsample_bytree=colsample_bytree,
          )

          self.num_bins = num_bins
@@ -71,6 +74,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
          self.L2_reg = L2_reg
          self.L1_reg = L1_reg
          self.forest = [{} for _ in range(self.n_estimators)]
+         self.colsample_bytree = colsample_bytree

      def _validate_hyperparams(self, **kwargs):
          # Type checks
@@ -82,7 +86,13 @@
              "threads_per_block",
              "rows_per_thread",
          ]
-         float_params = ["learning_rate", "min_split_gain", "L2_reg", "L1_reg"]
+         float_params = [
+             "learning_rate",
+             "min_split_gain",
+             "L2_reg",
+             "L1_reg",
+             "colsample_bytree",
+         ]

          for param in int_params:
              if not isinstance(kwargs[param], int):
@@ -122,6 +132,10 @@
              raise ValueError(
                  f"Invalid histogram_computer: {kwargs['histogram_computer']}. Choose from {list(histogram_kernels.keys())}."
              )
+         if kwargs["colsample_bytree"] <= 0 or kwargs["colsample_bytree"] > 1:
+             raise ValueError(
+                 f"Invalid colsample_bytree: {kwargs['colsample_bytree']}. Must be a float value > 0 and <= 1."
+             )

      def validate_fit_params(
          self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
@@ -224,6 +238,7 @@
          self.best_bins = torch.zeros(
              self.num_features, device=self.device, dtype=torch.int32
          )
+         self.feature_indices = torch.arange(self.num_features, device=self.device)

          # ─── Optional Eval Set ───
          if X_eval is not None and y_eval is not None:
@@ -241,6 +256,11 @@
          with torch.no_grad():
              self.grow_forest()

+         del self.bin_indices
+         del self.Y_gpu
+
+         gc.collect()
+
          return self

      def preprocess_gpu_data(self, X_np, Y_np, era_id_np):
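The new teardown at the end of `fit` drops the largest GPU tensors and forces a garbage-collection pass. A standalone sketch of that pattern follows; the `torch.cuda.empty_cache()` call is an optional extra step beyond what the diff does, not part of the package.

```python
# Sketch of the release pattern: drop references, collect, and (optionally,
# an assumption beyond the diff) return cached blocks to the CUDA driver.
import gc
import torch

big = torch.zeros((1_000_000, 100), device="cuda")  # ~400 MB of float32
# ... training work would happen here ...
del big                    # drop the only Python reference
gc.collect()               # collect cycles that may still pin GPU storage
torch.cuda.empty_cache()   # optional: hand cached memory back to the driver
```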
@@ -350,16 +370,16 @@
              return {"leaf_value": leaf_value.item(), "samples": node_indices.numel()}

          parent_size = node_indices.numel()
-         best_feature, best_bin = self.find_best_split(
+         local_feature, best_bin = self.find_best_split(
              gradient_histogram, hessian_histogram
          )

-         if best_feature == -1:
+         if local_feature == -1:
              leaf_value = self.residual[node_indices].mean()
              self.gradients[node_indices] += self.learning_rate * leaf_value
              return {"leaf_value": leaf_value.item(), "samples": parent_size}

-         split_mask = self.bin_indices[node_indices, best_feature] <= best_bin
+         split_mask = self.bin_indices_tree[node_indices, local_feature] <= best_bin
          left_indices = node_indices[split_mask]
          right_indices = node_indices[~split_mask]

@@ -368,13 +388,13 @@

          if left_size <= right_size:
              grad_hist_left, hess_hist_left = self.compute_histograms(
-                 self.bin_indices[left_indices], self.residual[left_indices]
+                 self.bin_indices_tree[left_indices], self.residual[left_indices]
              )
              grad_hist_right = gradient_histogram - grad_hist_left
              hess_hist_right = hessian_histogram - hess_hist_left
          else:
              grad_hist_right, hess_hist_right = self.compute_histograms(
-                 self.bin_indices[right_indices], self.residual[right_indices]
+                 self.bin_indices_tree[right_indices], self.residual[right_indices]
              )
              grad_hist_left = gradient_histogram - grad_hist_right
              hess_hist_left = hessian_histogram - hess_hist_right
@@ -388,7 +408,7 @@
          )

          return {
-             "feature": best_feature,
+             "feature": self.feat_indices_tree[local_feature],
              "bin": best_bin,
              "left": left_child,
              "right": right_child,
@@ -418,21 +438,33 @@

      def grow_forest(self):
          self.training_loss = []
-         self.eval_loss = []  # <-- if eval set is given
+         self.eval_loss = []  # if eval set is given
          self.stop = False

+         if self.colsample_bytree < 1.0:
+             k = max(1, int(self.colsample_bytree * self.num_features))
+         else:
+             self.feat_indices_tree = self.feature_indices
+             self.bin_indices_tree = self.bin_indices
+
          for i in range(self.n_estimators):
              self.residual = self.Y_gpu - self.gradients

+             if self.colsample_bytree < 1.0:
+                 self.feat_indices_tree = torch.randperm(
+                     self.num_features, device=self.device
+                 )[:k]
+                 self.bin_indices_tree = self.bin_indices[:, self.feat_indices_tree]
+
              self.root_gradient_histogram, self.root_hessian_histogram = (
-                 self.compute_histograms(self.bin_indices, self.residual)
+                 self.compute_histograms(self.bin_indices_tree, self.residual)
              )

              tree = self.grow_tree(
                  self.root_gradient_histogram,
                  self.root_hessian_histogram,
                  self.root_node_indices,
-                 depth=0,
+                 0,
              )
              self.forest[i] = tree

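The core of the new `colsample_bytree` support is per-tree column subsampling: each tree trains on a random subset of `k` columns, and the winning split's tree-local feature index is mapped back to a global index via `feat_indices_tree` (see the `grow_tree` hunks above). A self-contained CPU sketch of that index bookkeeping:

```python
# Self-contained sketch of per-tree column subsampling and index mapping.
# Shapes and values are illustrative; the real code keeps tensors on CUDA.
import torch

num_samples, num_features, colsample_bytree = 100, 10, 0.8
bin_indices = torch.randint(0, 5, (num_samples, num_features), dtype=torch.int8)

k = max(1, int(colsample_bytree * num_features))
feat_indices_tree = torch.randperm(num_features)[:k]    # global column ids for this tree
bin_indices_tree = bin_indices[:, feat_indices_tree]    # tree-local view, k columns

local_feature = 3                                       # index returned by find_best_split
global_feature = int(feat_indices_tree[local_feature])  # what the tree node stores
```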
@@ -444,7 +476,7 @@
          print("Finished training forest.")

      def bin_data_with_existing_edges(self, X_np):
-         X_tensor = torch.from_numpy(X_np).to(torch.float32).pin_memory()
+         X_tensor = torch.from_numpy(X_np).type(torch.float32).pin_memory()
          num_samples = X_tensor.size(0)
          bin_indices = torch.zeros(
              (num_samples, self.num_features), dtype=torch.int8, device=self.device
@@ -478,26 +510,30 @@
          return out

      def predict(self, X_np):
-         bin_indices = self.bin_data_with_existing_edges(X_np)
-         out = self.predict_binned(bin_indices)
-         return out.cpu().numpy()
+         is_integer_type = np.issubdtype(X_np.dtype, np.integer)

-     def flatten_tree(self, tree, max_nodes):
-         """
-         Convert a recursive tree structure into a flat matrix format.
+         if is_integer_type and X_np.shape[1] == self.num_features:
+             max_vals = X_np.max(axis=0)
+             if np.all(max_vals < self.num_bins):
+                 print("Detected pre-binned input at predict-time — skipping binning.")
+                 is_prebinned = True
+             else:
+                 is_prebinned = False
+         else:
+             is_prebinned = False

-         Each row in the output represents a node:
-         - Columns: [feature, bin, left_id, right_id, is_leaf, value]
-         - Internal nodes fill columns 0–3 and set is_leaf = 0
-         - Leaf nodes fill only value and set is_leaf = 1
+         if is_prebinned:
+             bin_indices = (
+                 torch.from_numpy(X_np).to(self.device).contiguous().to(torch.int8)
+             )
+         else:
+             bin_indices = self.bin_data_with_existing_edges(X_np)

-         Args:
-             tree (list): A list containing a single root node (recursive dict form).
-             max_nodes (int): Max number of nodes to allocate in the flat matrix.
+         preds = self.predict_binned(bin_indices).cpu().numpy()
+         del bin_indices
+         return preds

-         Returns:
-             torch.Tensor: [max_nodes x 6] matrix representing the flattened tree.
-         """
+     def flatten_tree(self, tree, max_nodes):
          flat = torch.full((max_nodes, 6), float("nan"), dtype=torch.float32)
          node_counter = [0]
          node_list = []
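The rewritten `predict` skips re-binning when the input already looks binned: integer dtype, the expected feature count, and every value below `num_bins`. A standalone sketch of that heuristic:

```python
# Standalone sketch of the pre-binned detection heuristic used in predict().
import numpy as np

def looks_prebinned(X_np, num_features, num_bins):
    if not np.issubdtype(X_np.dtype, np.integer):
        return False                   # raw floats always need binning
    if X_np.shape[1] != num_features:
        return False                   # feature count must match training
    return bool((X_np.max(axis=0) < num_bins).all())  # all values are valid bin ids

X = np.random.randint(0, 5, size=(1000, 20), dtype=np.int8)
assert looks_prebinned(X, num_features=20, num_bins=5)
assert not looks_prebinned(X.astype(np.float32), 20, 5)
```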
{warpgbm-0.1.23 → warpgbm-0.1.24/warpgbm.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: warpgbm
- Version: 0.1.23
+ Version: 0.1.24
  Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
  License: GNU GENERAL PUBLIC LICENSE
  Version 3, 29 June 2007
@@ -879,8 +879,26 @@ No installation required — just press **"Open in Playground"**, then **Run All
  - `L2_reg`: L2 regularizer (default: 1e-6)

  ### Methods:
- - `.fit(X, y, era_id=None)`: Train the model. `X` can be raw floats or pre-binned `int8` data. `era_id` is optional and used internally.
- - `.predict(X)`: Predict on new data, using parallelized CUDA kernel.
+ ```
+ .fit(
+     X,                          # numpy array (float or int), 2D: (num_samples, num_features)
+     y,                          # numpy array (float or int), 1D: (num_samples,)
+     era_id=None,                # numpy array (int), 1D: (num_samples,)
+     X_eval=None,                # numpy array (float or int), 2D: (eval_num_samples, num_features)
+     y_eval=None,                # numpy array (float or int), 1D: (eval_num_samples,)
+     eval_every_n_trees=None,    # int >= 1
+     early_stopping_rounds=None, # int >= 1
+ )
+ ```
+ Train the model, with an optional validation set and early stopping.
+
+
+ ```
+ .predict(
+     X   # numpy array (float or int), 2D: (predict_num_samples, num_features)
+ )
+ ```
+ Predict on new data using a parallelized CUDA kernel.

  ---
@@ -896,3 +914,7 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA

  - Vectorized predict function replaced with CUDA kernel (`warpgbm/cuda/predict.cu`), parallelizing per sample, per tree.

+ ### v0.1.23
+
+ - Adjusted gain in the split kernel and added support for an eval set with early stopping based on MSE.
+
{warpgbm-0.1.23 → warpgbm-0.1.24}/warpgbm.egg-info/SOURCES.txt
@@ -5,6 +5,7 @@ pyproject.toml
  setup.py
  version.txt
  tests/__init__.py
+ tests/numerai_test.py
  tests/test_fit_predict_corr.py
  warpgbm/__init__.py
  warpgbm/core.py
warpgbm-0.1.23/version.txt
@@ -1 +0,0 @@
- 0.1.23