warpgbm 0.1.18__tar.gz → 0.1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: warpgbm
- Version: 0.1.18
+ Version: 0.1.20
  Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
  License: GNU GENERAL PUBLIC LICENSE
  Version 3, 29 June 2007
@@ -710,6 +710,23 @@ In our initial tests on an NVIDIA 3090 (local) and A100 (Google Colab Pro), Warp
 
  ---
 
+ ## Benchmarks
+
+ ### Scikit-Learn Synthetic Data: 1 Million Rows and 1,000 Features
+
+ In this benchmark we compare the speed and in-sample correlation of **WarpGBM v0.1.19** against LightGBM, XGBoost and CatBoost, all with their GPU-enabled versions. This benchmark runs on Google Colab with the L4 GPU environment. The CPU versions don't even come close to the speed here so we didn't test them.
+
+ ```
+ WarpGBM: corr = 0.8882, train = 21.8s, infer = 11.6s
+ XGBoost: corr = 0.8877, train = 33.4s, infer = 8.1s
+ LightGBM: corr = 0.8604, train = 30.2s, infer = 1.4s
+ CatBoost: corr = 0.8935, train = 377.9s, infer = 375.8s
+ ```
+
+ Colab Notebook: https://colab.research.google.com/drive/16U1kbYlD5HibGbnF5NGsjChZ1p1IA2pK
+
+ ---
+
  ## Installation
 
  ### Recommended (GitHub, always latest):
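The benchmark section added above reports in-sample correlation and wall-clock train/infer times on a 1,000,000 × 1,000 synthetic regression problem. As a rough illustration of how the WarpGBM entry in that table is measured (this is not the notebook's actual code: it assumes `sklearn.datasets.make_regression` for the data, a `predict` method on the estimator, and default hyperparameters), see the sketch below; the linked Colab notebook remains the authoritative setup.

```python
# Hedged sketch of the WarpGBM timing reported in the README benchmark table.
# Assumes sklearn is installed, a CUDA GPU is available, and WarpGBM exposes
# predict() alongside the fit() shown later in this diff.
import time

import numpy as np
from sklearn.datasets import make_regression
from warpgbm import WarpGBM

# 1M x 1000 float32 features is ~4 GB; shrink n_samples to smoke-test locally.
X, y = make_regression(n_samples=1_000_000, n_features=1000, random_state=0)
X, y = X.astype(np.float32), y.astype(np.float32)

model = WarpGBM(device='cuda')

t0 = time.time()
model.fit(X, y)
train_time = time.time() - t0

t0 = time.time()
preds = model.predict(X)
infer_time = time.time() - t0

corr = np.corrcoef(preds, y)[0, 1]  # in-sample correlation, as in the table
print(f"WarpGBM: corr = {corr:.4f}, train = {train_time:.1f}s, infer = {infer_time:.1f}s")
```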
@@ -22,6 +22,23 @@ In our initial tests on an NVIDIA 3090 (local) and A100 (Google Colab Pro), Warp
 
  ---
 
+ ## Benchmarks
+
+ ### Scikit-Learn Synthetic Data: 1 Million Rows and 1,000 Features
+
+ In this benchmark we compare the speed and in-sample correlation of **WarpGBM v0.1.19** against LightGBM, XGBoost and CatBoost, all with their GPU-enabled versions. This benchmark runs on Google Colab with the L4 GPU environment. The CPU versions don't even come close to the speed here so we didn't test them.
+
+ ```
+ WarpGBM: corr = 0.8882, train = 21.8s, infer = 11.6s
+ XGBoost: corr = 0.8877, train = 33.4s, infer = 8.1s
+ LightGBM: corr = 0.8604, train = 30.2s, infer = 1.4s
+ CatBoost: corr = 0.8935, train = 377.9s, infer = 375.8s
+ ```
+
+ Colab Notebook: https://colab.research.google.com/drive/16U1kbYlD5HibGbnF5NGsjChZ1p1IA2pK
+
+ ---
+
  ## Installation
 
  ### Recommended (GitHub, always latest):
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "warpgbm"
- version = "0.1.18"
+ version = "0.1.20"
  description = "A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA"
  readme = "README.md"
  requires-python = ">=3.8"
@@ -25,7 +25,7 @@ def test_fit_predict_correlation():
  learning_rate=1,
  verbosity=False,
  histogram_computer=hist_type,
- threads_per_block=128,
+ threads_per_block=64,
  rows_per_thread=4
  )
 
@@ -0,0 +1 @@
+ 0.1.20
@@ -25,16 +25,31 @@ class WarpGBM(BaseEstimator, RegressorMixin):
  histogram_computer='hist3',
  threads_per_block=64,
  rows_per_thread=4,
- L2_reg = 1e-6,
- L1_reg = 0.0,
- device = 'cuda'
+ L2_reg=1e-6,
+ L1_reg=0.0,
+ device='cuda'
  ):
+ # Validate arguments
+ self._validate_hyperparams(
+ num_bins=num_bins,
+ max_depth=max_depth,
+ learning_rate=learning_rate,
+ n_estimators=n_estimators,
+ min_child_weight=min_child_weight,
+ min_split_gain=min_split_gain,
+ histogram_computer=histogram_computer,
+ threads_per_block=threads_per_block,
+ rows_per_thread=rows_per_thread,
+ L2_reg=L2_reg,
+ L1_reg=L1_reg
+ )
+
  self.num_bins = num_bins
  self.max_depth = max_depth
  self.learning_rate = learning_rate
  self.n_estimators = n_estimators
  self.forest = None
- self.bin_edges = None # shape: [num_features, num_bins-1] if using quantile binning
+ self.bin_edges = None
  self.base_prediction = None
  self.unique_eras = None
  self.device = device
@@ -55,6 +70,45 @@ class WarpGBM(BaseEstimator, RegressorMixin):
  self.L2_reg = L2_reg
  self.L1_reg = L1_reg
 
+ def _validate_hyperparams(self, **kwargs):
+ # Type checks
+ int_params = [
+ "num_bins", "max_depth", "n_estimators", "min_child_weight",
+ "threads_per_block", "rows_per_thread"
+ ]
+ float_params = [
+ "learning_rate", "min_split_gain", "L2_reg", "L1_reg"
+ ]
+
+ for param in int_params:
+ if not isinstance(kwargs[param], int):
+ raise TypeError(f"{param} must be an integer, got {type(kwargs[param])}.")
+
+ for param in float_params:
+ if not isinstance(kwargs[param], (float, int)): # Accept ints as valid floats
+ raise TypeError(f"{param} must be a float, got {type(kwargs[param])}.")
+
+ if not ( 2 <= kwargs["num_bins"] <= 127 ):
+ raise ValueError("num_bins must be between 2 and 127 inclusive.")
+ if kwargs["max_depth"] < 1:
+ raise ValueError("max_depth must be at least 1.")
+ if not (0.0 < kwargs["learning_rate"] <= 1.0):
+ raise ValueError("learning_rate must be in (0.0, 1.0].")
+ if kwargs["n_estimators"] <= 0:
+ raise ValueError("n_estimators must be positive.")
+ if kwargs["min_child_weight"] < 1:
+ raise ValueError("min_child_weight must be a positive integer.")
+ if kwargs["min_split_gain"] < 0:
+ raise ValueError("min_split_gain must be non-negative.")
+ if kwargs["threads_per_block"] <= 0 or kwargs["threads_per_block"] % 32 != 0:
+ raise ValueError("threads_per_block should be a positive multiple of 32 (warp size).")
+ if not ( 1 <= kwargs["rows_per_thread"] <= 16 ):
+ raise ValueError("rows_per_thread must be positive between 1 and 16 inclusive.")
+ if kwargs["L2_reg"] < 0 or kwargs["L1_reg"] < 0:
+ raise ValueError("L2_reg and L1_reg must be non-negative.")
+ if kwargs["histogram_computer"] not in histogram_kernels:
+ raise ValueError(f"Invalid histogram_computer: {kwargs['histogram_computer']}. Choose from {list(histogram_kernels.keys())}.")
+
  def fit(self, X, y, era_id=None):
  if era_id is None:
  era_id = np.ones(X.shape[0], dtype='int32')
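The new `_validate_hyperparams` call moves argument checking to construction time, so a bad setting raises immediately instead of surfacing later as a CUDA-side failure inside `fit`. Below is a small hedged illustration of that behavior; it assumes the constructor arguments not visible in this hunk all have defaults, and the quoted messages are the ones added in the diff above.

```python
# Hedged illustration of the constructor-time validation added in this release.
# Assumes the remaining WarpGBM constructor arguments have defaults (they are
# not shown in this hunk).
from warpgbm import WarpGBM

WarpGBM(threads_per_block=64)        # OK: positive multiple of 32 (warp size)

try:
    WarpGBM(threads_per_block=100)   # rejected: not a multiple of 32
except ValueError as e:
    print(e)  # threads_per_block should be a positive multiple of 32 (warp size).

try:
    WarpGBM(num_bins=256)            # rejected: validation limits num_bins to 2..127
except ValueError as e:
    print(e)  # num_bins must be between 2 and 127 inclusive.

try:
    WarpGBM(learning_rate="0.1")     # rejected by the type check: must be a float
except TypeError as e:
    print(e)  # learning_rate must be a float, got <class 'str'>.
```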
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: warpgbm
- Version: 0.1.18
+ Version: 0.1.20
  Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
  License: GNU GENERAL PUBLIC LICENSE
  Version 3, 29 June 2007
@@ -710,6 +710,23 @@ In our initial tests on an NVIDIA 3090 (local) and A100 (Google Colab Pro), Warp
 
  ---
 
+ ## Benchmarks
+
+ ### Scikit-Learn Synthetic Data: 1 Million Rows and 1,000 Features
+
+ In this benchmark we compare the speed and in-sample correlation of **WarpGBM v0.1.19** against LightGBM, XGBoost and CatBoost, all with their GPU-enabled versions. This benchmark runs on Google Colab with the L4 GPU environment. The CPU versions don't even come close to the speed here so we didn't test them.
+
+ ```
+ WarpGBM: corr = 0.8882, train = 21.8s, infer = 11.6s
+ XGBoost: corr = 0.8877, train = 33.4s, infer = 8.1s
+ LightGBM: corr = 0.8604, train = 30.2s, infer = 1.4s
+ CatBoost: corr = 0.8935, train = 377.9s, infer = 375.8s
+ ```
+
+ Colab Notebook: https://colab.research.google.com/drive/16U1kbYlD5HibGbnF5NGsjChZ1p1IA2pK
+
+ ---
+
  ## Installation
 
  ### Recommended (GitHub, always latest):
@@ -1 +0,0 @@
- 0.1.18
6 files without changes