warpgbm 0.1.18__tar.gz → 0.1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: warpgbm
- Version: 0.1.18
+ Version: 0.1.20
  Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
  License: GNU GENERAL PUBLIC LICENSE
  Version 3, 29 June 2007
@@ -710,6 +710,23 @@ In our initial tests on an NVIDIA 3090 (local) and A100 (Google Colab Pro), Warp
 
  ---
 
+ ## Benchmarks
+
+ ### Scikit-Learn Synthetic Data: 1 Million Rows and 1,000 Features
+
+ In this benchmark we compare the speed and in-sample correlation of **WarpGBM v0.1.19** against LightGBM, XGBoost and CatBoost, all with their GPU-enabled versions. This benchmark runs on Google Colab with the L4 GPU environment. The CPU versions don't even come close to the speed here so we didn't test them.
+
+ ```
+ WarpGBM: corr = 0.8882, train = 21.8s, infer = 11.6s
+ XGBoost: corr = 0.8877, train = 33.4s, infer = 8.1s
+ LightGBM: corr = 0.8604, train = 30.2s, infer = 1.4s
+ CatBoost: corr = 0.8935, train = 377.9s, infer = 375.8s
+ ```
+
+ Colab Notebook: https://colab.research.google.com/drive/16U1kbYlD5HibGbnF5NGsjChZ1p1IA2pK
+
+ ---
+
  ## Installation
 
  ### Recommended (GitHub, always latest):
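The benchmark section added above reports in-sample correlation and wall-clock train/infer times on a 1,000,000 × 1,000 synthetic regression problem. As a rough illustration of how the WarpGBM entry in that table is measured (this is not the notebook's actual code: it assumes `sklearn.datasets.make_regression` for the data, a `predict` method on the estimator, and default hyperparameters), see the sketch below; the linked Colab notebook remains the authoritative setup.

```python
# Hedged sketch of the WarpGBM timing reported in the README benchmark table.
# Assumes sklearn is installed, a CUDA GPU is available, and WarpGBM exposes
# predict() alongside the fit() shown later in this diff.
import time

import numpy as np
from sklearn.datasets import make_regression
from warpgbm import WarpGBM

# 1M x 1000 float32 features is ~4 GB; shrink n_samples to smoke-test locally.
X, y = make_regression(n_samples=1_000_000, n_features=1000, random_state=0)
X, y = X.astype(np.float32), y.astype(np.float32)

model = WarpGBM(device='cuda')

t0 = time.time()
model.fit(X, y)
train_time = time.time() - t0

t0 = time.time()
preds = model.predict(X)
infer_time = time.time() - t0

corr = np.corrcoef(preds, y)[0, 1]  # in-sample correlation, as in the table
print(f"WarpGBM: corr = {corr:.4f}, train = {train_time:.1f}s, infer = {infer_time:.1f}s")
```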
@@ -22,6 +22,23 @@ In our initial tests on an NVIDIA 3090 (local) and A100 (Google Colab Pro), Warp
 
  ---
 
+ ## Benchmarks
+
+ ### Scikit-Learn Synthetic Data: 1 Million Rows and 1,000 Features
+
+ In this benchmark we compare the speed and in-sample correlation of **WarpGBM v0.1.19** against LightGBM, XGBoost and CatBoost, all with their GPU-enabled versions. This benchmark runs on Google Colab with the L4 GPU environment. The CPU versions don't even come close to the speed here so we didn't test them.
+
+ ```
+ WarpGBM: corr = 0.8882, train = 21.8s, infer = 11.6s
+ XGBoost: corr = 0.8877, train = 33.4s, infer = 8.1s
+ LightGBM: corr = 0.8604, train = 30.2s, infer = 1.4s
+ CatBoost: corr = 0.8935, train = 377.9s, infer = 375.8s
+ ```
+
+ Colab Notebook: https://colab.research.google.com/drive/16U1kbYlD5HibGbnF5NGsjChZ1p1IA2pK
+
+ ---
+
  ## Installation
 
  ### Recommended (GitHub, always latest):
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "warpgbm"
- version = "0.1.18"
+ version = "0.1.20"
  description = "A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA"
  readme = "README.md"
  requires-python = ">=3.8"
@@ -25,7 +25,7 @@ def test_fit_predict_correlation():
  learning_rate=1,
  verbosity=False,
  histogram_computer=hist_type,
- threads_per_block=128,
+ threads_per_block=64,
  rows_per_thread=4
  )
 
@@ -0,0 +1 @@
+ 0.1.20
@@ -25,16 +25,31 @@ class WarpGBM(BaseEstimator, RegressorMixin):
  histogram_computer='hist3',
  threads_per_block=64,
  rows_per_thread=4,
- L2_reg = 1e-6,
- L1_reg = 0.0,
- device = 'cuda'
+ L2_reg=1e-6,
+ L1_reg=0.0,
+ device='cuda'
  ):
+ # Validate arguments
+ self._validate_hyperparams(
+ num_bins=num_bins,
+ max_depth=max_depth,
+ learning_rate=learning_rate,
+ n_estimators=n_estimators,
+ min_child_weight=min_child_weight,
+ min_split_gain=min_split_gain,
+ histogram_computer=histogram_computer,
+ threads_per_block=threads_per_block,
+ rows_per_thread=rows_per_thread,
+ L2_reg=L2_reg,
+ L1_reg=L1_reg
+ )
+
  self.num_bins = num_bins
  self.max_depth = max_depth
  self.learning_rate = learning_rate
  self.n_estimators = n_estimators
  self.forest = None
- self.bin_edges = None # shape: [num_features, num_bins-1] if using quantile binning
+ self.bin_edges = None
  self.base_prediction = None
  self.unique_eras = None
  self.device = device
@@ -55,6 +70,45 @@ class WarpGBM(BaseEstimator, RegressorMixin):
  self.L2_reg = L2_reg
  self.L1_reg = L1_reg
 
+ def _validate_hyperparams(self, **kwargs):
+ # Type checks
+ int_params = [
+ "num_bins", "max_depth", "n_estimators", "min_child_weight",
+ "threads_per_block", "rows_per_thread"
+ ]
+ float_params = [
+ "learning_rate", "min_split_gain", "L2_reg", "L1_reg"
+ ]
+
+ for param in int_params:
+ if not isinstance(kwargs[param], int):
+ raise TypeError(f"{param} must be an integer, got {type(kwargs[param])}.")
+
+ for param in float_params:
+ if not isinstance(kwargs[param], (float, int)): # Accept ints as valid floats
+ raise TypeError(f"{param} must be a float, got {type(kwargs[param])}.")
+
+ if not ( 2 <= kwargs["num_bins"] <= 127 ):
+ raise ValueError("num_bins must be between 2 and 127 inclusive.")
+ if kwargs["max_depth"] < 1:
+ raise ValueError("max_depth must be at least 1.")
+ if not (0.0 < kwargs["learning_rate"] <= 1.0):
+ raise ValueError("learning_rate must be in (0.0, 1.0].")
+ if kwargs["n_estimators"] <= 0:
+ raise ValueError("n_estimators must be positive.")
+ if kwargs["min_child_weight"] < 1:
+ raise ValueError("min_child_weight must be a positive integer.")
+ if kwargs["min_split_gain"] < 0:
+ raise ValueError("min_split_gain must be non-negative.")
+ if kwargs["threads_per_block"] <= 0 or kwargs["threads_per_block"] % 32 != 0:
+ raise ValueError("threads_per_block should be a positive multiple of 32 (warp size).")
+ if not ( 1 <= kwargs["rows_per_thread"] <= 16 ):
+ raise ValueError("rows_per_thread must be positive between 1 and 16 inclusive.")
+ if kwargs["L2_reg"] < 0 or kwargs["L1_reg"] < 0:
+ raise ValueError("L2_reg and L1_reg must be non-negative.")
+ if kwargs["histogram_computer"] not in histogram_kernels:
+ raise ValueError(f"Invalid histogram_computer: {kwargs['histogram_computer']}. Choose from {list(histogram_kernels.keys())}.")
+
  def fit(self, X, y, era_id=None):
  if era_id is None:
  era_id = np.ones(X.shape[0], dtype='int32')
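The new `_validate_hyperparams` call moves argument checking to construction time, so a bad setting raises immediately instead of surfacing later as a CUDA-side failure inside `fit`. Below is a small hedged illustration of that behavior; it assumes the constructor arguments not visible in this hunk all have defaults, and the quoted messages are the ones added in the diff above.

```python
# Hedged illustration of the constructor-time validation added in this release.
# Assumes the remaining WarpGBM constructor arguments have defaults (they are
# not shown in this hunk).
from warpgbm import WarpGBM

WarpGBM(threads_per_block=64)        # OK: positive multiple of 32 (warp size)

try:
    WarpGBM(threads_per_block=100)   # rejected: not a multiple of 32
except ValueError as e:
    print(e)  # threads_per_block should be a positive multiple of 32 (warp size).

try:
    WarpGBM(num_bins=256)            # rejected: validation limits num_bins to 2..127
except ValueError as e:
    print(e)  # num_bins must be between 2 and 127 inclusive.

try:
    WarpGBM(learning_rate="0.1")     # rejected by the type check: must be a float
except TypeError as e:
    print(e)  # learning_rate must be a float, got <class 'str'>.
```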
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: warpgbm
- Version: 0.1.18
+ Version: 0.1.20
  Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
  License: GNU GENERAL PUBLIC LICENSE
  Version 3, 29 June 2007
@@ -710,6 +710,23 @@ In our initial tests on an NVIDIA 3090 (local) and A100 (Google Colab Pro), Warp
 
  ---
 
+ ## Benchmarks
+
+ ### Scikit-Learn Synthetic Data: 1 Million Rows and 1,000 Features
+
+ In this benchmark we compare the speed and in-sample correlation of **WarpGBM v0.1.19** against LightGBM, XGBoost and CatBoost, all with their GPU-enabled versions. This benchmark runs on Google Colab with the L4 GPU environment. The CPU versions don't even come close to the speed here so we didn't test them.
+
+ ```
+ WarpGBM: corr = 0.8882, train = 21.8s, infer = 11.6s
+ XGBoost: corr = 0.8877, train = 33.4s, infer = 8.1s
+ LightGBM: corr = 0.8604, train = 30.2s, infer = 1.4s
+ CatBoost: corr = 0.8935, train = 377.9s, infer = 375.8s
+ ```
+
+ Colab Notebook: https://colab.research.google.com/drive/16U1kbYlD5HibGbnF5NGsjChZ1p1IA2pK
+
+ ---
+
  ## Installation
 
  ### Recommended (GitHub, always latest):
@@ -1 +0,0 @@
- 0.1.18
6 files without changes