warpgbm 0.1.24__tar.gz → 0.1.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {warpgbm-0.1.24/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO +10 -1
  2. {warpgbm-0.1.24 → warpgbm-0.1.26}/README.md +9 -0
  3. {warpgbm-0.1.24 → warpgbm-0.1.26}/pyproject.toml +1 -1
  4. {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/test_fit_predict_corr.py +1 -0
  5. warpgbm-0.1.26/version.txt +1 -0
  6. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/core.py +35 -18
  7. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu +20 -21
  8. {warpgbm-0.1.24 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO +10 -1
  9. warpgbm-0.1.24/version.txt +0 -1
  10. {warpgbm-0.1.24 → warpgbm-0.1.26}/LICENSE +0 -0
  11. {warpgbm-0.1.24 → warpgbm-0.1.26}/MANIFEST.in +0 -0
  12. {warpgbm-0.1.24 → warpgbm-0.1.26}/setup.cfg +0 -0
  13. {warpgbm-0.1.24 → warpgbm-0.1.26}/setup.py +0 -0
  14. {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/__init__.py +0 -0
  15. {warpgbm-0.1.24 → warpgbm-0.1.26}/tests/numerai_test.py +0 -0
  16. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/__init__.py +0 -0
  17. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/__init__.py +0 -0
  18. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/best_split_kernel.cu +0 -0
  19. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/binner.cu +0 -0
  20. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/histogram_kernel.cu +0 -0
  21. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/node_kernel.cpp +0 -0
  22. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/SOURCES.txt +0 -0
  23. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/dependency_links.txt +0 -0
  24. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/requires.txt +0 -0
  25. {warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm.egg-info/top_level.txt +0 -0
{warpgbm-0.1.24/warpgbm.egg-info → warpgbm-0.1.26}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.24
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
         Version 3, 29 June 2007
@@ -877,6 +877,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features randomly sampled to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -888,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int), 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'; for 'corr', loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -918,3 +920,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fixed memory bugs in prediction and in the `colsample_bytree` logic. Added the "corr" eval metric.
{warpgbm-0.1.24 → warpgbm-0.1.26}/README.md

@@ -189,6 +189,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features randomly sampled to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -200,6 +201,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int), 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'; for 'corr', loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -230,3 +232,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fixed memory bugs in prediction and in the `colsample_bytree` logic. Added the "corr" eval metric.
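Taken together, the `colsample_bytree` parameter and the `eval_metric` argument documented above change the public `fit` surface. A minimal usage sketch; the data shapes, values, and `n_estimators` setting below are illustrative assumptions, not taken from the diff:

```python
import numpy as np
from warpgbm import WarpGBM

# Hypothetical toy data; sizes are illustrative only.
X_train = np.random.rand(10_000, 100).astype(np.float32)
y_train = np.random.rand(10_000).astype(np.float32)
X_valid = np.random.rand(2_000, 100).astype(np.float32)
y_valid = np.random.rand(2_000).astype(np.float32)

# colsample_bytree < 1 grows each tree on a random subset of features (v0.1.25);
# eval_metric="corr" scores the eval set as 1 - correlation(y_true, preds) (v0.1.26).
model = WarpGBM(n_estimators=100, colsample_bytree=0.8)
model.fit(
    X_train,
    y_train,
    X_eval=X_valid,
    y_eval=y_valid,
    eval_every_n_trees=10,
    early_stopping_rounds=3,
    eval_metric="corr",
)
preds = model.predict(X_valid)
```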
{warpgbm-0.1.24 → warpgbm-0.1.26}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "warpgbm"
-version = "0.1.24"
+version = "0.1.26"
 description = "A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA"
 readme = "README.md"
 requires-python = ">=3.8"
{warpgbm-0.1.24 → warpgbm-0.1.26}/tests/test_fit_predict_corr.py

@@ -37,6 +37,7 @@ def test_fit_predict_correlation():
         y_eval=y,
         eval_every_n_trees=10,
         early_stopping_rounds=1,
+        eval_metric="corr",
     )
     fit_time = time.time() - start_fit
     print(f" Fit time: {fit_time:.3f} seconds")
warpgbm-0.1.26/version.txt

@@ -0,0 +1 @@
+0.1.26
{warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/core.py

@@ -138,7 +138,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
     def validate_fit_params(
-        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+        self, X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
     ):
         # ─── Required: X and y ───
         if not isinstance(X, np.ndarray) or not isinstance(y, np.ndarray):
@@ -206,6 +206,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             # No early stopping = set to "never trigger"
            early_stopping_rounds = self.n_estimators + 1
 
+        if eval_metric not in ["mse", "corr"]:
+            raise ValueError(
+                f"Invalid eval_metric: {eval_metric}. Choose 'mse' or 'corr'."
+            )
+
         return early_stopping_rounds  # May have been defaulted here
 
     def fit(
@@ -217,9 +222,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         y_eval=None,
         eval_every_n_trees=None,
         early_stopping_rounds=None,
+        eval_metric="mse",
     ):
         early_stopping_rounds = self.validate_fit_params(
-            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds
+            X, y, era_id, X_eval, y_eval, eval_every_n_trees, early_stopping_rounds, eval_metric
         )
 
         if era_id is None:
@@ -242,10 +248,11 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         # ─── Optional Eval Set ───
         if X_eval is not None and y_eval is not None:
-            self.bin_indices_eval = self.bin_data_with_existing_edges(X_eval)
+            self.bin_indices_eval = self.bin_inference_data(X_eval)
             self.Y_gpu_eval = torch.from_numpy(y_eval).to(torch.float32).to(self.device)
             self.eval_every_n_trees = eval_every_n_trees
             self.early_stopping_rounds = early_stopping_rounds
+            self.eval_metric = eval_metric
         else:
             self.bin_indices_eval = None
             self.Y_gpu_eval = None
@@ -379,7 +386,7 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             self.gradients[node_indices] += self.learning_rate * leaf_value
             return {"leaf_value": leaf_value.item(), "samples": parent_size}
 
-        split_mask = self.bin_indices_tree[node_indices, local_feature] <= best_bin
+        split_mask = self.bin_indices[node_indices, self.feat_indices_tree[local_feature]] <= best_bin
         left_indices = node_indices[split_mask]
         right_indices = node_indices[~split_mask]
 
@@ -388,13 +395,15 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
         if left_size <= right_size:
             grad_hist_left, hess_hist_left = self.compute_histograms(
-                self.bin_indices_tree[left_indices], self.residual[left_indices]
+                self.bin_indices.index_select(0, left_indices).index_select(1, self.feat_indices_tree),
+                self.residual[left_indices],
             )
             grad_hist_right = gradient_histogram - grad_hist_left
             hess_hist_right = hessian_histogram - hess_hist_left
         else:
             grad_hist_right, hess_hist_right = self.compute_histograms(
-                self.bin_indices_tree[right_indices], self.residual[right_indices]
+                self.bin_indices.index_select(0, right_indices).index_select(1, self.feat_indices_tree),
+                self.residual[right_indices],
             )
             grad_hist_left = gradient_histogram - grad_hist_right
             hess_hist_left = hessian_histogram - hess_hist_right
@@ -413,25 +422,33 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             "left": left_child,
             "right": right_child,
         }
+
+    def get_eval_metric(self, y_true, y_pred):
+        if self.eval_metric == "mse":
+            return ((y_true - y_pred) ** 2).mean().item()
+        elif self.eval_metric == "corr":
+            return 1 - torch.corrcoef(torch.vstack([y_true, y_pred]))[0, 1].item()
+        else:
+            raise ValueError(f"Invalid eval_metric: {self.eval_metric}.")
 
     def compute_eval(self, i):
         if self.eval_every_n_trees == None:
             return
+
+        train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
+        self.training_loss.append(train_loss)
 
         if i % self.eval_every_n_trees == 0:
             eval_preds = self.predict_binned(self.bin_indices_eval)
-            eval_loss = ((self.Y_gpu_eval - eval_preds) ** 2).mean().item()
+            eval_loss = self.get_eval_metric(self.Y_gpu_eval, eval_preds)
             self.eval_loss.append(eval_loss)
 
-            train_loss = ((self.Y_gpu - self.gradients) ** 2).mean().item()
-            self.training_loss.append(train_loss)
-
             if len(self.eval_loss) > self.early_stopping_rounds:
-                if self.eval_loss[-self.early_stopping_rounds] < self.eval_loss[-1]:
+                if self.eval_loss[-(self.early_stopping_rounds + 1)] < self.eval_loss[-1]:
                     self.stop = True
 
             print(
-                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval MSE: {eval_loss:.6f}"
+                f"🌲 Tree {i+1}/{self.n_estimators} | Train MSE: {train_loss:.6f} | Eval {self.eval_metric}: {eval_loss:.6f}"
             )
 
             del eval_preds, eval_loss, train_loss
@@ -445,7 +462,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             k = max(1, int(self.colsample_bytree * self.num_features))
         else:
             self.feat_indices_tree = self.feature_indices
-            self.bin_indices_tree = self.bin_indices
 
         for i in range(self.n_estimators):
             self.residual = self.Y_gpu - self.gradients
@@ -454,10 +470,9 @@ class WarpGBM(BaseEstimator, RegressorMixin):
                 self.feat_indices_tree = torch.randperm(
                     self.num_features, device=self.device
                 )[:k]
-                self.bin_indices_tree = self.bin_indices[:, self.feat_indices_tree]
 
             self.root_gradient_histogram, self.root_hessian_histogram = (
-                self.compute_histograms(self.bin_indices_tree, self.residual)
+                self.compute_histograms(self.bin_indices[:, self.feat_indices_tree], self.residual)
             )
 
             tree = self.grow_tree(
@@ -493,7 +508,6 @@ class WarpGBM(BaseEstimator, RegressorMixin):
 
     def predict_binned(self, bin_indices):
         num_samples = bin_indices.size(0)
-
         tree_tensor = torch.stack(
             [
                 self.flatten_tree(tree, max_nodes=2 ** (self.max_depth + 1))
@@ -508,8 +522,8 @@ class WarpGBM(BaseEstimator, RegressorMixin):
         )
 
         return out
-
-    def predict(self, X_np):
+
+    def bin_inference_data(self, X_np):
        is_integer_type = np.issubdtype(X_np.dtype, np.integer)
 
         if is_integer_type and X_np.shape[1] == self.num_features:
@@ -528,7 +542,10 @@ class WarpGBM(BaseEstimator, RegressorMixin):
             )
         else:
             bin_indices = self.bin_data_with_existing_edges(X_np)
+        return bin_indices
 
+    def predict(self, X_np):
+        bin_indices = self.bin_inference_data(X_np)
         preds = self.predict_binned(bin_indices).cpu().numpy()
         del bin_indices
         return preds
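Two of the core.py changes above are behavioral rather than cosmetic. The early-stopping comparison now reaches back `early_stopping_rounds + 1` entries, so the newest eval loss is compared against the last loss recorded *before* the patience window; the old index gave a window one round too short. A standalone sketch of the corrected check, with hypothetical loss values:

```python
# Hypothetical eval losses, one entry per evaluation; values are illustrative.
eval_loss = [0.80, 0.74, 0.75]
early_stopping_rounds = 2

if len(eval_loss) > early_stopping_rounds:
    # Old check: eval_loss[-2] (0.74) < eval_loss[-1] (0.75) -> stop after a
    # single non-improving eval, i.e. an effective patience of rounds - 1.
    # Fixed check: compare against the loss just before the 2-eval window;
    # 0.80 < 0.75 is False, so training continues for the full window.
    if eval_loss[-(early_stopping_rounds + 1)] < eval_loss[-1]:
        print("stop")
```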
{warpgbm-0.1.24 → warpgbm-0.1.26}/warpgbm/cuda/predict.cu

@@ -5,23 +5,18 @@
 __global__ void predict_forest_kernel(
     const int8_t *__restrict__ bin_indices, // [N x F]
     const float *__restrict__ tree_tensor,  // [T x max_nodes x 6]
-    int N, int F, int T, int max_nodes,
+    int64_t N, int64_t F, int64_t T, int64_t max_nodes,
     float learning_rate,
     float *__restrict__ out // [N]
 )
 {
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    int total_jobs = N * T;
+    int64_t idx = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
+    int64_t total_jobs = N * T;
     if (idx >= total_jobs)
         return;
 
-    int i = idx % N; // sample index
-    int t = idx / N; // tree index
-
-    // if (i == 0 && t == 0)
-    // {
-    //     printf("[DEBUG] Thread (i=%d, t=%d): starting prediction\n", i, t);
-    // }
+    int64_t i = idx % N; // sample index
+    int64_t t = idx / N; // tree index
 
     const float *tree = tree_tensor + t * max_nodes * 6;
@@ -35,32 +30,36 @@ __global__ void predict_forest_kernel(
             atomicAdd(&out[i], learning_rate * val);
             return;
         }
+
         int feat = static_cast<int>(tree[node_id * 6 + 0]);
         int split_bin = static_cast<int>(tree[node_id * 6 + 1]);
         int left_id = static_cast<int>(tree[node_id * 6 + 2]);
         int right_id = static_cast<int>(tree[node_id * 6 + 3]);
 
-        int8_t bin = bin_indices[i * F + feat];
+        // prevent overflow
+        int64_t bin_idx = i * F + feat;
+        int8_t bin = bin_indices[bin_idx];
+
         node_id = (bin <= split_bin) ? left_id : right_id;
-        // printf("sample %d, tree %d, feat %d, bin %d, split %d → %s\n", i, t, feat, bin, split_bin, (bin <= split_bin ? "L" : "R"));
     }
 }
 
+
 void predict_with_forest(
-    const at::Tensor &bin_indices, // [N x F], int8
-    const at::Tensor &tree_tensor, // [T x max_nodes x 6], float32
+    const at::Tensor &bin_indices,
+    const at::Tensor &tree_tensor,
     float learning_rate,
-    at::Tensor &out // [N], float32
+    at::Tensor &out
 )
 {
-    int N = bin_indices.size(0);
-    int F = bin_indices.size(1);
-    int T = tree_tensor.size(0);
-    int max_nodes = tree_tensor.size(1);
+    int64_t N = bin_indices.size(0);
+    int64_t F = bin_indices.size(1);
+    int64_t T = tree_tensor.size(0);
+    int64_t max_nodes = tree_tensor.size(1);
 
-    int total_jobs = N * T;
+    int64_t total_jobs = N * T;
     int threads_per_block = 256;
-    int blocks = (total_jobs + threads_per_block - 1) / threads_per_block;
+    int64_t blocks = (total_jobs + threads_per_block - 1) / threads_per_block;
 
     predict_forest_kernel<<<blocks, threads_per_block>>>(
         bin_indices.data_ptr<int8_t>(),
{warpgbm-0.1.24 → warpgbm-0.1.26/warpgbm.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warpgbm
-Version: 0.1.24
+Version: 0.1.26
 Summary: A fast GPU-accelerated Gradient Boosted Decision Tree library with PyTorch + CUDA
 License: GNU GENERAL PUBLIC LICENSE
         Version 3, 29 June 2007
@@ -877,6 +877,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
 - `threads_per_block`: CUDA threads per block (default: 32)
 - `rows_per_thread`: Number of training rows processed per thread (default: 4)
 - `L2_reg`: L2 regularizer (default: 1e-6)
+- `colsample_bytree`: Proportion of features randomly sampled to grow each tree (default: 1)
 
 ### Methods:
 ```
@@ -888,6 +889,7 @@ No installation required — just press **"Open in Playground"**, then **Run All
     y_eval=None,                 # numpy array (float or int), 1 dimension (eval_num_samples)
     eval_every_n_trees=None,     # const (int) >= 1
     early_stopping_rounds=None,  # const (int) >= 1
+    eval_metric='mse'            # string, one of 'mse' or 'corr'; for 'corr', loss is 1 - correlation(y_true, preds)
 )
 ```
 Train with optional validation set and early stopping.
@@ -918,3 +920,10 @@ WarpGBM builds on the shoulders of PyTorch, scikit-learn, LightGBM, and the CUDA
 
 - Adjust gain in split kernel and added support for an eval set with early stopping based on MSE.
 
+### v0.1.25
+
+- Added `colsample_bytree` parameter and a new test using Numerai data.
+
+### v0.1.26
+
+- Fixed memory bugs in prediction and in the `colsample_bytree` logic. Added the "corr" eval metric.
warpgbm-0.1.24/version.txt

@@ -1 +0,0 @@
-0.1.24