PyPI - snowflake-ml-python - Versions diffs - 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl - Mend

snowflake-ml-python: 1.7.3 (py3-none-any.whl) → 1.7.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208)

snowflake/ml/modeling/linear_model/multi_task_elastic_net.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class MultiTaskElasticNet(BaseTransformer):
     r"""Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer
     For more details on this class, see [sklearn.linear_model.MultiTaskElasticNet]
@@ -457,7 +460,7 @@ class MultiTaskElasticNet(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1114,7 +1117,7 @@ class MultiTaskElasticNet(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1122,7 +1125,7 @@ class MultiTaskElasticNet(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class MultiTaskElasticNetCV(BaseTransformer):
     r"""Multi-task L1/L2 ElasticNet with built-in cross-validation
     For more details on this class, see [sklearn.linear_model.MultiTaskElasticNetCV]
@@ -498,7 +501,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1155,7 +1158,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1163,7 +1166,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/multi_task_lasso.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class MultiTaskLasso(BaseTransformer):
     r"""Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer
     For more details on this class, see [sklearn.linear_model.MultiTaskLasso]
@@ -449,7 +452,7 @@ class MultiTaskLasso(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1106,7 +1109,7 @@ class MultiTaskLasso(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1114,7 +1117,7 @@ class MultiTaskLasso(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class MultiTaskLassoCV(BaseTransformer):
     r"""Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer
     For more details on this class, see [sklearn.linear_model.MultiTaskLassoCV]
@@ -484,7 +487,7 @@ class MultiTaskLassoCV(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1141,7 +1144,7 @@ class MultiTaskLassoCV(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1149,7 +1152,7 @@ class MultiTaskLassoCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class OrthogonalMatchingPursuit(BaseTransformer):
     r"""Orthogonal Matching Pursuit model (OMP)
     For more details on this class, see [sklearn.linear_model.OrthogonalMatchingPursuit]
@@ -423,7 +426,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1080,7 +1083,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1088,7 +1091,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class PassiveAggressiveClassifier(BaseTransformer):
     r"""Passive Aggressive Classifier
     For more details on this class, see [sklearn.linear_model.PassiveAggressiveClassifier]
@@ -506,7 +509,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1165,7 +1168,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1173,7 +1176,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class PassiveAggressiveRegressor(BaseTransformer):
     r"""Passive Aggressive Regressor
     For more details on this class, see [sklearn.linear_model.PassiveAggressiveRegressor]
@@ -492,7 +495,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1149,7 +1152,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1157,7 +1160,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/perceptron.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class Perceptron(BaseTransformer):
     r"""Linear perceptron classifier
     For more details on this class, see [sklearn.linear_model.Perceptron]
@@ -505,7 +508,7 @@ class Perceptron(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1164,7 +1167,7 @@ class Perceptron(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1172,7 +1175,7 @@ class Perceptron(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/poisson_regressor.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class PoissonRegressor(BaseTransformer):
     r"""Generalized Linear Model with a Poisson distribution
     For more details on this class, see [sklearn.linear_model.PoissonRegressor]
@@ -454,7 +457,7 @@ class PoissonRegressor(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1111,7 +1114,7 @@ class PoissonRegressor(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1119,7 +1122,7 @@ class PoissonRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/ransac_regressor.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class RANSACRegressor(BaseTransformer):
     r"""RANSAC (RANdom SAmple Consensus) algorithm
     For more details on this class, see [sklearn.linear_model.RANSACRegressor]
@@ -511,7 +514,7 @@ class RANSACRegressor(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1168,7 +1171,7 @@ class RANSACRegressor(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1176,7 +1179,7 @@ class RANSACRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/ridge.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class Ridge(BaseTransformer):
     r"""Linear least squares with l2 regularization
     For more details on this class, see [sklearn.linear_model.Ridge]
@@ -502,7 +505,7 @@ class Ridge(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1159,7 +1162,7 @@ class Ridge(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1167,7 +1170,7 @@ class Ridge(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/ridge_classifier.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class RidgeClassifier(BaseTransformer):
     r"""Classifier using Ridge regression
     For more details on this class, see [sklearn.linear_model.RidgeClassifier]
@@ -502,7 +505,7 @@ class RidgeClassifier(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1161,7 +1164,7 @@ class RidgeClassifier(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1169,7 +1172,7 @@ class RidgeClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/ridge_classifier_cv.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class RidgeClassifierCV(BaseTransformer):
     r"""Ridge classifier with built-in cross-validation
     For more details on this class, see [sklearn.linear_model.RidgeClassifierCV]
@@ -461,7 +464,7 @@ class RidgeClassifierCV(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1120,7 +1123,7 @@ class RidgeClassifierCV(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1128,7 +1131,7 @@ class RidgeClassifierCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/linear_model/ridge_cv.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class RidgeCV(BaseTransformer):
     r"""Ridge regression with built-in cross-validation
     For more details on this class, see [sklearn.linear_model.RidgeCV]
@@ -480,7 +483,7 @@ class RidgeCV(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1137,7 +1140,7 @@ class RidgeCV(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1145,7 +1148,7 @@ class RidgeCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl

snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl