PyPI - snowflake-ml-python - Versions diffs - 1.7.3__py3-none-any.whl → 1.7.4__py3-none-any.whl - Mend

snowflake-ml-python: 1.7.3 (py3-none-any.whl) → 1.7.4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (187) hide show

snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.mixture".replace("sklear
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class BayesianGaussianMixture(BaseTransformer):
     r"""Variational Bayesian estimation of a Gaussian mixture
     For more details on this class, see [sklearn.mixture.BayesianGaussianMixture]
@@ -532,7 +535,7 @@ class BayesianGaussianMixture(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1197,7 +1200,7 @@ class BayesianGaussianMixture(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1205,7 +1208,7 @@ class BayesianGaussianMixture(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/mixture/gaussian_mixture.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.mixture".replace("sklear
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class GaussianMixture(BaseTransformer):
     r"""Gaussian Mixture
     For more details on this class, see [sklearn.mixture.GaussianMixture]
@@ -505,7 +508,7 @@ class GaussianMixture(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1170,7 +1173,7 @@ class GaussianMixture(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1178,7 +1181,7 @@ class GaussianMixture(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/model_selection/grid_search_cv.py CHANGED Viewed

@@ -21,6 +21,7 @@ from snowflake.ml.model.model_signature import (
     ModelSignature,
     _infer_signature,
     _rename_signature_with_snowflake_identifiers,
+    _truncate_data,
 )
 from snowflake.ml.modeling._internal.estimator_utils import (
     gather_dependencies,
@@ -47,6 +48,8 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "ModelSelection"
 DEFAULT_UDTF_NJOBS = 3
+INFER_SIGNATURE_MAX_ROWS = 100
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
@@ -810,7 +813,13 @@ class GridSearchCV(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(
+            _infer_signature(
+                _truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS),
+                "input",
+                use_snowflake_identifiers=True,
+            )
+        )
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -818,7 +827,13 @@ class GridSearchCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(
+                    _infer_signature(
+                        _truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS),
+                        "output",
+                        use_snowflake_identifiers=True,
+                    )
+                )
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/model_selection/randomized_search_cv.py CHANGED Viewed

@@ -18,6 +18,7 @@ from snowflake.ml.model.model_signature import (
     ModelSignature,
     _infer_signature,
     _rename_signature_with_snowflake_identifiers,
+    _truncate_data,
 )
 from snowflake.ml.modeling._internal.estimator_utils import (
     gather_dependencies,
@@ -44,6 +45,8 @@ _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "ModelSelection"
 DEFAULT_UDTF_NJOBS = 3
+INFER_SIGNATURE_MAX_ROWS = 100
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
@@ -825,7 +828,13 @@ class RandomizedSearchCV(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(
+            _infer_signature(
+                _truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS),
+                "input",
+                use_snowflake_identifiers=True,
+            )
+        )
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -833,7 +842,13 @@ class RandomizedSearchCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(
+                    _infer_signature(
+                        _truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS),
+                        "output",
+                        use_snowflake_identifiers=True,
+                    )
+                )
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/multiclass/one_vs_one_classifier.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.multiclass".replace("skl
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class OneVsOneClassifier(BaseTransformer):
     r"""One-vs-one multiclass strategy
     For more details on this class, see [sklearn.multiclass.OneVsOneClassifier]
@@ -415,7 +418,7 @@ class OneVsOneClassifier(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1074,7 +1077,7 @@ class OneVsOneClassifier(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1082,7 +1085,7 @@ class OneVsOneClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.multiclass".replace("skl
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class OneVsRestClassifier(BaseTransformer):
     r"""One-vs-the-rest (OvR) multiclass strategy
     For more details on this class, see [sklearn.multiclass.OneVsRestClassifier]
@@ -424,7 +427,7 @@ class OneVsRestClassifier(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1087,7 +1090,7 @@ class OneVsRestClassifier(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1095,7 +1098,7 @@ class OneVsRestClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/multiclass/output_code_classifier.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.multiclass".replace("skl
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class OutputCodeClassifier(BaseTransformer):
     r"""(Error-Correcting) Output-Code multiclass strategy
     For more details on this class, see [sklearn.multiclass.OutputCodeClassifier]
@@ -427,7 +430,7 @@ class OutputCodeClassifier(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1084,7 +1087,7 @@ class OutputCodeClassifier(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1092,7 +1095,7 @@ class OutputCodeClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/naive_bayes/bernoulli_nb.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sk
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class BernoulliNB(BaseTransformer):
     r"""Naive Bayes classifier for multivariate Bernoulli models
     For more details on this class, see [sklearn.naive_bayes.BernoulliNB]
@@ -427,7 +430,7 @@ class BernoulliNB(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1088,7 +1091,7 @@ class BernoulliNB(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1096,7 +1099,7 @@ class BernoulliNB(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/naive_bayes/categorical_nb.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sk
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class CategoricalNB(BaseTransformer):
     r"""Naive Bayes classifier for categorical features
     For more details on this class, see [sklearn.naive_bayes.CategoricalNB]
@@ -433,7 +436,7 @@ class CategoricalNB(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1094,7 +1097,7 @@ class CategoricalNB(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1102,7 +1105,7 @@ class CategoricalNB(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/naive_bayes/complement_nb.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sk
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class ComplementNB(BaseTransformer):
     r"""The Complement Naive Bayes classifier described in Rennie et al
     For more details on this class, see [sklearn.naive_bayes.ComplementNB]
@@ -427,7 +430,7 @@ class ComplementNB(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1088,7 +1091,7 @@ class ComplementNB(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1096,7 +1099,7 @@ class ComplementNB(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/naive_bayes/gaussian_nb.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sk
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class GaussianNB(BaseTransformer):
     r"""Gaussian Naive Bayes (GaussianNB)
     For more details on this class, see [sklearn.naive_bayes.GaussianNB]
@@ -408,7 +411,7 @@ class GaussianNB(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1069,7 +1072,7 @@ class GaussianNB(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1077,7 +1080,7 @@ class GaussianNB(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/naive_bayes/multinomial_nb.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.naive_bayes".replace("sk
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class MultinomialNB(BaseTransformer):
     r"""Naive Bayes classifier for multinomial models
     For more details on this class, see [sklearn.naive_bayes.MultinomialNB]
@@ -421,7 +424,7 @@ class MultinomialNB(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1082,7 +1085,7 @@ class MultinomialNB(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1090,7 +1093,7 @@ class MultinomialNB(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/neighbors/k_neighbors_classifier.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neighbors".replace("skle
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class KNeighborsClassifier(BaseTransformer):
     r"""Classifier implementing the k-nearest neighbors vote
     For more details on this class, see [sklearn.neighbors.KNeighborsClassifier]
@@ -484,7 +487,7 @@ class KNeighborsClassifier(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1147,7 +1150,7 @@ class KNeighborsClassifier(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1155,7 +1158,7 @@ class KNeighborsClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/neighbors/k_neighbors_regressor.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neighbors".replace("skle
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class KNeighborsRegressor(BaseTransformer):
     r"""Regression based on k-nearest neighbors
     For more details on this class, see [sklearn.neighbors.KNeighborsRegressor]
@@ -483,7 +486,7 @@ class KNeighborsRegressor(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1142,7 +1145,7 @@ class KNeighborsRegressor(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1150,7 +1153,7 @@ class KNeighborsRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.4__py3-none-any.whl

snowflake-ml-python 1.7.3py3-none-any.whl → 1.7.4py3-none-any.whl