PyPI - snowflake-ml-python - Versions diffs - 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl - Mend

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (237) hide show

snowflake/ml/modeling/feature_selection/select_fwe.py CHANGED Viewed

@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class SelectFwe(BaseTransformer):
     r"""Filter: Select the p-values corresponding to Family-wise error rate
     For more details on this class, see [sklearn.feature_selection.SelectFwe]
@@ -410,7 +413,7 @@ class SelectFwe(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1067,7 +1070,7 @@ class SelectFwe(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1075,7 +1078,7 @@ class SelectFwe(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/feature_selection/select_k_best.py CHANGED Viewed

@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class SelectKBest(BaseTransformer):
     r"""Select features according to the k highest scores
     For more details on this class, see [sklearn.feature_selection.SelectKBest]
@@ -411,7 +414,7 @@ class SelectKBest(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1068,7 +1071,7 @@ class SelectKBest(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1076,7 +1079,7 @@ class SelectKBest(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/feature_selection/select_percentile.py CHANGED Viewed

@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class SelectPercentile(BaseTransformer):
     r"""Select features according to a percentile of the highest scores
     For more details on this class, see [sklearn.feature_selection.SelectPercentile]
@@ -410,7 +413,7 @@ class SelectPercentile(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1067,7 +1070,7 @@ class SelectPercentile(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1075,7 +1078,7 @@ class SelectPercentile(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/feature_selection/sequential_feature_selector.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class SequentialFeatureSelector(BaseTransformer):
     r"""Transformer that performs Sequential Feature Selection
     For more details on this class, see [sklearn.feature_selection.SequentialFeatureSelector]
@@ -472,7 +475,7 @@ class SequentialFeatureSelector(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1129,7 +1132,7 @@ class SequentialFeatureSelector(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1137,7 +1140,7 @@ class SequentialFeatureSelector(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/feature_selection/variance_threshold.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.feature_selection".repla
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class VarianceThreshold(BaseTransformer):
     r"""Feature selector that removes all low-variance features
     For more details on this class, see [sklearn.feature_selection.VarianceThreshold]
@@ -403,7 +406,7 @@ class VarianceThreshold(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1060,7 +1063,7 @@ class VarianceThreshold(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1068,7 +1071,7 @@ class VarianceThreshold(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.gaussian_process".replac
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class GaussianProcessClassifier(BaseTransformer):
     r"""Gaussian process classification (GPC) based on Laplace approximation
     For more details on this class, see [sklearn.gaussian_process.GaussianProcessClassifier]
@@ -496,7 +499,7 @@ class GaussianProcessClassifier(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1157,7 +1160,7 @@ class GaussianProcessClassifier(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1165,7 +1168,7 @@ class GaussianProcessClassifier(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.gaussian_process".replac
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class GaussianProcessRegressor(BaseTransformer):
     r"""Gaussian process regression (GPR)
     For more details on this class, see [sklearn.gaussian_process.GaussianProcessRegressor]
@@ -487,7 +490,7 @@ class GaussianProcessRegressor(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1144,7 +1147,7 @@ class GaussianProcessRegressor(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1152,7 +1155,7 @@ class GaussianProcessRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/impute/iterative_imputer.py CHANGED Viewed

@@ -38,6 +38,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -58,6 +59,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.impute".replace("sklearn
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class IterativeImputer(BaseTransformer):
     r"""Multivariate imputer that estimates each feature from all the others
     For more details on this class, see [sklearn.impute.IterativeImputer]
@@ -531,7 +534,7 @@ class IterativeImputer(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1188,7 +1191,7 @@ class IterativeImputer(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1196,7 +1199,7 @@ class IterativeImputer(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/impute/knn_imputer.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.impute".replace("sklearn
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class KNNImputer(BaseTransformer):
     r"""Imputation for completing missing values using k-Nearest Neighbors
     For more details on this class, see [sklearn.impute.KNNImputer]
@@ -457,7 +460,7 @@ class KNNImputer(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1114,7 +1117,7 @@ class KNNImputer(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1122,7 +1125,7 @@ class KNNImputer(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/impute/missing_indicator.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.impute".replace("sklearn
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class MissingIndicator(BaseTransformer):
     r"""Binary indicators for missing values
     For more details on this class, see [sklearn.impute.MissingIndicator]
@@ -431,7 +434,7 @@ class MissingIndicator(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1088,7 +1091,7 @@ class MissingIndicator(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1096,7 +1099,7 @@ class MissingIndicator(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.kernel_approximation".re
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class AdditiveChi2Sampler(BaseTransformer):
     r"""Approximate feature map for additive chi2 kernel
     For more details on this class, see [sklearn.kernel_approximation.AdditiveChi2Sampler]
@@ -406,7 +409,7 @@ class AdditiveChi2Sampler(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1063,7 +1066,7 @@ class AdditiveChi2Sampler(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1071,7 +1074,7 @@ class AdditiveChi2Sampler(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/kernel_approximation/nystroem.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.kernel_approximation".re
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class Nystroem(BaseTransformer):
     r"""Approximate a kernel map using a subset of the training data
     For more details on this class, see [sklearn.kernel_approximation.Nystroem]
@@ -454,7 +457,7 @@ class Nystroem(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1111,7 +1114,7 @@ class Nystroem(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1119,7 +1122,7 @@ class Nystroem(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.kernel_approximation".re
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class PolynomialCountSketch(BaseTransformer):
     r"""Polynomial kernel approximation via Tensor Sketch
     For more details on this class, see [sklearn.kernel_approximation.PolynomialCountSketch]
@@ -430,7 +433,7 @@ class PolynomialCountSketch(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1087,7 +1090,7 @@ class PolynomialCountSketch(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1095,7 +1098,7 @@ class PolynomialCountSketch(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake/ml/modeling/kernel_approximation/rbf_sampler.py CHANGED Viewed

@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.kernel_approximation".re
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
+INFER_SIGNATURE_MAX_ROWS = 100
 class RBFSampler(BaseTransformer):
     r"""Approximate a RBF kernel feature map using random Fourier features
     For more details on this class, see [sklearn.kernel_approximation.RBFSampler]
@@ -417,7 +420,7 @@ class RBFSampler(BaseTransformer):
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                     expected_dtype = "array"
                 else:
-                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
                     # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                     # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1074,7 +1077,7 @@ class RBFSampler(BaseTransformer):
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
@@ -1082,7 +1085,7 @@ class RBFSampler(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl