snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (176)
  1. snowflake/ml/_internal/telemetry.py +19 -0
  2. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  3. snowflake/ml/model/_client/sql/model.py +1 -7
  4. snowflake/ml/model/_client/sql/model_version.py +20 -15
  5. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  6. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  7. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  8. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  9. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  10. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  11. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  12. snowflake/ml/model/type_hints.py +3 -0
  13. snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +63 -95
  14. snowflake/ml/modeling/_internal/snowpark_handlers.py +9 -6
  15. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +16 -0
  16. snowflake/ml/modeling/cluster/affinity_propagation.py +16 -0
  17. snowflake/ml/modeling/cluster/agglomerative_clustering.py +16 -0
  18. snowflake/ml/modeling/cluster/birch.py +16 -0
  19. snowflake/ml/modeling/cluster/bisecting_k_means.py +16 -0
  20. snowflake/ml/modeling/cluster/dbscan.py +16 -0
  21. snowflake/ml/modeling/cluster/feature_agglomeration.py +16 -0
  22. snowflake/ml/modeling/cluster/k_means.py +16 -0
  23. snowflake/ml/modeling/cluster/mean_shift.py +16 -0
  24. snowflake/ml/modeling/cluster/mini_batch_k_means.py +16 -0
  25. snowflake/ml/modeling/cluster/optics.py +16 -0
  26. snowflake/ml/modeling/cluster/spectral_biclustering.py +16 -0
  27. snowflake/ml/modeling/cluster/spectral_clustering.py +16 -0
  28. snowflake/ml/modeling/cluster/spectral_coclustering.py +16 -0
  29. snowflake/ml/modeling/compose/column_transformer.py +16 -0
  30. snowflake/ml/modeling/compose/transformed_target_regressor.py +16 -0
  31. snowflake/ml/modeling/covariance/elliptic_envelope.py +16 -0
  32. snowflake/ml/modeling/covariance/empirical_covariance.py +16 -0
  33. snowflake/ml/modeling/covariance/graphical_lasso.py +16 -0
  34. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +16 -0
  35. snowflake/ml/modeling/covariance/ledoit_wolf.py +16 -0
  36. snowflake/ml/modeling/covariance/min_cov_det.py +16 -0
  37. snowflake/ml/modeling/covariance/oas.py +16 -0
  38. snowflake/ml/modeling/covariance/shrunk_covariance.py +16 -0
  39. snowflake/ml/modeling/decomposition/dictionary_learning.py +16 -0
  40. snowflake/ml/modeling/decomposition/factor_analysis.py +16 -0
  41. snowflake/ml/modeling/decomposition/fast_ica.py +16 -0
  42. snowflake/ml/modeling/decomposition/incremental_pca.py +16 -0
  43. snowflake/ml/modeling/decomposition/kernel_pca.py +16 -0
  44. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +16 -0
  45. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +16 -0
  46. snowflake/ml/modeling/decomposition/pca.py +16 -0
  47. snowflake/ml/modeling/decomposition/sparse_pca.py +16 -0
  48. snowflake/ml/modeling/decomposition/truncated_svd.py +16 -0
  49. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +16 -0
  50. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +16 -0
  51. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +16 -0
  52. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +16 -0
  53. snowflake/ml/modeling/ensemble/bagging_classifier.py +16 -0
  54. snowflake/ml/modeling/ensemble/bagging_regressor.py +16 -0
  55. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +16 -0
  56. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +16 -0
  57. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +16 -0
  58. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +16 -0
  59. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +16 -0
  60. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +16 -0
  61. snowflake/ml/modeling/ensemble/isolation_forest.py +16 -0
  62. snowflake/ml/modeling/ensemble/random_forest_classifier.py +16 -0
  63. snowflake/ml/modeling/ensemble/random_forest_regressor.py +16 -0
  64. snowflake/ml/modeling/ensemble/stacking_regressor.py +16 -0
  65. snowflake/ml/modeling/ensemble/voting_classifier.py +16 -0
  66. snowflake/ml/modeling/ensemble/voting_regressor.py +16 -0
  67. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +16 -0
  68. snowflake/ml/modeling/feature_selection/select_fdr.py +16 -0
  69. snowflake/ml/modeling/feature_selection/select_fpr.py +16 -0
  70. snowflake/ml/modeling/feature_selection/select_fwe.py +16 -0
  71. snowflake/ml/modeling/feature_selection/select_k_best.py +16 -0
  72. snowflake/ml/modeling/feature_selection/select_percentile.py +16 -0
  73. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +16 -0
  74. snowflake/ml/modeling/feature_selection/variance_threshold.py +16 -0
  75. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +16 -0
  76. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +16 -0
  77. snowflake/ml/modeling/impute/iterative_imputer.py +16 -0
  78. snowflake/ml/modeling/impute/knn_imputer.py +16 -0
  79. snowflake/ml/modeling/impute/missing_indicator.py +16 -0
  80. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +16 -0
  81. snowflake/ml/modeling/kernel_approximation/nystroem.py +16 -0
  82. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +16 -0
  83. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +16 -0
  84. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +16 -0
  85. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +16 -0
  86. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +16 -0
  87. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +16 -0
  88. snowflake/ml/modeling/linear_model/ard_regression.py +16 -0
  89. snowflake/ml/modeling/linear_model/bayesian_ridge.py +16 -0
  90. snowflake/ml/modeling/linear_model/elastic_net.py +16 -0
  91. snowflake/ml/modeling/linear_model/elastic_net_cv.py +16 -0
  92. snowflake/ml/modeling/linear_model/gamma_regressor.py +16 -0
  93. snowflake/ml/modeling/linear_model/huber_regressor.py +16 -0
  94. snowflake/ml/modeling/linear_model/lars.py +16 -0
  95. snowflake/ml/modeling/linear_model/lars_cv.py +16 -0
  96. snowflake/ml/modeling/linear_model/lasso.py +16 -0
  97. snowflake/ml/modeling/linear_model/lasso_cv.py +16 -0
  98. snowflake/ml/modeling/linear_model/lasso_lars.py +16 -0
  99. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +16 -0
  100. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +16 -0
  101. snowflake/ml/modeling/linear_model/linear_regression.py +16 -0
  102. snowflake/ml/modeling/linear_model/logistic_regression.py +16 -0
  103. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +16 -0
  104. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +16 -0
  105. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +16 -0
  106. snowflake/ml/modeling/linear_model/multi_task_lasso.py +16 -0
  107. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +16 -0
  108. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +16 -0
  109. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +16 -0
  110. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +16 -0
  111. snowflake/ml/modeling/linear_model/perceptron.py +16 -0
  112. snowflake/ml/modeling/linear_model/poisson_regressor.py +16 -0
  113. snowflake/ml/modeling/linear_model/ransac_regressor.py +16 -0
  114. snowflake/ml/modeling/linear_model/ridge.py +16 -0
  115. snowflake/ml/modeling/linear_model/ridge_classifier.py +16 -0
  116. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +16 -0
  117. snowflake/ml/modeling/linear_model/ridge_cv.py +16 -0
  118. snowflake/ml/modeling/linear_model/sgd_classifier.py +16 -0
  119. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +16 -0
  120. snowflake/ml/modeling/linear_model/sgd_regressor.py +16 -0
  121. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +16 -0
  122. snowflake/ml/modeling/linear_model/tweedie_regressor.py +16 -0
  123. snowflake/ml/modeling/manifold/isomap.py +16 -0
  124. snowflake/ml/modeling/manifold/mds.py +16 -0
  125. snowflake/ml/modeling/manifold/spectral_embedding.py +16 -0
  126. snowflake/ml/modeling/manifold/tsne.py +16 -0
  127. snowflake/ml/modeling/metrics/classification.py +5 -6
  128. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  129. snowflake/ml/modeling/metrics/ranking.py +7 -3
  130. snowflake/ml/modeling/metrics/regression.py +6 -3
  131. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +16 -0
  132. snowflake/ml/modeling/mixture/gaussian_mixture.py +16 -0
  133. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +16 -0
  134. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +16 -0
  135. snowflake/ml/modeling/multiclass/output_code_classifier.py +16 -0
  136. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +16 -0
  137. snowflake/ml/modeling/naive_bayes/categorical_nb.py +16 -0
  138. snowflake/ml/modeling/naive_bayes/complement_nb.py +16 -0
  139. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +16 -0
  140. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +16 -0
  141. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +16 -0
  142. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +16 -0
  143. snowflake/ml/modeling/neighbors/kernel_density.py +16 -0
  144. snowflake/ml/modeling/neighbors/local_outlier_factor.py +16 -0
  145. snowflake/ml/modeling/neighbors/nearest_centroid.py +16 -0
  146. snowflake/ml/modeling/neighbors/nearest_neighbors.py +16 -0
  147. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +16 -0
  148. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +16 -0
  149. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +16 -0
  150. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +16 -0
  151. snowflake/ml/modeling/neural_network/mlp_classifier.py +16 -0
  152. snowflake/ml/modeling/neural_network/mlp_regressor.py +16 -0
  153. snowflake/ml/modeling/preprocessing/polynomial_features.py +16 -0
  154. snowflake/ml/modeling/semi_supervised/label_propagation.py +16 -0
  155. snowflake/ml/modeling/semi_supervised/label_spreading.py +16 -0
  156. snowflake/ml/modeling/svm/linear_svc.py +16 -0
  157. snowflake/ml/modeling/svm/linear_svr.py +16 -0
  158. snowflake/ml/modeling/svm/nu_svc.py +16 -0
  159. snowflake/ml/modeling/svm/nu_svr.py +16 -0
  160. snowflake/ml/modeling/svm/svc.py +16 -0
  161. snowflake/ml/modeling/svm/svr.py +16 -0
  162. snowflake/ml/modeling/tree/decision_tree_classifier.py +16 -0
  163. snowflake/ml/modeling/tree/decision_tree_regressor.py +16 -0
  164. snowflake/ml/modeling/tree/extra_tree_classifier.py +16 -0
  165. snowflake/ml/modeling/tree/extra_tree_regressor.py +16 -0
  166. snowflake/ml/modeling/xgboost/xgb_classifier.py +16 -0
  167. snowflake/ml/modeling/xgboost/xgb_regressor.py +16 -0
  168. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +16 -0
  169. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +16 -0
  170. snowflake/ml/registry/registry.py +2 -0
  171. snowflake/ml/version.py +1 -1
  172. snowflake_ml_python-1.2.1.dist-info/LICENSE.txt +202 -0
  173. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/METADATA +261 -50
  174. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/RECORD +189 -186
  175. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.1.dist-info}/WHEEL +2 -1
  176. snowflake_ml_python-1.2.1.dist-info/top_level.txt +1 -0
@@ -522,6 +522,22 @@ class OAS(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
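
This same 16-line fallback is repeated in every generated transformer below; only the surrounding class differs. As a rough illustration of the branching (a hypothetical standalone sketch: infer_expected_dtype and _FakeDecomposition are invented names, not part of the snowflake-ml-python API):

# Hypothetical sketch of the dtype-inference fallback added in 1.2.1.
class _FakeDecomposition:
    # Stand-in for a fitted sklearn estimator exposing `n_components`.
    def __init__(self, n_components):
        self.n_components = n_components

def infer_expected_dtype(sklearn_object, output_cols, input_types):
    # Mirrors the branching above: a width mismatch between the estimator's
    # clusters/components and the declared output columns forces "ARRAY";
    # otherwise, uniform input types of matching width are reused directly.
    if hasattr(sklearn_object, "n_clusters") and sklearn_object.n_clusters != len(output_cols):
        return "ARRAY"
    if hasattr(sklearn_object, "n_components") and sklearn_object.n_components != len(output_cols):
        return "ARRAY"
    if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
        return input_types[0]
    return "ARRAY"  # fall back to a list-per-row (variant) output

print(infer_expected_dtype(_FakeDecomposition(2), ["PC1", "PC2"], ["FLOAT", "FLOAT"]))  # -> FLOAT
print(infer_expected_dtype(_FakeDecomposition(3), ["OUT1"], ["FLOAT", "FLOAT"]))        # -> ARRAY

The net effect is that a transform whose output width matches uniform scalar inputs can keep a scalar Snowflake type instead of always falling back to a variant-style ARRAY.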
@@ -528,6 +528,22 @@ class ShrunkCovariance(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -636,6 +636,22 @@ class DictionaryLearning(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -573,6 +573,22 @@ class FactorAnalysis(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -591,6 +591,22 @@ class FastICA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -543,6 +543,22 @@ class IncrementalPCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -639,6 +639,22 @@ class KernelPCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -661,6 +661,22 @@ class MiniBatchDictionaryLearning(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -606,6 +606,22 @@ class MiniBatchSparsePCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -608,6 +608,22 @@ class PCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -581,6 +581,22 @@ class SparsePCA(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -562,6 +562,22 @@ class TruncatedSVD(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -581,6 +581,22 @@ class LinearDiscriminantAnalysis(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -541,6 +541,22 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -566,6 +566,22 @@ class AdaBoostClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -563,6 +563,22 @@ class AdaBoostRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -598,6 +598,22 @@ class BaggingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -598,6 +598,22 @@ class BaggingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -701,6 +701,22 @@ class ExtraTreesClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -680,6 +680,22 @@ class ExtraTreesRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -713,6 +713,22 @@ class GradientBoostingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -722,6 +722,22 @@ class GradientBoostingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -694,6 +694,22 @@ class HistGradientBoostingClassifier(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",
@@ -685,6 +685,22 @@ class HistGradientBoostingRegressor(BaseTransformer):
                 # each row containing a list of values.
                 expected_dtype = "ARRAY"
 
+            # If we were unable to assign a type to this transform in the factory, infer the type here.
+            if expected_dtype == "":
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+                if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+                elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                    expected_dtype = "ARRAY"
+                else:
+                    output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                    # We can only infer the output types from the input types if the following two statements are true:
+                    # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                    # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                    if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                        expected_dtype = convert_sp_to_sf_type(output_types[0])
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="transform",