snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
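Files 4-7 above introduce a new snowflake.ml.feature_store package (Entity, FeatureView, and the FeatureStore client). A minimal sketch of how these pieces might fit together, assuming the class names exported by feature_store/__init__.py; the constructor parameters and method names below are assumptions inferred from the file names, not confirmed signatures from this release.

# Hypothetical sketch of the new feature_store package added in 1.2.2
# (entity.py, feature_view.py, feature_store.py). Class names come from the
# file list above; parameters are assumptions, not confirmed API.
from snowflake.snowpark import Session
from snowflake.ml.feature_store import Entity, FeatureStore, FeatureView

# Placeholder connection parameters; fill in real credentials.
session = Session.builder.configs(
    {"account": "...", "user": "...", "password": "..."}
).create()

fs = FeatureStore(  # assumed signature
    session=session,
    database="ML_DB",
    name="MY_FEATURE_STORE",
    default_warehouse="ML_WH",
)

# An entity names the join keys that feature views are keyed on.
customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])  # assumed
fs.register_entity(customer)

# A feature view wraps a Snowpark DataFrame of per-entity features.
fv = FeatureView(  # assumed
    name="CUSTOMER_ORDER_COUNTS",
    entities=[customer],
    feature_df=session.table("RAW_ORDERS").group_by("CUSTOMER_ID").count(),
)
fs.register_feature_view(feature_view=fv, version="V1")  # assumed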
--- a/snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -255,7 +255,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskElasticNetCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskElasticNetCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -615,6 +615,22 @@ class MultiTaskElasticNetCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
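The sixteen lines added in the final hunk (repeated verbatim in each of the eleven sibling diffs below) implement a fallback for the output column type: when the factory left expected_dtype empty, clustering and decomposition transformers whose output-column count differs from n_clusters or n_components fall back to ARRAY, and otherwise the input type is reused only when all inputs share one type and the column counts match. A standalone sketch of that decision logic, with plain strings standing in for the Snowpark type objects and the caller supplying what _infer_signature plus convert_sp_to_sf_type would produce:

# Standalone sketch of the expected-dtype fallback added in these diffs.
# input_types stands in for the converted results of _infer_signature(...);
# this is an illustration of the logic, not the library's internal function.
from typing import Any, List

def resolve_expected_dtype(sklearn_obj: Any, output_cols: List[str], input_types: List[str]) -> str:
    expected_dtype = ""
    # Clustering: if the output-column count differs from n_clusters, each
    # row holds a list of values, so the Snowflake column type must be ARRAY.
    if hasattr(sklearn_obj, "n_clusters") and sklearn_obj.n_clusters != len(output_cols):
        expected_dtype = "ARRAY"
    # Decomposition: same reasoning, keyed on n_components.
    elif hasattr(sklearn_obj, "n_components") and sklearn_obj.n_components != len(output_cols):
        expected_dtype = "ARRAY"
    else:
        # Reusing the input type is only safe when every input shares one
        # type and the transform maps columns one-to-one onto the outputs.
        if input_types and all(t == input_types[0] for t in input_types) and len(input_types) == len(output_cols):
            expected_dtype = input_types[0]
    return expected_dtype

# Example: a PCA-like object with n_components=2 and two output columns keeps
# the shared input type; with three output columns the mismatch forces ARRAY.
class FakePCA:
    n_components = 2

print(resolve_expected_dtype(FakePCA(), ["PC1", "PC2"], ["DOUBLE", "DOUBLE"]))          # DOUBLE
print(resolve_expected_dtype(FakePCA(), ["PC1", "PC2", "PC3"], ["DOUBLE", "DOUBLE"]))   # ARRAY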
--- a/snowflake/ml/modeling/linear_model/multi_task_lasso.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_lasso.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -206,7 +206,7 @@ class MultiTaskLasso(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskLasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskLasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -566,6 +566,22 @@ class MultiTaskLasso(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py
+++ b/snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -241,7 +241,7 @@ class MultiTaskLassoCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskLassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskLassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -601,6 +601,22 @@ class MultiTaskLassoCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py
+++ b/snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -189,7 +189,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=OrthogonalMatchingPursuit.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=OrthogonalMatchingPursuit.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -549,6 +549,22 @@ class OrthogonalMatchingPursuit(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py
+++ b/snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -263,7 +263,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=PassiveAggressiveClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PassiveAggressiveClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -623,6 +623,22 @@ class PassiveAggressiveClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py
+++ b/snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -249,7 +249,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=PassiveAggressiveRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PassiveAggressiveRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -609,6 +609,22 @@ class PassiveAggressiveRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/perceptron.py
+++ b/snowflake/ml/modeling/linear_model/perceptron.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -262,7 +262,7 @@ class Perceptron(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Perceptron.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Perceptron.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -622,6 +622,22 @@ class Perceptron(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/poisson_regressor.py
+++ b/snowflake/ml/modeling/linear_model/poisson_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -211,7 +211,7 @@ class PoissonRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=PoissonRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=PoissonRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -571,6 +571,22 @@ class PoissonRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ransac_regressor.py
+++ b/snowflake/ml/modeling/linear_model/ransac_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -267,7 +267,7 @@ class RANSACRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RANSACRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RANSACRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -627,6 +627,22 @@ class RANSACRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ridge.py
+++ b/snowflake/ml/modeling/linear_model/ridge.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -259,7 +259,7 @@ class Ridge(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Ridge.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Ridge.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -619,6 +619,22 @@ class Ridge(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ridge_classifier.py
+++ b/snowflake/ml/modeling/linear_model/ridge_classifier.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -259,7 +259,7 @@ class RidgeClassifier(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RidgeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RidgeClassifier.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -619,6 +619,22 @@ class RidgeClassifier(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
--- a/snowflake/ml/modeling/linear_model/ridge_classifier_cv.py
+++ b/snowflake/ml/modeling/linear_model/ridge_classifier_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -210,7 +210,7 @@ class RidgeClassifierCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=RidgeClassifierCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=RidgeClassifierCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
        self._autogenerated = True
 
     def _get_rand_id(self) -> str:
@@ -570,6 +570,22 @@ class RidgeClassifierCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"
 
+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
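None of these changes alter the public estimator surface; the import-path moves into snowpark_implementations and the FitPredictHandlers → TransformerHandlers rename are internal. A hedged usage sketch against one of the affected classes, with the connection parameters, table, and column names as placeholder assumptions:

# Hedged usage sketch: calling code is unaffected by the internal renames.
# Table and column names here are illustrative assumptions.
from snowflake.snowpark import Session
from snowflake.ml.modeling.linear_model import Ridge

session = Session.builder.configs(
    {"account": "...", "user": "...", "password": "..."}
).create()
train_df = session.table("TRAINING_DATA")  # assumed table

model = Ridge(
    input_cols=["FEATURE_1", "FEATURE_2"],
    label_cols=["TARGET"],
    output_cols=["TARGET_PREDICTION"],
)
model.fit(train_df)                     # trains inside Snowflake
predictions = model.predict(train_df)   # batch inference via _batch_inference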