snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
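The headline changes are the new snowflake.ml.feature_store package (items 4–7) and the move of the internal trainer and handler modules into local_implementations and snowpark_implementations subpackages (items 26–28, 193–194); the per-estimator hunks below are the import-path fallout of that move. As a rough sketch of how the new feature store package is used, based on Snowflake's public feature store documentation (the exact 1.2.2 signatures are an assumption, not taken from this diff):

# Hedged sketch of the feature store API introduced in 1.2.2; entity/view
# names, the source table, and connection parameters are all hypothetical.
from snowflake.snowpark import Session
from snowflake.ml.feature_store import Entity, FeatureStore, FeatureView

# Fill in your own connection parameters.
session = Session.builder.configs({"account": "...", "user": "...", "password": "..."}).create()

fs = FeatureStore(
    session=session,
    database="ML_DB",          # database holding the feature store
    name="FS_SCHEMA",          # schema used as the feature store
    default_warehouse="ML_WH",
)

customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])
fs.register_entity(customer)

features_df = session.table("ML_DB.RAW.CUSTOMER_METRICS")  # hypothetical feature source
fv = FeatureView(
    name="CUSTOMER_FEATURES",
    entities=[customer],
    feature_df=features_df,
    refresh_freq="1 day",      # materialize with periodic refresh
)
fs.register_feature_view(feature_view=fv, version="V1")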
snowflake/ml/modeling/linear_model/huber_regressor.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -194,7 +194,7 @@ class HuberRegressor(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=HuberRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=HuberRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -554,6 +554,22 @@ class HuberRegressor(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
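These four hunks repeat, with only the class name and line offsets changing, in each of the estimator diffs that follow. Restated outside the generated class, the new expected_dtype fallback reads roughly as follows (the function name and plain-string types are hypothetical simplifications; the real code works on Snowpark signature objects and converts them with convert_sp_to_sf_type):

from typing import Any, List, Sequence


def infer_expected_dtype(sklearn_obj: Any, output_cols: Sequence[str], output_types: List[str]) -> str:
    """Hypothetical standalone restatement of the fallback added in 1.2.2."""
    # Clustering transformers emit one column per cluster; a count mismatch
    # means each row holds a list of values, i.e. an ARRAY column.
    if hasattr(sklearn_obj, "n_clusters") and sklearn_obj.n_clusters != len(output_cols):
        return "ARRAY"
    # Decomposition transformers emit one column per component; same reasoning.
    if hasattr(sklearn_obj, "n_components") and sklearn_obj.n_components != len(output_cols):
        return "ARRAY"
    # Copy the input type through only when all inputs share one type and
    # there is exactly one output column per input column.
    if output_types and all(t == output_types[0] for t in output_types) and len(output_types) == len(output_cols):
        return output_types[0]
    return ""  # unresolved: caller keeps its variant/ARRAY handling


# Example: a 2-component decomposition writing to a single output column falls back to ARRAY.
class _FakePCA:
    n_components = 2

assert infer_expected_dtype(_FakePCA(), ["OUTPUT_1"], ["FloatType()", "FloatType()"]) == "ARRAY"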
snowflake/ml/modeling/linear_model/lars.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -223,7 +223,7 @@ class Lars(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Lars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Lars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -583,6 +583,22 @@ class Lars(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lars_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -231,7 +231,7 @@ class LarsCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -591,6 +591,22 @@ class LarsCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -224,7 +224,7 @@ class Lasso(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=Lasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=Lasso.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -584,6 +584,22 @@ class Lasso(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -252,7 +252,7 @@ class LassoCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -612,6 +612,22 @@ class LassoCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_lars.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +244,7 @@ class LassoLars(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoLars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLars.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -604,6 +604,22 @@ class LassoLars(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_lars_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -245,7 +245,7 @@ class LassoLarsCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoLarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLarsCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -605,6 +605,22 @@ class LassoLarsCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/lasso_lars_ic.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -228,7 +228,7 @@ class LassoLarsIC(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LassoLarsIC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LassoLarsIC.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -588,6 +588,22 @@ class LassoLarsIC(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/linear_regression.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -181,7 +181,7 @@ class LinearRegression(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LinearRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LinearRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -541,6 +541,22 @@ class LinearRegression(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/logistic_regression.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -295,7 +295,7 @@ class LogisticRegression(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LogisticRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LogisticRegression.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -655,6 +655,22 @@ class LogisticRegression(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/logistic_regression_cv.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -316,7 +316,7 @@ class LogisticRegressionCV(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=LogisticRegressionCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=LogisticRegressionCV.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -676,6 +676,22 @@ class LogisticRegressionCV(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py
@@ -26,7 +26,7 @@ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
 from snowflake.ml._internal.utils import pkg_version_utils, identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import SnowparkHandlers as HandlersImpl
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.estimator_utils import (
@@ -35,7 +35,7 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
-from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
+from snowflake.ml.modeling._internal.estimator_protocols import TransformerHandlers

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -214,7 +214,7 @@ class MultiTaskElasticNet(BaseTransformer):
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
         self._snowpark_cols: Optional[List[str]] = self.input_cols
-        self._handlers: FitPredictHandlers = HandlersImpl(class_name=MultiTaskElasticNet.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+        self._handlers: TransformerHandlers = HandlersImpl(class_name=MultiTaskElasticNet.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
         self._autogenerated = True

     def _get_rand_id(self) -> str:
@@ -574,6 +574,22 @@ class MultiTaskElasticNet(BaseTransformer):
             # each row containing a list of values.
             expected_dtype = "ARRAY"

+        # If we were unable to assign a type to this transform in the factory, infer the type here.
+        if expected_dtype == "":
+            # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY".
+            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY".
+            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
+                expected_dtype = "ARRAY"
+            else:
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                # We can only infer the output types from the input types if the following two statements are true:
+                # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
+                # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
+                if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
+                    expected_dtype = convert_sp_to_sf_type(output_types[0])
+
         output_df = self._batch_inference(
             dataset=dataset,
             inference_method="transform",
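To install the newer of the two versions compared in this diff:

pip install --upgrade snowflake-ml-python==1.2.2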