snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +82 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +18 -18
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +2 -6
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/pandas_handler.py +1 -1
- snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/pipeline/pipeline.py +6 -176
- snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
- snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +38 -9
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +63 -67
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
```diff
--- a/snowflake/ml/modeling/_internal/model_trainer_builder.py
+++ b/snowflake/ml/modeling/_internal/model_trainer_builder.py
@@ -1,11 +1,9 @@
-import os
 from typing import List, Optional, Union
 
 import pandas as pd
 from sklearn import model_selection
 
 from snowflake.ml._internal.exceptions import error_codes, exceptions
-from snowflake.ml.modeling._internal.constants import IN_ML_RUNTIME_ENV_VAR
 from snowflake.ml.modeling._internal.estimator_utils import (
     get_module_name,
     is_single_node,
@@ -13,9 +11,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
 from snowflake.ml.modeling._internal.local_implementations.pandas_trainer import (
     PandasModelTrainer,
 )
-from snowflake.ml.modeling._internal.ml_runtime_implementations.ml_runtime_trainer import (
-    MLRuntimeModelTrainer,
-)
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.snowpark_implementations.distributed_hpo_trainer import (
     DistributedHPOTrainer,
@@ -107,9 +102,6 @@ class ModelTrainerBuilder:
             "autogenerated": autogenerated,
             "subproject": subproject,
         }
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            return MLRuntimeModelTrainer(**init_args)  # type: ignore[arg-type, return-value]
-
         trainer_klass = SnowparkModelTrainer
 
         assert dataset._session is not None  # Make MyPy happy
```
```diff
--- a/snowflake/ml/modeling/_internal/model_transformer_builder.py
+++ b/snowflake/ml/modeling/_internal/model_transformer_builder.py
@@ -1,16 +1,11 @@
-import os
 from typing import Optional, Union
 
 import pandas as pd
 
 from snowflake import snowpark
-from snowflake.ml.modeling._internal.constants import IN_ML_RUNTIME_ENV_VAR
 from snowflake.ml.modeling._internal.local_implementations.pandas_handlers import (
     PandasTransformHandlers,
 )
-from snowflake.ml.modeling._internal.ml_runtime_implementations.ml_runtime_handlers import (
-    MLRuntimeTransformHandlers,
-)
 from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_handlers import (
     SnowparkTransformHandlers,
 )
@@ -63,14 +58,6 @@ class ModelTransformerBuilder:
             )
 
         elif isinstance(dataset, snowpark.DataFrame):
-            if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-                return MLRuntimeTransformHandlers(
-                    dataset=dataset,
-                    estimator=estimator,
-                    class_name=class_name,
-                    subproject=subproject,
-                    autogenerated=autogenerated,
-                )
             return SnowparkTransformHandlers(
                 dataset=dataset,
                 estimator=estimator,
```
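With the ML Runtime branch removed, `ModelTransformerBuilder.build` dispatches purely on dataset type: pandas input gets a `PandasTransformHandlers`, Snowpark input a `SnowparkTransformHandlers`. A minimal sketch of the call shape under 1.7.3 — the keyword arguments mirror the `ModelTransformerBuilder.build(...)` call sites removed from `pipeline.py` below, while the estimator and toy data are illustrative placeholders, not taken from this diff:

```python
# Sketch only: the estimator and toy data are placeholders, not from this diff.
import pandas as pd
from sklearn.linear_model import LogisticRegression

from snowflake.ml.modeling._internal.model_transformer_builder import (
    ModelTransformerBuilder,
)

handler = ModelTransformerBuilder.build(
    dataset=pd.DataFrame({"F1": [0.1, 0.2, 0.3], "LABEL": [0, 1, 0]}),
    estimator=LogisticRegression(),
    class_name="LogisticRegression",
    subproject="",
    autogenerated=False,
)
# In 1.7.3 this is always a PandasTransformHandlers for pandas input and a
# SnowparkTransformHandlers for snowpark.DataFrame input; the
# MLRuntimeTransformHandlers path no longer exists.
```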
```diff
--- a/snowflake/ml/modeling/pipeline/pipeline.py
+++ b/snowflake/ml/modeling/pipeline/pipeline.py
@@ -30,7 +30,6 @@ from snowflake.snowpark._internal import utils as snowpark_utils
 
 _PROJECT = "ModelDevelopment"
 _SUBPROJECT = "Framework"
-IN_ML_RUNTIME_ENV_VAR = "IN_SPCS_ML_RUNTIME"
 
 
 def _final_step_has(attr: str) -> Callable[..., bool]:
@@ -432,10 +431,7 @@ class Pipeline(base.BaseTransformer):
             data_sources = [data_source.DataFrameInfo(dataset.queries["queries"][-1])]
             lineage_utils.set_data_sources(self, data_sources)
 
-        if self._can_be_trained_in_ml_runtime(dataset):
-            self._fit_ml_runtime(dataset)
-
-        elif squash and isinstance(dataset, snowpark.DataFrame):
+        if squash and isinstance(dataset, snowpark.DataFrame):
             session = dataset._session
             assert session is not None
             self._fit_snowpark_dataframe_within_one_sproc(session=session, dataset=dataset)
@@ -606,25 +602,7 @@ class Pipeline(base.BaseTransformer):
         Returns:
             Output dataset.
         """
-
-        expected_output_cols = self._infer_output_cols()
-        handler = ModelTransformerBuilder.build(
-            dataset=dataset,
-            estimator=self._sklearn_object,
-            class_name="Pipeline",
-            subproject="",
-            autogenerated=False,
-        )
-        return handler.batch_inference(
-            inference_method="predict",
-            input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-            expected_output_cols=expected_output_cols,
-            session=dataset._session,
-            dependencies=self._deps,
-        )
-
-        else:
-            return self._invoke_estimator_func("predict", dataset)
+        return self._invoke_estimator_func("predict", dataset)
 
     @metaestimators.available_if(_final_step_has("score_samples"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -642,32 +620,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before inference.")
-
-            expected_output_cols = self._get_output_column_names("score_samples")
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="score_samples",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-        else:
-            return self._invoke_estimator_func("score_samples", dataset)
+        return self._invoke_estimator_func("score_samples", dataset)
 
     @metaestimators.available_if(_final_step_has("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -685,32 +639,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            if self._sklearn_object is None:
-                raise ValueError("Model must be fit before inference.")
-            expected_output_cols = self._get_output_column_names("predict_proba")
-
-            handler = ModelTransformerBuilder.build(
-                dataset=dataset,
-                estimator=self._sklearn_object,
-                class_name="Pipeline",
-                subproject="",
-                autogenerated=False,
-            )
-            return handler.batch_inference(
-                inference_method="predict_proba",
-                input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-                expected_output_cols=expected_output_cols,
-                session=dataset._session,
-                dependencies=self._deps,
-            )
-        else:
-            return self._invoke_estimator_func("predict_proba", dataset)
+        return self._invoke_estimator_func("predict_proba", dataset)
 
     @metaestimators.available_if(_final_step_has("predict_log_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -729,31 +659,8 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
-
-        if self._sklearn_object is None:
-            raise ValueError("Model must be fit before inference.")
-
-        expected_output_cols = self._get_output_column_names("predict_log_proba")
-        handler = ModelTransformerBuilder.build(
-            dataset=dataset,
-            estimator=self._sklearn_object,
-            class_name="Pipeline",
-            subproject="",
-            autogenerated=False,
-        )
-        return handler.batch_inference(
-            inference_method="predict_log_proba",
-            input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
-            expected_output_cols=expected_output_cols,
-            session=dataset._session,
-            dependencies=self._deps,
-        )
-        else:
-            return self._invoke_estimator_func("predict_log_proba", dataset)
+        return self._invoke_estimator_func("predict_log_proba", dataset)
 
     @metaestimators.available_if(_final_step_has("score"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -769,30 +676,9 @@ class Pipeline(base.BaseTransformer):
 
         Returns:
             Output dataset.
-
-        Raises:
-            ValueError: An sklearn object has not been fit before calling this function
         """
 
-
-        if self._sklearn_object is None:
-            raise ValueError("Model must be fit before scoreing.")
-        handler = ModelTransformerBuilder.build(
-            dataset=dataset,
-            estimator=self._sklearn_object,
-            class_name="Pipeline",
-            subproject="",
-            autogenerated=False,
-        )
-        return handler.score(
-            input_cols=self._infer_input_cols(),
-            label_cols=self._get_label_cols(),
-            session=dataset._session,
-            dependencies=self._deps,
-            score_sproc_imports=[],
-        )
-        else:
-            return self._invoke_estimator_func("score", dataset)
+        return self._invoke_estimator_func("score", dataset)
 
     def _invoke_estimator_func(
         self, func_name: str, dataset: Union[snowpark.DataFrame, pd.DataFrame]
@@ -882,39 +768,6 @@ class Pipeline(base.BaseTransformer):
 
         return ct
 
-    def _fit_ml_runtime(self, dataset: snowpark.DataFrame) -> None:
-        """Train the pipeline in the ML Runtime.
-
-        Args:
-            dataset: The training Snowpark dataframe
-
-        Raises:
-            ModuleNotFoundError: The ML Runtime Client is not installed.
-        """
-        try:
-            from snowflake.ml.runtime import MLRuntimeClient
-        except ModuleNotFoundError as e:
-            # The snowflake.ml.runtime module should always be present when
-            # the env var IN_SPCS_ML_RUNTIME is present.
-            raise ModuleNotFoundError("ML Runtime Python Client is not installed.") from e
-
-        client = MLRuntimeClient()
-        ml_runtime_compatible_pipeline = self._create_unfitted_sklearn_object()
-
-        label_cols = self._get_label_cols()
-        all_df_cols = dataset.columns
-        input_cols = [col for col in all_df_cols if col not in label_cols]
-
-        trained_pipeline = client.train(
-            estimator=ml_runtime_compatible_pipeline,
-            dataset=dataset,
-            input_cols=input_cols,
-            label_cols=label_cols,
-            sample_weight_col=self.sample_weight_col,
-        )
-
-        self._sklearn_object = trained_pipeline
-
     def _get_label_cols(self) -> List[str]:
         """Util function to get the label columns from the pipeline.
         The label column is only present in the estimator
@@ -929,28 +782,6 @@ class Pipeline(base.BaseTransformer):
 
         return label_cols
 
-    def _can_be_trained_in_ml_runtime(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> bool:
-        """A utility function to determine if the pipeline cam be pushed down to the ML Runtime for training.
-        Currently, this is true if:
-            - The training dataset is a snowpark dataframe,
-            - The IN_SPCS_ML_RUNTIME environment is present and
-            - The pipeline can be converted to an sklearn pipeline.
-
-        Args:
-            dataset: The training dataset
-
-        Returns:
-            True if the dataset can be fit in the ml runtime, else false.
-
-        """
-        if not isinstance(dataset, snowpark.DataFrame):
-            return False
-
-        if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            return False
-
-        return self._is_convertible_to_sklearn
-
     @staticmethod
     def _wrap_transformer_in_column_transformer(
         transformer_name: str, transformer: base.BaseTransformer
@@ -1124,7 +955,6 @@ class Pipeline(base.BaseTransformer):
 
         telemetry_data = {
             "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
-            "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
         }
         telemetry.send_custom_usage(
             project=_PROJECT,
```