PyPI - snowflake-ml-python - Versions diffs - 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl - Mend

snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (252) hide show

snowflake/ml/model/_signatures/utils.py CHANGED Viewed

@@ -110,6 +110,15 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
     # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ConversationalPipeline
     # Needs to convert to conversation object.
     if task == "conversational":
+        warnings.warn(
+            (
+                "Conversational pipeline is removed from transformers since 4.42.0. "
+                "Support will be removed from snowflake-ml-python soon."
+            ),
+            category=DeprecationWarning,
+            stacklevel=1,
+        )
         return core.ModelSignature(
             inputs=[
                 core.FeatureSpec(name="user_inputs", dtype=core.DataType.STRING, shape=(-1,)),

snowflake/ml/model/models/llm.py CHANGED Viewed

@@ -70,7 +70,9 @@ class LLM:
             import peft
-            peft_config = peft.PeftConfig.from_pretrained(model_id_or_path, **hub_kwargs)  # type: ignore[attr-defined]
+            peft_config = peft.PeftConfig.from_pretrained(  # type: ignore[no-untyped-call, attr-defined]
+                model_id_or_path, **hub_kwargs
+            )
             if peft_config.peft_type != peft.PeftType.LORA:  # type: ignore[attr-defined]
                 raise ValueError("Only LORA is supported.")
             if peft_config.task_type != peft.TaskType.CAUSAL_LM:  # type: ignore[attr-defined]

snowflake/ml/model/type_hints.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # mypy: disable-error-code="import"
+from enum import Enum
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -232,13 +233,12 @@ class BaseModelSaveOption(TypedDict):
     _legacy_save: NotRequired[bool]
     function_type: NotRequired[Literal["FUNCTION", "TABLE_FUNCTION"]]
     method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
-    include_pip_dependencies: NotRequired[bool]
+    enable_explainability: NotRequired[bool]
 class CatBoostModelSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
     cuda_version: NotRequired[str]
-    enable_explainability: NotRequired[bool]
 class CustomModelSaveOption(BaseModelSaveOption):
@@ -252,12 +252,10 @@ class SKLModelSaveOptions(BaseModelSaveOption):
 class XGBModelSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
     cuda_version: NotRequired[str]
-    enable_explainability: NotRequired[bool]
 class LGBMModelSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
-    enable_explainability: NotRequired[bool]
 class SNOWModelSaveOptions(BaseModelSaveOption):
@@ -433,3 +431,11 @@ class Deployment(TypedDict):
     signature: core.ModelSignature
     options: Required[DeployOptions]
     details: NotRequired[DeployDetails]
+class ModelObjective(Enum):
+    UNKNOWN = "unknown"
+    BINARY_CLASSIFICATION = "binary_classification"
+    MULTI_CLASSIFICATION = "multi_classification"
+    REGRESSION = "regression"
+    RANKING = "ranking"

snowflake/ml/modeling/_internal/constants.py CHANGED Viewed

@@ -1 +1,2 @@
 IN_ML_RUNTIME_ENV_VAR = "IN_SPCS_ML_RUNTIME"
+USE_OPTIMIZED_DATA_INGESTOR = "USE_OPTIMIZED_DATA_INGESTOR"

snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py CHANGED Viewed

@@ -166,10 +166,10 @@ class PandasTransformHandlers:
             SnowflakeMLException: The input column list does not have one of `X` and `X_test`.
         """
         assert hasattr(self.estimator, "score")  # make type checker happy
-        argspec = inspect.getfullargspec(self.estimator.score)
-        if "X" in argspec.args:
+        params = inspect.signature(self.estimator.score).parameters
+        if "X" in params:
             score_args = {"X": self.dataset[input_cols]}
-        elif "X_test" in argspec.args:
+        elif "X_test" in params:
             score_args = {"X_test": self.dataset[input_cols]}
         else:
             raise exceptions.SnowflakeMLException(
@@ -178,10 +178,10 @@ class PandasTransformHandlers:
             )
         if len(label_cols) > 0:
-            label_arg_name = "Y" if "Y" in argspec.args else "y"
+            label_arg_name = "Y" if "Y" in params else "y"
             score_args[label_arg_name] = self.dataset[label_cols].squeeze()
-        if sample_weight_col is not None and "sample_weight" in argspec.args:
+        if sample_weight_col is not None and "sample_weight" in params:
             score_args["sample_weight"] = self.dataset[sample_weight_col].squeeze()
         score = self.estimator.score(**score_args)

snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py CHANGED Viewed

@@ -43,14 +43,14 @@ class PandasModelTrainer:
             Trained model
         """
         assert hasattr(self.estimator, "fit")  # Keep mypy happy
-        argspec = inspect.getfullargspec(self.estimator.fit)
+        params = inspect.signature(self.estimator.fit).parameters
         args = {"X": self.dataset[self.input_cols]}
         if self.label_cols:
-            label_arg_name = "Y" if "Y" in argspec.args else "y"
+            label_arg_name = "Y" if "Y" in params else "y"
             args[label_arg_name] = self.dataset[self.label_cols].squeeze()
-        if self.sample_weight_col is not None and "sample_weight" in argspec.args:
+        if self.sample_weight_col is not None and "sample_weight" in params:
             args["sample_weight"] = self.dataset[self.sample_weight_col].squeeze()
         return self.estimator.fit(**args)
@@ -59,6 +59,7 @@ class PandasModelTrainer:
         self,
         expected_output_cols_list: List[str],
         drop_input_cols: Optional[bool] = False,
+        example_output_pd_df: Optional[pd.DataFrame] = None,
     ) -> Tuple[pd.DataFrame, object]:
         """Trains the model using specified features and target columns from the dataset.
         This API is different from fit itself because it would also provide the predict
@@ -69,6 +70,8 @@ class PandasModelTrainer:
                 name as a list. Defaults to None.
             drop_input_cols (Optional[bool]): Boolean to determine whether to
                 drop the input columns from the output dataset.
+            example_output_pd_df (Optional[pd.DataFrame]): Example output dataframe
+                This is not used in PandasModelTrainer. It is used in SnowparkModelTrainer.
         Returns:
             Tuple[pd.DataFrame, object]: [predicted dataset, estimator]
@@ -108,13 +111,13 @@ class PandasModelTrainer:
         assert hasattr(self.estimator, "fit")  # make type checker happy
         assert hasattr(self.estimator, "fit_transform")  # make type checker happy
-        argspec = inspect.getfullargspec(self.estimator.fit)
+        params = inspect.signature(self.estimator.fit).parameters
         args = {"X": self.dataset[self.input_cols]}
         if self.label_cols:
-            label_arg_name = "Y" if "Y" in argspec.args else "y"
+            label_arg_name = "Y" if "Y" in params else "y"
             args[label_arg_name] = self.dataset[self.label_cols].squeeze()
-        if self.sample_weight_col is not None and "sample_weight" in argspec.args:
+        if self.sample_weight_col is not None and "sample_weight" in params:
             args["sample_weight"] = self.dataset[self.sample_weight_col].squeeze()
         inference_res = self.estimator.fit_transform(**args)

snowflake/ml/modeling/_internal/model_specifications.py CHANGED Viewed

@@ -53,11 +53,13 @@ class SKLearnModelSpecifications(ModelSpecifications):
 class XGBoostModelSpecifications(ModelSpecifications):
     def __init__(self) -> None:
+        import sklearn
         import xgboost
         imports: List[str] = ["xgboost"]
         pkgDependencies: List[str] = [
             f"numpy=={np.__version__}",
+            f"scikit-learn=={sklearn.__version__}",
             f"xgboost=={xgboost.__version__}",
             f"cloudpickle=={cp.__version__}",
         ]

snowflake/ml/modeling/_internal/model_trainer.py CHANGED Viewed

@@ -20,6 +20,7 @@ class ModelTrainer(Protocol):
         self,
         expected_output_cols_list: List[str],
         drop_input_cols: Optional[bool] = False,
+        example_output_pd_df: Optional[pd.DataFrame] = None,
     ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
         raise NotImplementedError

snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py CHANGED Viewed

@@ -495,7 +495,7 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                     label_arg_name = "Y" if "Y" in argspec.args else "y"
                     args[label_arg_name] = df[label_cols].squeeze()
-                if sample_weight_col is not None and "sample_weight" in argspec.args:
+                if sample_weight_col is not None:
                     args["sample_weight"] = df[sample_weight_col].squeeze()
                 return args, estimator, indices, len(df), params_to_evaluate
@@ -1061,7 +1061,7 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                         if label_cols:
                             label_arg_name = "Y" if "Y" in argspec.args else "y"
                             args[label_arg_name] = y
-                        if sample_weight_col is not None and "sample_weight" in argspec.args:
+                        if sample_weight_col is not None:
                             args["sample_weight"] = df[sample_weight_col].squeeze()
                         # estimator.refit = original_refit
                         refit_start_time = time.time()

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py CHANGED Viewed

@@ -318,19 +318,19 @@ class SnowparkTransformHandlers:
             with open(local_score_file_name_path, mode="r+b") as local_score_file_obj:
                 estimator = cp.load(local_score_file_obj)
-            argspec = inspect.getfullargspec(estimator.score)
-            if "X" in argspec.args:
+            params = inspect.signature(estimator.score).parameters
+            if "X" in params:
                 args = {"X": df[input_cols]}
-            elif "X_test" in argspec.args:
+            elif "X_test" in params:
                 args = {"X_test": df[input_cols]}
             else:
                 raise RuntimeError("Neither 'X' or 'X_test' exist in argument")
             if label_cols:
-                label_arg_name = "Y" if "Y" in argspec.args else "y"
+                label_arg_name = "Y" if "Y" in params else "y"
                 args[label_arg_name] = df[label_cols].squeeze()
-            if sample_weight_col is not None and "sample_weight" in argspec.args:
+            if sample_weight_col is not None and "sample_weight" in params:
                 args["sample_weight"] = df[sample_weight_col].squeeze()
             result: float = estimator.score(**args)

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py CHANGED Viewed

@@ -35,6 +35,7 @@ cp.register_pickle_by_value(inspect.getmodule(handle_inference_result))
 _PROJECT = "ModelDevelopment"
 _ENABLE_ANONYMOUS_SPROC = False
+_ENABLE_TRACER = True
 class SnowparkModelTrainer:
@@ -119,6 +120,8 @@ class SnowparkModelTrainer:
             A callable that can be registered as a stored procedure.
         """
         imports = model_spec.imports  # In order for the sproc to not resolve this reference in snowflake.ml
+        method_name = "fit"
+        tracer_name = f"snowpark.ml.modeling.{self._class_name.lower()}.{method_name}"
         def fit_wrapper_function(
             session: Session,
@@ -138,110 +141,98 @@ class SnowparkModelTrainer:
             for import_name in imports:
                 importlib.import_module(import_name)
-            # Execute snowpark queries and obtain the results as pandas dataframe
-            # NB: this implies that the result data must fit into memory.
-            for query in sql_queries[:-1]:
-                _ = session.sql(query).collect(statement_params=statement_params)
-            sp_df = session.sql(sql_queries[-1])
-            df: pd.DataFrame = sp_df.to_pandas(statement_params=statement_params)
-            df.columns = sp_df.columns
+            def fit_and_return_estimator() -> str:
+                """This is a helper function within the sproc to download the data, fit the model, and upload the model.
+                Returns:
+                    The name of the file in session's temp stage (temp_stage_name) that contains the serialized model.
+                """
+                # Execute snowpark queries and obtain the results as pandas dataframe
+                # NB: this implies that the result data must fit into memory.
+                for query in sql_queries[:-1]:
+                    _ = session.sql(query).collect(statement_params=statement_params)
+                sp_df = session.sql(sql_queries[-1])
+                df: pd.DataFrame = sp_df.to_pandas(statement_params=statement_params)
+                df.columns = sp_df.columns
+                local_transform_file_name = temp_file_utils.get_temp_file_path()
+                session.file.get(
+                    stage_location=temp_stage_name,
+                    target_directory=local_transform_file_name,
+                    statement_params=statement_params,
+                )
-            local_transform_file_name = temp_file_utils.get_temp_file_path()
+                local_transform_file_path = os.path.join(
+                    local_transform_file_name, os.listdir(local_transform_file_name)[0]
+                )
+                with open(local_transform_file_path, mode="r+b") as local_transform_file_obj:
+                    estimator = cp.load(local_transform_file_obj)
-            session.file.get(
-                stage_location=temp_stage_name,
-                target_directory=local_transform_file_name,
-                statement_params=statement_params,
-            )
+                params = inspect.signature(estimator.fit).parameters
+                args = {"X": df[input_cols]}
+                if label_cols:
+                    label_arg_name = "Y" if "Y" in params else "y"
+                    args[label_arg_name] = df[label_cols].squeeze()
-            local_transform_file_path = os.path.join(
-                local_transform_file_name, os.listdir(local_transform_file_name)[0]
-            )
-            with open(local_transform_file_path, mode="r+b") as local_transform_file_obj:
-                estimator = cp.load(local_transform_file_obj)
+                if sample_weight_col is not None and "sample_weight" in params:
+                    args["sample_weight"] = df[sample_weight_col].squeeze()
-            argspec = inspect.getfullargspec(estimator.fit)
-            args = {"X": df[input_cols]}
-            if label_cols:
-                label_arg_name = "Y" if "Y" in argspec.args else "y"
-                args[label_arg_name] = df[label_cols].squeeze()
+                estimator.fit(**args)
-            if sample_weight_col is not None and "sample_weight" in argspec.args:
-                args["sample_weight"] = df[sample_weight_col].squeeze()
+                local_result_file_name = temp_file_utils.get_temp_file_path()
-            estimator.fit(**args)
+                with open(local_result_file_name, mode="w+b") as local_result_file_obj:
+                    cp.dump(estimator, local_result_file_obj)
-            local_result_file_name = temp_file_utils.get_temp_file_path()
+                session.file.put(
+                    local_file_name=local_result_file_name,
+                    stage_location=temp_stage_name,
+                    auto_compress=False,
+                    overwrite=True,
+                    statement_params=statement_params,
+                )
+                return local_result_file_name
-            with open(local_result_file_name, mode="w+b") as local_result_file_obj:
-                cp.dump(estimator, local_result_file_obj)
+            if _ENABLE_TRACER:
-            session.file.put(
-                local_file_name=local_result_file_name,
-                stage_location=temp_stage_name,
-                auto_compress=False,
-                overwrite=True,
-                statement_params=statement_params,
-            )
+                # Use opentelemetry to trace the dist and span of the fit operation.
+                # This would allow user to see the trace in the Snowflake UI.
+                from opentelemetry import trace
-            # Note: you can add something like  + "|" + str(df) to the return string
-            # to pass debug information to the caller.
-            return str(os.path.basename(local_result_file_name))
+                tracer = trace.get_tracer(tracer_name)
+                with tracer.start_as_current_span("fit"):
+                    local_result_file_name = fit_and_return_estimator()
+                    # Note: you can add something like  + "|" + str(df) to the return string
+                    # to pass debug information to the caller.
+                    return str(os.path.basename(local_result_file_name))
+            else:
+                local_result_file_name = fit_and_return_estimator()
+                return str(os.path.basename(local_result_file_name))
         return fit_wrapper_function
-    def _get_fit_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+    def _get_fit_wrapper_sproc(self, statement_params: Dict[str, str], anonymous: bool) -> StoredProcedure:
         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
-        relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=model_spec.pkgDependencies, session=self.session
-        )
-        fit_wrapper_sproc = self.session.sproc.register(
-            func=self._build_fit_wrapper_sproc(model_spec=model_spec),
-            is_permanent=False,
-            name=fit_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
-            replace=True,
-            session=self.session,
-            statement_params=statement_params,
-            anonymous=True,
-            execute_as="caller",
-        )
-        return fit_wrapper_sproc
-    def _get_fit_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
-        # If the sproc already exists, don't register.
-        if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
-            self.session._FIT_WRAPPER_SPROCS: Dict[str, StoredProcedure] = {}  # type: ignore[attr-defined, misc]
-        model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_sproc_key = model_spec.__class__.__name__
-        if fit_sproc_key in self.session._FIT_WRAPPER_SPROCS:  # type: ignore[attr-defined]
-            fit_sproc: StoredProcedure = self.session._FIT_WRAPPER_SPROCS[fit_sproc_key]  # type: ignore[attr-defined]
-            return fit_sproc
         fit_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
         relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
             pkg_versions=model_spec.pkgDependencies, session=self.session
         )
+        packages = ["snowflake-snowpark-python", "snowflake-telemetry-python"] + relaxed_dependencies
         fit_wrapper_sproc = self.session.sproc.register(
             func=self._build_fit_wrapper_sproc(model_spec=model_spec),
             is_permanent=False,
             name=fit_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
+            packages=packages,  # type: ignore[arg-type]
             replace=True,
             session=self.session,
             statement_params=statement_params,
             execute_as="caller",
+            anonymous=anonymous,
         )
-        self.session._FIT_WRAPPER_SPROCS[fit_sproc_key] = fit_wrapper_sproc  # type: ignore[attr-defined]
         return fit_wrapper_sproc
     def _build_fit_predict_wrapper_sproc(
@@ -333,7 +324,9 @@ class SnowparkModelTrainer:
             # write into a temp table in sproc and load the table from outside
             session.write_pandas(
-                fit_predict_result_pd, fit_predict_result_name, auto_create_table=True, table_type="temp"
+                fit_predict_result_pd,
+                fit_predict_result_name,
+                overwrite=True,
             )
             # Note: you can add something like  + "|" + str(df) to the return string
@@ -414,13 +407,13 @@ class SnowparkModelTrainer:
             with open(local_transform_file_path, mode="r+b") as local_transform_file_obj:
                 estimator = cp.load(local_transform_file_obj)
-            argspec = inspect.getfullargspec(estimator.fit)
+            params = inspect.signature(estimator.fit).parameters
             args = {"X": df[input_cols]}
             if label_cols:
-                label_arg_name = "Y" if "Y" in argspec.args else "y"
+                label_arg_name = "Y" if "Y" in params else "y"
                 args[label_arg_name] = df[label_cols].squeeze()
-            if sample_weight_col is not None and "sample_weight" in argspec.args:
+            if sample_weight_col is not None and "sample_weight" in params:
                 args["sample_weight"] = df[sample_weight_col].squeeze()
             fit_transform_result = estimator.fit_transform(**args)
@@ -477,7 +470,7 @@ class SnowparkModelTrainer:
         return fit_transform_wrapper_function
-    def _get_fit_predict_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+    def _get_fit_predict_wrapper_sproc(self, statement_params: Dict[str, str], anonymous: bool) -> StoredProcedure:
         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
         fit_predict_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
@@ -494,82 +487,14 @@ class SnowparkModelTrainer:
             replace=True,
             session=self.session,
             statement_params=statement_params,
-            anonymous=True,
+            anonymous=anonymous,
             execute_as="caller",
         )
         return fit_predict_wrapper_sproc
-    def _get_fit_predict_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
-        # If the sproc already exists, don't register.
-        if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
-            self.session._FIT_WRAPPER_SPROCS: Dict[str, StoredProcedure] = {}  # type: ignore[attr-defined, misc]
-        model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_predict_sproc_key = model_spec.__class__.__name__ + "_fit_predict"
-        if fit_predict_sproc_key in self.session._FIT_WRAPPER_SPROCS:  # type: ignore[attr-defined]
-            fit_sproc: StoredProcedure = self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-                fit_predict_sproc_key
-            ]
-            return fit_sproc
-        fit_predict_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
-        relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=model_spec.pkgDependencies, session=self.session
-        )
-        fit_predict_wrapper_sproc = self.session.sproc.register(
-            func=self._build_fit_predict_wrapper_sproc(model_spec=model_spec),
-            is_permanent=False,
-            name=fit_predict_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
-            replace=True,
-            session=self.session,
-            statement_params=statement_params,
-            execute_as="caller",
-        )
-        self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-            fit_predict_sproc_key
-        ] = fit_predict_wrapper_sproc
-        return fit_predict_wrapper_sproc
-    def _get_fit_transform_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
-        model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_transform_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
-        relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=model_spec.pkgDependencies, session=self.session
-        )
-        fit_transform_wrapper_sproc = self.session.sproc.register(
-            func=self._build_fit_transform_wrapper_sproc(model_spec=model_spec),
-            is_permanent=False,
-            name=fit_transform_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
-            replace=True,
-            session=self.session,
-            statement_params=statement_params,
-            anonymous=True,
-            execute_as="caller",
-        )
-        return fit_transform_wrapper_sproc
-    def _get_fit_transform_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
-        # If the sproc already exists, don't register.
-        if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
-            self.session._FIT_WRAPPER_SPROCS: Dict[str, StoredProcedure] = {}  # type: ignore[attr-defined, misc]
+    def _get_fit_transform_wrapper_sproc(self, statement_params: Dict[str, str], anonymous: bool) -> StoredProcedure:
         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_transform_sproc_key = model_spec.__class__.__name__ + "_fit_transform"
-        if fit_transform_sproc_key in self.session._FIT_WRAPPER_SPROCS:  # type: ignore[attr-defined]
-            fit_sproc: StoredProcedure = self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-                fit_transform_sproc_key
-            ]
-            return fit_sproc
         fit_transform_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
@@ -586,12 +511,9 @@ class SnowparkModelTrainer:
             session=self.session,
             statement_params=statement_params,
             execute_as="caller",
+            anonymous=anonymous,
         )
-        self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-            fit_transform_sproc_key
-        ] = fit_transform_wrapper_sproc
         return fit_transform_wrapper_sproc
     def train(self) -> object:
@@ -629,9 +551,9 @@ class SnowparkModelTrainer:
         # Call fit sproc
         if _ENABLE_ANONYMOUS_SPROC:
-            fit_wrapper_sproc = self._get_fit_wrapper_sproc_anonymous(statement_params=statement_params)
+            fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params, anonymous=True)
         else:
-            fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params)
+            fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params, anonymous=False)
         try:
             sproc_export_file_name: str = fit_wrapper_sproc(
@@ -665,6 +587,7 @@ class SnowparkModelTrainer:
         self,
         expected_output_cols_list: List[str],
         drop_input_cols: Optional[bool] = False,
+        example_output_pd_df: Optional[pd.DataFrame] = None,
     ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
         """Trains the model by pushing down the compute into Snowflake using stored procedures.
         This API is different from fit itself because it would also provide the predict
@@ -675,6 +598,11 @@ class SnowparkModelTrainer:
                 name as a list. Defaults to None.
             drop_input_cols (Optional[bool]): Boolean to determine drop
                 the input columns from the output dataset or not
+            example_output_pd_df (Optional[pd.DataFrame]): Example output dataframe
+                This is to create a temp table in the client side with df_one_row. This can maintain the same column
+                name and data type as the output dataframe. Within the sproc, we don't need to create another temp table
+                again - instead, we overwrite into this table without changing the schema.
+                This is not used in PandasModelTrainer.
         Returns:
             Tuple[Union[DataFrame, pd.DataFrame], object]: [predicted dataset, estimator]
@@ -702,12 +630,35 @@ class SnowparkModelTrainer:
         # Call fit sproc
         if _ENABLE_ANONYMOUS_SPROC:
-            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc_anonymous(statement_params=statement_params)
+            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(
+                statement_params=statement_params, anonymous=True
+            )
         else:
-            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(statement_params=statement_params)
+            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(
+                statement_params=statement_params, anonymous=False
+            )
         fit_predict_result_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
+        # Create a temp table in advance to store the output
+        # This would allow us to use the same table outside the stored procedure
+        if not drop_input_cols:
+            assert example_output_pd_df is not None
+            remove_dataset_col_name_exist_in_output_col = list(set(dataset.columns) - set(example_output_pd_df.columns))
+            pd_df_one_row = (
+                dataset.select(remove_dataset_col_name_exist_in_output_col)
+                .limit(1)
+                .to_pandas(statement_params=statement_params)
+            )
+            example_output_pd_df = pd.concat([pd_df_one_row, example_output_pd_df], axis=1)
+        self.session.write_pandas(
+            example_output_pd_df,
+            fit_predict_result_name,
+            auto_create_table=True,
+            table_type="temp",
+        )
         sproc_export_file_name: str = fit_predict_wrapper_sproc(
             self.session,
             queries,
@@ -769,11 +720,13 @@ class SnowparkModelTrainer:
         # Call fit sproc
         if _ENABLE_ANONYMOUS_SPROC:
-            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc_anonymous(
-                statement_params=statement_params
+            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(
+                statement_params=statement_params, anonymous=True
             )
         else:
-            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(statement_params=statement_params)
+            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(
+                statement_params=statement_params, anonymous=False
+            )
         fit_transform_result_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)

snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl

snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl