PyPI - snowflake-ml-python - Versions diffs - 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl - Mend

snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (252) hide show

snowflake/ml/modeling/xgboost/xgbrf_classifier.py CHANGED Viewed

@@ -4,18 +4,17 @@
 #
 import inspect
 import os
-import posixpath
-from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
-from typing_extensions import TypeGuard
+from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
 from uuid import uuid4
 import cloudpickle as cp
-import pandas as pd
 import numpy as np
+import pandas as pd
 from numpy import typing as npt
 import numpy
+import sklearn
 import xgboost
 from sklearn.utils.metaestimators import available_if
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
 from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
-from snowflake.ml._internal.utils import pkg_version_utils, identifier
+from snowflake.ml._internal.utils import identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.transformer_protocols import (
-    ModelTransformHandlers,
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
@@ -363,7 +361,7 @@ class XGBRFClassifier(BaseTransformer):
         self.set_sample_weight_col(sample_weight_col)
         self._use_external_memory_version = use_external_memory_version
         self._batch_size = batch_size
-        deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
+        deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -699,12 +697,23 @@ class XGBRFClassifier(BaseTransformer):
             autogenerated=self._autogenerated,
             subproject=_SUBPROJECT,
         )
-        output_result, fitted_estimator = model_trainer.train_fit_predict(
-            drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=(
-                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
-            ),
+        expected_output_cols = (
+            self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
         )
+        if isinstance(dataset, DataFrame):
+            expected_output_cols, example_output_pd_df = self._align_expected_output(
+                "fit_predict", dataset, expected_output_cols, output_cols_prefix
+            )
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+                example_output_pd_df=example_output_pd_df,
+            )
+        else:
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+            )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
         return output_result
@@ -783,12 +792,41 @@ class XGBRFClassifier(BaseTransformer):
         return rv
-    def _align_expected_output_names(
-        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
-    ) -> List[str]:
+    def _align_expected_output(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
+    ) -> Tuple[List[str], pd.DataFrame]:
+        """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
+        and output dataframe with 1 line.
+        If the method is fit_predict, run 2 lines of data.
+        """
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
+        # so change the minimum of number of rows to 2
+        num_examples = 2
+        statement_params = telemetry.get_function_usage_statement_params(
+            project=_PROJECT,
+            subproject=_SUBPROJECT,
+            function_name=telemetry.get_statement_params_full_func_name(
+                inspect.currentframe(), XGBRFClassifier.__class__.__name__
+            ),
+            api_calls=[Session.call],
+            custom_tags={"autogen": True} if self._autogenerated else None,
+        )
+        if output_cols_prefix == "fit_predict_":
+            if hasattr(self._sklearn_object, "n_clusters"):
+                # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
+                num_examples = self._sklearn_object.n_clusters
+            elif hasattr(self._sklearn_object, "min_samples"):
+                # OPTICS default min_samples 5, which requires at least 5 lines of data
+                num_examples = self._sklearn_object.min_samples
+            elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
+                # LocalOutlierFactor expects n_neighbors <= n_samples
+                num_examples = self._sklearn_object.n_neighbors
+            sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
+        else:
+            sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
         # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
         # seen during the fit.
@@ -800,12 +838,14 @@ class XGBRFClassifier(BaseTransformer):
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
             output_df_columns_set -= set(self.sample_weight_col)
         # if the dimension of inferred output column names is correct; use it
         if len(expected_output_cols_list) == len(output_df_columns_set):
-            return expected_output_cols_list
+            return expected_output_cols_list, output_df_pd
         # otherwise, use the sklearn estimator's output
         else:
-            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            return expected_output_cols_list, output_df_pd[expected_output_cols_list]
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -853,7 +893,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -921,7 +961,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -984,7 +1024,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1049,7 +1089,7 @@ class XGBRFClassifier(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1114,7 +1154,7 @@ class XGBRFClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                score_sproc_imports=['xgboost'],
+                score_sproc_imports=['xgboost', 'sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
             # pandas_handler.score() does not require any extra kwargs.

snowflake/ml/modeling/xgboost/xgbrf_regressor.py CHANGED Viewed

@@ -4,18 +4,17 @@
 #
 import inspect
 import os
-import posixpath
-from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
-from typing_extensions import TypeGuard
+from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
 from uuid import uuid4
 import cloudpickle as cp
-import pandas as pd
 import numpy as np
+import pandas as pd
 from numpy import typing as npt
 import numpy
+import sklearn
 import xgboost
 from sklearn.utils.metaestimators import available_if
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
 from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
-from snowflake.ml._internal.utils import pkg_version_utils, identifier
+from snowflake.ml._internal.utils import identifier
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.transformer_protocols import (
-    ModelTransformHandlers,
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
@@ -363,7 +361,7 @@ class XGBRFRegressor(BaseTransformer):
         self.set_sample_weight_col(sample_weight_col)
         self._use_external_memory_version = use_external_memory_version
         self._batch_size = batch_size
-        deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
+        deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
         self._deps = list(deps)
@@ -699,12 +697,23 @@ class XGBRFRegressor(BaseTransformer):
             autogenerated=self._autogenerated,
             subproject=_SUBPROJECT,
         )
-        output_result, fitted_estimator = model_trainer.train_fit_predict(
-            drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=(
-                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
-            ),
+        expected_output_cols = (
+            self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
         )
+        if isinstance(dataset, DataFrame):
+            expected_output_cols, example_output_pd_df = self._align_expected_output(
+                "fit_predict", dataset, expected_output_cols, output_cols_prefix
+            )
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+                example_output_pd_df=example_output_pd_df,
+            )
+        else:
+            output_result, fitted_estimator = model_trainer.train_fit_predict(
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_list=expected_output_cols,
+            )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
         return output_result
@@ -783,12 +792,41 @@ class XGBRFRegressor(BaseTransformer):
         return rv
-    def _align_expected_output_names(
-        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
-    ) -> List[str]:
+    def _align_expected_output(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
+    ) -> Tuple[List[str], pd.DataFrame]:
+        """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
+        and output dataframe with 1 line.
+        If the method is fit_predict, run 2 lines of data.
+        """
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
+        # so change the minimum of number of rows to 2
+        num_examples = 2
+        statement_params = telemetry.get_function_usage_statement_params(
+            project=_PROJECT,
+            subproject=_SUBPROJECT,
+            function_name=telemetry.get_statement_params_full_func_name(
+                inspect.currentframe(), XGBRFRegressor.__class__.__name__
+            ),
+            api_calls=[Session.call],
+            custom_tags={"autogen": True} if self._autogenerated else None,
+        )
+        if output_cols_prefix == "fit_predict_":
+            if hasattr(self._sklearn_object, "n_clusters"):
+                # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
+                num_examples = self._sklearn_object.n_clusters
+            elif hasattr(self._sklearn_object, "min_samples"):
+                # OPTICS default min_samples 5, which requires at least 5 lines of data
+                num_examples = self._sklearn_object.min_samples
+            elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
+                # LocalOutlierFactor expects n_neighbors <= n_samples
+                num_examples = self._sklearn_object.n_neighbors
+            sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
+        else:
+            sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
         # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
         # seen during the fit.
@@ -800,12 +838,14 @@ class XGBRFRegressor(BaseTransformer):
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
             output_df_columns_set -= set(self.sample_weight_col)
         # if the dimension of inferred output column names is correct; use it
         if len(expected_output_cols_list) == len(output_df_columns_set):
-            return expected_output_cols_list
+            return expected_output_cols_list, output_df_pd
         # otherwise, use the sklearn estimator's output
         else:
-            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+            return expected_output_cols_list, output_df_pd[expected_output_cols_list]
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -851,7 +891,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -917,7 +957,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -980,7 +1020,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1045,7 +1085,7 @@ class XGBRFRegressor(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
-            expected_output_cols = self._align_expected_output_names(
+            expected_output_cols, _ = self._align_expected_output(
                 inference_method, dataset, expected_output_cols, output_cols_prefix
             )
@@ -1110,7 +1150,7 @@ class XGBRFRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                score_sproc_imports=['xgboost'],
+                score_sproc_imports=['xgboost', 'sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
             # pandas_handler.score() does not require any extra kwargs.

snowflake/ml/registry/_manager/model_manager.py CHANGED Viewed

@@ -9,7 +9,7 @@ from snowflake.ml._internal.human_readable_id import hrid_generator
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._client.model import model_impl, model_version_impl
-from snowflake.ml.model._client.ops import metadata_ops, model_ops
+from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
 from snowflake.ml.model._model_composer import model_composer
 from snowflake.ml.model._packager.model_meta import model_meta
 from snowflake.snowpark import session
@@ -30,6 +30,9 @@ class ModelManager:
         self._model_ops = model_ops.ModelOperator(
             session, database_name=self._database_name, schema_name=self._schema_name
         )
+        self._service_ops = service_ops.ServiceOperator(
+            session, database_name=self._database_name, schema_name=self._schema_name
+        )
         self._hrid_generator = hrid_generator.HRID16()
     def log_model(
@@ -47,6 +50,7 @@ class ModelManager:
         sample_input_data: Optional[model_types.SupportedDataType] = None,
         code_paths: Optional[List[str]] = None,
         ext_modules: Optional[List[ModuleType]] = None,
+        model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
         options: Optional[model_types.ModelSaveOption] = None,
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> model_version_impl.ModelVersion:
@@ -86,6 +90,7 @@ class ModelManager:
             sample_input_data=sample_input_data,
             code_paths=code_paths,
             ext_modules=ext_modules,
+            model_objective=model_objective,
             options=options,
             statement_params=statement_params,
         )
@@ -105,6 +110,7 @@ class ModelManager:
         sample_input_data: Optional[model_types.SupportedDataType] = None,
         code_paths: Optional[List[str]] = None,
         ext_modules: Optional[List[ModuleType]] = None,
+        model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
         options: Optional[model_types.ModelSaveOption] = None,
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> model_version_impl.ModelVersion:
@@ -153,6 +159,7 @@ class ModelManager:
             code_paths=code_paths,
             ext_modules=ext_modules,
             options=options,
+            model_objective=model_objective,
         )
         statement_params = telemetry.add_statement_params_custom_tags(
             statement_params, model_metadata.telemetry_metadata()
@@ -173,11 +180,16 @@ class ModelManager:
         )
         mv = model_version_impl.ModelVersion._ref(
-            model_ops.ModelOperator(
+            model_ops=model_ops.ModelOperator(
                 self._model_ops._session,
                 database_name=database_name_id or self._database_name,
                 schema_name=schema_name_id or self._schema_name,
             ),
+            service_ops=service_ops.ServiceOperator(
+                self._service_ops._session,
+                database_name=database_name_id or self._database_name,
+                schema_name=schema_name_id or self._schema_name,
+            ),
             model_name=model_name_id,
             version_name=version_name_id,
         )
@@ -216,6 +228,11 @@ class ModelManager:
                     database_name=database_name_id or self._database_name,
                     schema_name=schema_name_id or self._schema_name,
                 ),
+                service_ops=service_ops.ServiceOperator(
+                    self._service_ops._session,
+                    database_name=database_name_id or self._database_name,
+                    schema_name=schema_name_id or self._schema_name,
+                ),
                 model_name=model_name_id,
             )
         else:
@@ -234,6 +251,7 @@ class ModelManager:
         return [
             model_impl.Model._ref(
                 self._model_ops,
+                service_ops=self._service_ops,
                 model_name=model_name,
             )
             for model_name in model_names

snowflake/ml/registry/model_registry.py CHANGED Viewed

@@ -576,7 +576,7 @@ fully integrated into the new registry.
         raw_stage_path = uri.get_snowflake_stage_path_from_uri(model_uri)
         if not raw_stage_path:
             return None
-        (db, schema, stage, _) = identifier.parse_schema_level_object_identifier(raw_stage_path)
+        (db, schema, stage, _) = identifier.parse_snowflake_stage_path(raw_stage_path)
         return identifier.get_schema_level_object_identifier(db, schema, stage)
     def _list_selected_models(

snowflake/ml/registry/registry.py CHANGED Viewed

@@ -244,8 +244,7 @@ class Registry:
             warnings.warn(
                 "Models logged specifying `pip_requirements` can not be executed "
                 "in Snowflake Warehouse where all dependencies are required to be retrieved "
-                "from Snowflake Anaconda Channel. Specify model save option `include_pip_dependencies`"
-                "to log model with pip dependencies.",
+                "from Snowflake Anaconda Channel.",
                 category=UserWarning,
                 stacklevel=1,
             )

snowflake/ml/utils/sql_client.py ADDED Viewed

@@ -0,0 +1,22 @@
+from enum import Enum
+from typing import Dict
+class CreationOption(Enum):
+    FAIL_IF_NOT_EXIST = 1
+    CREATE_IF_NOT_EXIST = 2
+    OR_REPLACE = 3
+class CreationMode:
+    def __init__(self, *, if_not_exists: bool = False, or_replace: bool = False) -> None:
+        self.if_not_exists = if_not_exists
+        self.or_replace = or_replace
+    def get_ddl_phrases(self) -> Dict[CreationOption, str]:
+        if_not_exists_sql = " IF NOT EXISTS" if self.if_not_exists else ""
+        or_replace_sql = " OR REPLACE" if self.or_replace else ""
+        return {
+            CreationOption.CREATE_IF_NOT_EXIST: if_not_exists_sql,
+            CreationOption.OR_REPLACE: or_replace_sql,
+        }

snowflake/ml/version.py CHANGED Viewed

@@ -1 +1 @@
-VERSION="1.6.0"
+VERSION="1.6.2"

{snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: snowflake-ml-python
-Version: 1.6.0
+Version: 1.6.2
 Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
 Author-email: "Snowflake, Inc" <support@snowflake.com>
 License:
@@ -253,7 +253,7 @@ Requires-Dist: snowflake-connector-python[pandas] <4,>=3.5.0
 Requires-Dist: snowflake-snowpark-python <2,>=1.17.0
 Requires-Dist: sqlparse <1,>=0.4
 Requires-Dist: typing-extensions <5,>=4.1.0
-Requires-Dist: xgboost <2,>=1.7.3
+Requires-Dist: xgboost <2.1,>=1.7.3
 Provides-Extra: all
 Requires-Dist: catboost <2,>=1.2.0 ; extra == 'all'
 Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'all'
@@ -373,7 +373,51 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
 # Release History
-## 1.6.0
+## 1.6.2 (TBD)
+### Bug Fixes
+- Modeling: Support XGBoost version that is larger than 2.
+- Data: Fix multiple epoch iteration over `DataConnector.to_torch_datapipe()` DataPipes.
+- Generic: Fix a bug that when an invalid name is provided to argument where fully qualified name is expected, it will
+  be parsed wrongly. Now it raises an exception correctly.
+- Model Explainability: Handle explanations for multiclass XGBoost classification models
+- Model Explainability: Workarounds and better error handling for XGB>2.1.0 not working with SHAP==0.42.1
+### New Features
+- Data: Add top-level exports for `DataConnector` and `DataSource` to `snowflake.ml.data`.
+- Data: Add native batching support via `batch_size` and `drop_last_batch` arguments to `DataConnector.to_torch_dataset()`
+- Feature Store: update_feature_view() supports taking feature view object as argument.
+### Behavior Changes
+## 1.6.1 (2024-08-12)
+### Bug Fixes
+- Feature Store: Support large metadata blob when generating dataset
+- Feature Store: Added a hidden knob in FeatureView as kargs for setting customized
+  refresh_mode
+- Registry: Fix an error message in Model Version `run` when `function_name` is not mentioned and model has multiple
+  target methods.
+- Cortex inference: snowflake.cortex.Complete now only uses the REST API for streaming and the use_rest_api_experimental
+  is no longer needed.
+- Feature Store: Add a new API: FeatureView.list_columns() which list all column information.
+- Data: Fix `DataFrame` ingestion with `ArrowIngestor`.
+### New Features
+- Enable `set_params` to set the parameters of the underlying sklearn estimator, if the snowflake-ml model has been fit.
+- Data: Add `snowflake.ml.data.ingestor_utils` module with utility functions helpful for `DataIngestor` implementations.
+- Data: Add new `to_torch_dataset()` connector to `DataConnector` to replace deprecated DataPipe.
+- Registry: Option to `enable_explainability` set to True by default for XGBoost, LightGBM and CatBoost as PuPr feature.
+- Registry: Option to `enable_explainability` when registering SHAP supported sklearn models.
+### Behavior Changes
+## 1.6.0 (2024-07-29)
 ### Bug Fixes
@@ -402,6 +446,14 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
   distributed_hpo_trainer.ENABLE_EFFICIENT_MEMORY_USAGE = False
   `
 - Registry: Option to `enable_explainability` when registering LightGBM models as a pre-PuPr feature.
+- Data: Add new `snowflake.ml.data` preview module which contains data reading utilities like `DataConnector`
+  - `DataConnector` provides efficient connectors from Snowpark `DataFrame`
+  and Snowpark ML `Dataset` to external frameworks like PyTorch, TensorFlow, and Pandas. Create `DataConnector`
+  instances using the classmethod constructors `DataConnector.from_dataset()` and `DataConnector.from_dataframe()`.
+- Data: Add new `DataConnector.from_sources()` classmethod constructor for constructing from `DataSource` objects.
+- Data: Add new `ingestor_class` arg to `DataConnector` classmethod constructors for easier `DataIngestor` injection.
+- Dataset: `DatasetReader` now subclasses new `DataConnector` class.
+  - Add optional `limit` arg to `DatasetReader.to_pandas()`
 ### Behavior Changes

snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl

snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl