PyPI - snowflake-ml-python - Versions diffs - 1.5.3__py3-none-any.whl → 1.5.4__py3-none-any.whl - Mend

snowflake-ml-python 1.5.3__py3-none-any.whl → 1.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

snowflake/ml/modeling/impute/missing_indicator.py CHANGED Viewed

@@ -76,8 +76,10 @@ class MissingIndicator(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py CHANGED Viewed

@@ -76,8 +76,10 @@ class AdditiveChi2Sampler(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/kernel_approximation/nystroem.py CHANGED Viewed

@@ -76,8 +76,10 @@ class Nystroem(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py CHANGED Viewed

@@ -76,8 +76,10 @@ class PolynomialCountSketch(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/kernel_approximation/rbf_sampler.py CHANGED Viewed

@@ -76,8 +76,10 @@ class RBFSampler(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py CHANGED Viewed

@@ -76,8 +76,10 @@ class SkewedChi2Sampler(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/linear_model/sgd_one_class_svm.py CHANGED Viewed

@@ -76,8 +76,10 @@ class SGDOneClassSVM(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/manifold/isomap.py CHANGED Viewed

@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/manifold/mds.py CHANGED Viewed

@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/manifold/spectral_embedding.py CHANGED Viewed

@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/manifold/tsne.py CHANGED Viewed

@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/metrics/ranking.py CHANGED Viewed

@@ -102,6 +102,7 @@ def precision_recall_curve(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -249,6 +250,7 @@ def roc_auc_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -352,6 +354,7 @@ def roc_curve(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:

snowflake/ml/modeling/metrics/regression.py CHANGED Viewed

@@ -87,6 +87,7 @@ def d2_absolute_error_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -184,6 +185,7 @@ def d2_pinball_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -299,6 +301,7 @@ def explained_variance_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:

snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py CHANGED Viewed

@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/mixture/gaussian_mixture.py CHANGED Viewed

@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/neighbors/kernel_density.py CHANGED Viewed

@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/neighbors/local_outlier_factor.py CHANGED Viewed

@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/neighbors/nearest_neighbors.py CHANGED Viewed

@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/neural_network/bernoulli_rbm.py CHANGED Viewed

@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/modeling/pipeline/pipeline.py CHANGED Viewed

@@ -378,6 +378,7 @@ class Pipeline(base.BaseTransformer):
                 anonymous=True,
                 imports=imports,  # type: ignore[arg-type]
                 statement_params=sproc_statement_params,
+                execute_as="caller",
             )
             sproc_export_file_name: str = pipeline_within_one_sproc(

snowflake/ml/modeling/preprocessing/one_hot_encoder.py CHANGED Viewed

@@ -101,16 +101,20 @@ class OneHotEncoder(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html).
     Args:
-        categories: 'auto' or dict {column_name: np.ndarray([category])}, default='auto'
+        categories: 'auto', list of array-like, or dict {column_name: np.ndarray([category])}, default='auto'
             Categories (unique values) per feature:
             - 'auto': Determine categories automatically from the training data.
+            - list: ``categories[i]`` holds the categories expected in the ith
+            column. The passed categories should not mix strings and numeric
+            values within a single feature, and should be sorted in case of
+            numeric values.
             - dict: ``categories[column_name]`` holds the categories expected in
               the column provided. The passed categories should not mix strings
               and numeric values within a single feature, and should be sorted in
               case of numeric values.
             The used categories can be found in the ``categories_`` attribute.
-        drop: {‘first’, ‘if_binary’} or an array-like of shape (n_features,), default=None
+        drop: {'first', 'if_binary'} or an array-like of shape (n_features,), default=None
             Specifies a methodology to use to drop one of the categories per
             feature. This is useful in situations where perfectly collinear
             features cause problems, such as when feeding the resulting data
@@ -206,7 +210,7 @@ class OneHotEncoder(base.BaseTransformer):
     def __init__(
         self,
         *,
-        categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
+        categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
         drop: Optional[Union[str, npt.ArrayLike]] = None,
         sparse: bool = False,
         handle_unknown: str = "error",
@@ -440,8 +444,19 @@ class OneHotEncoder(base.BaseTransformer):
         assert found_state_df is not None
         if self.categories != "auto":
             state_data = []
-            assert isinstance(self.categories, dict)
-            for input_col, cats in self.categories.items():
+            if isinstance(self.categories, list):
+                categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
+            elif isinstance(self.categories, dict):
+                categories_map = self.categories
+            else:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=ValueError(
+                        f"Invalid type {type(self.categories)} provided for argument `categories`"
+                    ),
+                )
+            for input_col, cats in categories_map.items():
                 for cat in cats.tolist():
                     state_data.append([input_col, cat])
             # states of given categories
@@ -565,6 +580,8 @@ class OneHotEncoder(base.BaseTransformer):
                     else:
                         categories[k] = vectorized_func(v)
             self.categories_ = categories
+        elif isinstance(self.categories, list):
+            self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
         else:
             self.categories_ = self.categories
@@ -850,8 +867,15 @@ class OneHotEncoder(base.BaseTransformer):
         # In case of fitting with pandas dataframe and transforming with snowpark dataframe
         # state_pandas cannot recognize the datatype of _CATEGORY and _FITTED_CATEGORY column
         # Therefore, apply the convert_to_string_excluding_nan function to _CATEGORY and _FITTED_CATEGORY
-        state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].applymap(convert_to_string_excluding_nan)
-        state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].applymap(convert_to_string_excluding_nan)
+        # applymap is deprecated since pandas 2.1.0, replaced by map
+        if pd.__version__ < "2.1.0":
+            state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].applymap(convert_to_string_excluding_nan)
+            state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].applymap(
+                convert_to_string_excluding_nan
+            )
+        else:
+            state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].map(convert_to_string_excluding_nan)
+            state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].map(convert_to_string_excluding_nan)
         state_df = dataset._session.create_dataframe(state_pandas)
         transformed_dataset = dataset
@@ -1009,7 +1033,7 @@ class OneHotEncoder(base.BaseTransformer):
                 error_code=error_codes.INVALID_ATTRIBUTE,
                 original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
             )
-        elif isinstance(self.categories, dict):
+        elif isinstance(self.categories, (dict, list)):
             if len(self.categories) != len(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
@@ -1018,7 +1042,7 @@ class OneHotEncoder(base.BaseTransformer):
                         f"({len(self.input_cols)})."
                     ),
                 )
-            elif set(self.categories.keys()) != set(self.input_cols):
+            elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
                     original_exception=ValueError(
@@ -1537,6 +1561,16 @@ class OneHotEncoder(base.BaseTransformer):
         default_sklearn_args = _utils.get_default_args(default_sklearn_obj.__class__.__init__)
         given_args = self.get_params()
+        if "categories" in given_args and isinstance(given_args["categories"], dict):
+            # sklearn requires a list of array-like to satisfy the `categories` arg
+            try:
+                given_args["categories"] = [given_args["categories"][input_col] for input_col in self.input_cols]
+            except KeyError as e:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=e,
+                )
         # replace 'sparse' with 'sparse_output' when scikit-learn>=1.2
         sklearn_version = sklearn.__version__
         if version.parse(sklearn_version) >= version.parse(_SKLEARN_DEPRECATED_KEYWORD_TO_VERSION_DICT["sparse"]):

snowflake/ml/modeling/preprocessing/ordinal_encoder.py CHANGED Viewed

@@ -45,9 +45,11 @@ class OrdinalEncoder(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html).
     Args:
-        categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]], default="auto"
+        categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]],
+        default="auto"
             The string 'auto' (the default) causes the categories to be extracted from the input columns.
-            To specify the categories yourself, pass a dictionary mapping the column name to an ndarray containing the
+            To specify the categories yourself, pass either (1) a list of ndarrays containing the categories or
+            (2) a dictionary mapping the column name to an ndarray containing the
             categories.
         handle_unknown: str, default="error"
@@ -96,7 +98,7 @@ class OrdinalEncoder(base.BaseTransformer):
     def __init__(
         self,
         *,
-        categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
+        categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
         handle_unknown: str = "error",
         unknown_value: Optional[Union[int, float]] = None,
         encoded_missing_value: Union[int, float] = np.nan,
@@ -114,9 +116,13 @@ class OrdinalEncoder(base.BaseTransformer):
         a single column of integers (0 to n_categories - 1) per feature.
         Args:
-            categories: 'auto' or dict {column_name: ndarray([category])}, default='auto'
+            categories: 'auto', list of array-like, or dict {column_name: ndarray([category])}, default='auto'
                 Categories (unique values) per feature:
                 - 'auto': Determine categories automatically from the training data.
+                - list: ``categories[i]`` holds the categories expected in the ith
+                  column. The passed categories should not mix strings and numeric
+                  values within a single feature, and should be sorted in case of
+                  numeric values.
                 - dict: ``categories[column_name]`` holds the categories expected in
                   the column provided. The passed categories should not mix strings
                   and numeric values within a single feature, and should be sorted in
@@ -317,8 +323,19 @@ class OrdinalEncoder(base.BaseTransformer):
         assert found_state_df is not None
         if self.categories != "auto":
             state_data = []
-            assert isinstance(self.categories, dict)
-            for input_col, cats in self.categories.items():
+            if isinstance(self.categories, list):
+                categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
+            elif isinstance(self.categories, dict):
+                categories_map = self.categories
+            else:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=ValueError(
+                        f"Invalid type {type(self.categories)} provided for argument `categories`"
+                    ),
+                )
+            for input_col, cats in categories_map.items():
                 for idx, cat in enumerate(cats.tolist()):
                     state_data.append([input_col, cat, idx])
             # states of given categories
@@ -368,6 +385,8 @@ class OrdinalEncoder(base.BaseTransformer):
                 for col_name, cats in grouped_categories.items()
             }
             self.categories_ = categories
+        elif isinstance(self.categories, list):
+            self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
         else:
             self.categories_ = self.categories
@@ -548,6 +567,15 @@ class OrdinalEncoder(base.BaseTransformer):
             snowml_only_keywords=_SNOWML_ONLY_KEYWORDS,
             sklearn_added_keyword_to_version_dict=_SKLEARN_ADDED_KEYWORD_TO_VERSION_DICT,
         )
+        if "categories" in sklearn_args and isinstance(sklearn_args["categories"], dict):
+            # sklearn requires a list of array-like to satisfy the `categories` arg
+            try:
+                sklearn_args["categories"] = [sklearn_args["categories"][input_col] for input_col in self.input_cols]
+            except KeyError as e:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=e,
+                )
         return preprocessing.OrdinalEncoder(**sklearn_args)
     def _create_sklearn_object(self) -> preprocessing.OrdinalEncoder:
@@ -570,7 +598,7 @@ class OrdinalEncoder(base.BaseTransformer):
                 error_code=error_codes.INVALID_ATTRIBUTE,
                 original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
             )
-        elif isinstance(self.categories, dict):
+        elif isinstance(self.categories, (dict, list)):
             if len(self.categories) != len(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
@@ -579,7 +607,7 @@ class OrdinalEncoder(base.BaseTransformer):
                         f"({len(self.input_cols)})."
                     ),
                 )
-            elif set(self.categories.keys()) != set(self.input_cols):
+            elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
                     original_exception=ValueError(

snowflake/ml/modeling/preprocessing/polynomial_features.py CHANGED Viewed

@@ -76,8 +76,10 @@ class PolynomialFeatures(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must

snowflake/ml/registry/_manager/model_manager.py CHANGED Viewed

@@ -4,12 +4,14 @@ from typing import Any, Dict, List, Optional, Union
 import pandas as pd
 from absl.logging import logging
+from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.human_readable_id import hrid_generator
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._client.model import model_impl, model_version_impl
 from snowflake.ml.model._client.ops import metadata_ops, model_ops
 from snowflake.ml.model._model_composer import model_composer
+from snowflake.ml.model._packager.model_meta import model_meta
 from snowflake.snowpark import session
 logger = logging.getLogger(__name__)
@@ -124,7 +126,10 @@ class ModelManager:
             version_name=version_name_id,
             statement_params=statement_params,
         ):
-            raise ValueError(f"Model {model_name} version {version_name} already existed.")
+            raise ValueError(
+                f"Model {model_name} version {version_name} already existed. "
+                + "To auto-generate `version_name`, skip that argument."
+            )
         stage_path = self._model_ops.prepare_model_stage_path(
             database_name=database_name_id,
@@ -134,8 +139,10 @@ class ModelManager:
         logger.info("Start packaging and uploading your model. It might take some time based on the size of the model.")
-        mc = model_composer.ModelComposer(self._model_ops._session, stage_path=stage_path)
-        mc.save(
+        mc = model_composer.ModelComposer(
+            self._model_ops._session, stage_path=stage_path, statement_params=statement_params
+        )
+        model_metadata: model_meta.ModelMetadata = mc.save(
             name=model_name_id.resolved(),
             model=model,
             signatures=signatures,
@@ -147,6 +154,12 @@ class ModelManager:
             ext_modules=ext_modules,
             options=options,
         )
+        statement_params = telemetry.add_statement_params_custom_tags(
+            statement_params, model_metadata.telemetry_metadata()
+        )
+        statement_params = telemetry.add_statement_params_custom_tags(
+            statement_params, {"model_version_name": version_name_id}
+        )
         logger.info("Start creating MODEL object for you in the Snowflake.")

snowflake/ml/version.py CHANGED Viewed

@@ -1 +1 @@
-VERSION="1.5.3"
+VERSION="1.5.4"

snowflake-ml-python 1.5.3__py3-none-any.whl → 1.5.4__py3-none-any.whl

snowflake-ml-python 1.5.3__py3-none-any.whl → 1.5.4__py3-none-any.whl