snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. snowflake/cortex/__init__.py +2 -1
  2. snowflake/cortex/_complete.py +240 -16
  3. snowflake/cortex/_extract_answer.py +0 -1
  4. snowflake/cortex/_sentiment.py +0 -1
  5. snowflake/cortex/_sse_client.py +81 -0
  6. snowflake/cortex/_summarize.py +0 -1
  7. snowflake/cortex/_translate.py +0 -1
  8. snowflake/cortex/_util.py +34 -10
  9. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  10. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  11. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  12. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  13. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  14. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  15. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  16. snowflake/ml/_internal/telemetry.py +26 -0
  17. snowflake/ml/_internal/utils/identifier.py +14 -0
  18. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  19. snowflake/ml/dataset/dataset.py +54 -32
  20. snowflake/ml/dataset/dataset_factory.py +3 -4
  21. snowflake/ml/feature_store/feature_store.py +440 -243
  22. snowflake/ml/feature_store/feature_view.py +61 -9
  23. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  24. snowflake/ml/fileset/fileset.py +2 -2
  25. snowflake/ml/fileset/snowfs.py +4 -15
  26. snowflake/ml/fileset/stage_fs.py +6 -8
  27. snowflake/ml/lineage/__init__.py +3 -0
  28. snowflake/ml/lineage/lineage_node.py +139 -0
  29. snowflake/ml/model/_client/model/model_impl.py +47 -14
  30. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  31. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  32. snowflake/ml/model/_client/sql/model.py +1 -0
  33. snowflake/ml/model/_client/sql/model_version.py +47 -4
  34. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
  35. snowflake/ml/model/_model_composer/model_composer.py +7 -6
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
  37. snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
  38. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
  39. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -3
  40. snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
  41. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  42. snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
  43. snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
  44. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  45. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  46. snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
  47. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  48. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  49. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  50. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  51. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  52. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  53. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  54. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  55. snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
  56. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  57. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  58. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  59. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  60. snowflake/ml/model/_packager/model_packager.py +9 -4
  61. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  62. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  63. snowflake/ml/model/_signatures/core.py +13 -1
  64. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  65. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  66. snowflake/ml/model/custom_model.py +22 -2
  67. snowflake/ml/model/model_signature.py +2 -0
  68. snowflake/ml/model/type_hints.py +74 -4
  69. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  70. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +158 -121
  71. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
  72. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +39 -18
  73. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +88 -134
  74. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +22 -17
  75. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
  76. snowflake/ml/modeling/cluster/affinity_propagation.py +5 -3
  77. snowflake/ml/modeling/cluster/agglomerative_clustering.py +5 -3
  78. snowflake/ml/modeling/cluster/birch.py +5 -3
  79. snowflake/ml/modeling/cluster/bisecting_k_means.py +5 -3
  80. snowflake/ml/modeling/cluster/dbscan.py +5 -3
  81. snowflake/ml/modeling/cluster/feature_agglomeration.py +5 -3
  82. snowflake/ml/modeling/cluster/k_means.py +5 -3
  83. snowflake/ml/modeling/cluster/mean_shift.py +5 -3
  84. snowflake/ml/modeling/cluster/mini_batch_k_means.py +5 -3
  85. snowflake/ml/modeling/cluster/optics.py +5 -3
  86. snowflake/ml/modeling/cluster/spectral_biclustering.py +5 -3
  87. snowflake/ml/modeling/cluster/spectral_clustering.py +5 -3
  88. snowflake/ml/modeling/cluster/spectral_coclustering.py +5 -3
  89. snowflake/ml/modeling/compose/column_transformer.py +5 -3
  90. snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
  91. snowflake/ml/modeling/covariance/elliptic_envelope.py +5 -3
  92. snowflake/ml/modeling/covariance/empirical_covariance.py +5 -3
  93. snowflake/ml/modeling/covariance/graphical_lasso.py +5 -3
  94. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +5 -3
  95. snowflake/ml/modeling/covariance/ledoit_wolf.py +5 -3
  96. snowflake/ml/modeling/covariance/min_cov_det.py +5 -3
  97. snowflake/ml/modeling/covariance/oas.py +5 -3
  98. snowflake/ml/modeling/covariance/shrunk_covariance.py +5 -3
  99. snowflake/ml/modeling/decomposition/dictionary_learning.py +5 -3
  100. snowflake/ml/modeling/decomposition/factor_analysis.py +5 -3
  101. snowflake/ml/modeling/decomposition/fast_ica.py +5 -3
  102. snowflake/ml/modeling/decomposition/incremental_pca.py +5 -3
  103. snowflake/ml/modeling/decomposition/kernel_pca.py +5 -3
  104. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -3
  105. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -3
  106. snowflake/ml/modeling/decomposition/pca.py +5 -3
  107. snowflake/ml/modeling/decomposition/sparse_pca.py +5 -3
  108. snowflake/ml/modeling/decomposition/truncated_svd.py +5 -3
  109. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
  110. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
  111. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
  112. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
  113. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
  114. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
  115. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
  116. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
  117. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
  118. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
  119. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
  120. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
  121. snowflake/ml/modeling/ensemble/isolation_forest.py +5 -3
  122. snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
  123. snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
  124. snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
  125. snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
  126. snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
  127. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
  128. snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
  129. snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
  130. snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
  131. snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
  132. snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
  133. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  134. snowflake/ml/modeling/feature_selection/variance_threshold.py +5 -3
  135. snowflake/ml/modeling/framework/base.py +3 -8
  136. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
  137. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
  138. snowflake/ml/modeling/impute/iterative_imputer.py +5 -3
  139. snowflake/ml/modeling/impute/knn_imputer.py +5 -3
  140. snowflake/ml/modeling/impute/missing_indicator.py +5 -3
  141. snowflake/ml/modeling/impute/simple_imputer.py +8 -4
  142. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +5 -3
  143. snowflake/ml/modeling/kernel_approximation/nystroem.py +5 -3
  144. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +5 -3
  145. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +5 -3
  146. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +5 -3
  147. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
  148. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
  149. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
  150. snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
  151. snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
  152. snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
  153. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  154. snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
  155. snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
  156. snowflake/ml/modeling/linear_model/lars.py +1 -1
  157. snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
  158. snowflake/ml/modeling/linear_model/lasso.py +1 -1
  159. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  160. snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
  161. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
  162. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
  163. snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
  164. snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
  165. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
  166. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
  167. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  168. snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
  169. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  170. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
  171. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
  172. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  173. snowflake/ml/modeling/linear_model/perceptron.py +1 -1
  174. snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
  175. snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
  176. snowflake/ml/modeling/linear_model/ridge.py +1 -1
  177. snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
  178. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
  179. snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
  180. snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
  181. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -3
  182. snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
  183. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
  184. snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
  185. snowflake/ml/modeling/manifold/isomap.py +5 -3
  186. snowflake/ml/modeling/manifold/mds.py +5 -3
  187. snowflake/ml/modeling/manifold/spectral_embedding.py +5 -3
  188. snowflake/ml/modeling/manifold/tsne.py +5 -3
  189. snowflake/ml/modeling/metrics/ranking.py +3 -0
  190. snowflake/ml/modeling/metrics/regression.py +3 -0
  191. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +5 -3
  192. snowflake/ml/modeling/mixture/gaussian_mixture.py +5 -3
  193. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  194. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  195. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
  196. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
  197. snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
  198. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
  199. snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
  200. snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
  201. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
  202. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
  203. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
  204. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
  205. snowflake/ml/modeling/neighbors/kernel_density.py +5 -3
  206. snowflake/ml/modeling/neighbors/local_outlier_factor.py +5 -3
  207. snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
  208. snowflake/ml/modeling/neighbors/nearest_neighbors.py +5 -3
  209. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
  210. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
  211. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  212. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +5 -3
  213. snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
  214. snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
  215. snowflake/ml/modeling/pipeline/pipeline.py +6 -0
  216. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  217. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  218. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  219. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  220. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  221. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  222. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +53 -11
  223. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +44 -13
  224. snowflake/ml/modeling/preprocessing/polynomial_features.py +5 -3
  225. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  226. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  227. snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
  228. snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
  229. snowflake/ml/modeling/svm/linear_svc.py +1 -1
  230. snowflake/ml/modeling/svm/linear_svr.py +1 -1
  231. snowflake/ml/modeling/svm/nu_svc.py +1 -1
  232. snowflake/ml/modeling/svm/nu_svr.py +1 -1
  233. snowflake/ml/modeling/svm/svc.py +1 -1
  234. snowflake/ml/modeling/svm/svr.py +1 -1
  235. snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
  236. snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
  237. snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
  238. snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
  239. snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
  240. snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
  241. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
  242. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
  243. snowflake/ml/registry/_manager/model_manager.py +16 -3
  244. snowflake/ml/version.py +1 -1
  245. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +51 -7
  246. snowflake_ml_python-1.5.4.dist-info/RECORD +389 -0
  247. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
  248. snowflake_ml_python-1.5.2.dist-info/RECORD +0 -384
  249. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
  250. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/preprocessing/max_abs_scaler.py CHANGED
@@ -28,11 +28,15 @@ class MaxAbsScaler(base.BaseTransformer):
 
     Args:
         input_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
 
         output_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
 
         passthrough_cols: Optional[Union[str, List[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any
snowflake/ml/modeling/preprocessing/min_max_scaler.py CHANGED
@@ -29,12 +29,15 @@ class MinMaxScaler(base.BaseTransformer):
             Whether to clip transformed values of held-out data to the specified feature range (default is True).
 
         input_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be scaled. Each specified
-            input column is scaled independently and stored in the corresponding output column.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
 
         output_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
 
         passthrough_cols: Optional[Union[str, List[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any
snowflake/ml/modeling/preprocessing/normalizer.py CHANGED
@@ -28,11 +28,15 @@ class Normalizer(base.BaseTransformer):
             values. It must be one of 'l1', 'l2', or 'max'.
 
         input_cols: Optional[Union[str, List[str]]]
-            Columns to use as inputs during transform.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be normalized. Input
+            columns must be specified before transform with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
 
         output_cols: Optional[Union[str, List[str]]]
-            A string or list of strings representing column names that will store the output of transform operation.
-            The length of `output_cols` must equal the length of `input_cols`.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
 
         passthrough_cols: Optional[Union[str, List[str]]]
             A string or a list of strings indicating column names to be excluded from any
snowflake/ml/modeling/preprocessing/one_hot_encoder.py CHANGED
@@ -101,16 +101,20 @@ class OneHotEncoder(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html).
 
     Args:
-        categories: 'auto' or dict {column_name: np.ndarray([category])}, default='auto'
+        categories: 'auto', list of array-like, or dict {column_name: np.ndarray([category])}, default='auto'
            Categories (unique values) per feature:
            - 'auto': Determine categories automatically from the training data.
+            - list: ``categories[i]`` holds the categories expected in the ith
+              column. The passed categories should not mix strings and numeric
+              values within a single feature, and should be sorted in case of
+              numeric values.
            - dict: ``categories[column_name]`` holds the categories expected in
              the column provided. The passed categories should not mix strings
              and numeric values within a single feature, and should be sorted in
              case of numeric values.
            The used categories can be found in the ``categories_`` attribute.
 
-        drop: {first’, if_binary} or an array-like of shape (n_features,), default=None
+        drop: {'first', 'if_binary'} or an array-like of shape (n_features,), default=None
            Specifies a methodology to use to drop one of the categories per
            feature. This is useful in situations where perfectly collinear
            features cause problems, such as when feeding the resulting data
@@ -157,10 +161,18 @@ class OneHotEncoder(base.BaseTransformer):
            there is no limit to the number of output features.
 
        input_cols: Optional[Union[str, List[str]]], default=None
-           Single or multiple input columns.
+           The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
+           columns must be specified before fit with this argument or after initialization with the
+           `set_input_cols` method. This argument is optional for API consistency.
 
        output_cols: Optional[Union[str, List[str]]], default=None
-           Single or multiple output columns.
+           The prefix to be used for encoded output for each input column. The number of
+           output column prefixes specified must match the number of input columns. Output column prefixes must be
+           specified before transform with this argument or after initialization with the `set_output_cols` method.
+
+           Note: Dense output column names are case-sensitive and resolve identifiers following Snowflake rules, e.g.
+           `"PREFIX_a"`, `PREFIX_A`, `"prefix_A"`. Therefore, there is no need to provide double-quoted column names
+           as that would result in invalid identifiers.
 
        passthrough_cols: Optional[Union[str, List[str]]]
            A string or a list of strings indicating column names to be excluded from any
@@ -198,7 +210,7 @@ class OneHotEncoder(base.BaseTransformer):
     def __init__(
         self,
         *,
-        categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
+        categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
         drop: Optional[Union[str, npt.ArrayLike]] = None,
         sparse: bool = False,
         handle_unknown: str = "error",
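Taken together, the docstring and signature changes above mean `categories` now accepts three forms. A minimal sketch of the new list form next to the pre-existing dict form; the column names and category values here are illustrative, not from this diff:

    import numpy as np

    from snowflake.ml.modeling.preprocessing import OneHotEncoder

    cats = np.array(["blue", "green", "red"])

    # Pre-1.5.4 style: a dict keyed by input column name.
    enc_dict = OneHotEncoder(categories={"COLOR": cats}, input_cols=["COLOR"], output_cols=["COLOR_OHE"])

    # New in this release: a list aligned positionally with `input_cols`,
    # mirroring scikit-learn's own `categories` argument.
    enc_list = OneHotEncoder(categories=[cats], input_cols=["COLOR"], output_cols=["COLOR_OHE"])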
@@ -432,8 +444,19 @@ class OneHotEncoder(base.BaseTransformer):
         assert found_state_df is not None
         if self.categories != "auto":
             state_data = []
-            assert isinstance(self.categories, dict)
-            for input_col, cats in self.categories.items():
+            if isinstance(self.categories, list):
+                categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
+            elif isinstance(self.categories, dict):
+                categories_map = self.categories
+            else:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=ValueError(
+                        f"Invalid type {type(self.categories)} provided for argument `categories`"
+                    ),
+                )
+
+            for input_col, cats in categories_map.items():
                 for cat in cats.tolist():
                     state_data.append([input_col, cat])
             # states of given categories
@@ -557,6 +580,8 @@ class OneHotEncoder(base.BaseTransformer):
                 else:
                     categories[k] = vectorized_func(v)
             self.categories_ = categories
+        elif isinstance(self.categories, list):
+            self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
         else:
             self.categories_ = self.categories
 
@@ -842,8 +867,15 @@ class OneHotEncoder(base.BaseTransformer):
         # In case of fitting with pandas dataframe and transforming with snowpark dataframe
         # state_pandas cannot recognize the datatype of _CATEGORY and _FITTED_CATEGORY column
         # Therefore, apply the convert_to_string_excluding_nan function to _CATEGORY and _FITTED_CATEGORY
-        state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].applymap(convert_to_string_excluding_nan)
-        state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].applymap(convert_to_string_excluding_nan)
+        # applymap is deprecated since pandas 2.1.0, replaced by map
+        if pd.__version__ < "2.1.0":
+            state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].applymap(convert_to_string_excluding_nan)
+            state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].applymap(
+                convert_to_string_excluding_nan
+            )
+        else:
+            state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].map(convert_to_string_excluding_nan)
+            state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].map(convert_to_string_excluding_nan)
         state_df = dataset._session.create_dataframe(state_pandas)
 
         transformed_dataset = dataset
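The version gate above, shown standalone; `convert` is a hypothetical stand-in for `convert_to_string_excluding_nan`. Note that the released code compares version strings lexicographically, which happens to work for "2.1.0" but would misorder e.g. "2.10.0"; `packaging.version.parse` is the robust alternative.

    import pandas as pd

    def convert(v):
        # Hypothetical stand-in: stringify everything except NaN/None.
        return v if pd.isna(v) else str(v)

    df = pd.DataFrame({"_CATEGORY": ["a", 1, None]})
    # pandas renamed DataFrame.applymap to DataFrame.map in 2.1.0.
    if pd.__version__ < "2.1.0":
        df[["_CATEGORY"]] = df[["_CATEGORY"]].applymap(convert)
    else:
        df[["_CATEGORY"]] = df[["_CATEGORY"]].map(convert)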
@@ -1001,7 +1033,7 @@ class OneHotEncoder(base.BaseTransformer):
                 error_code=error_codes.INVALID_ATTRIBUTE,
                 original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
             )
-        elif isinstance(self.categories, dict):
+        elif isinstance(self.categories, (dict, list)):
             if len(self.categories) != len(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
@@ -1010,7 +1042,7 @@ class OneHotEncoder(base.BaseTransformer):
                         f"({len(self.input_cols)})."
                     ),
                 )
-            elif set(self.categories.keys()) != set(self.input_cols):
+            elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
                     original_exception=ValueError(
@@ -1529,6 +1561,16 @@ class OneHotEncoder(base.BaseTransformer):
         default_sklearn_args = _utils.get_default_args(default_sklearn_obj.__class__.__init__)
         given_args = self.get_params()
 
+        if "categories" in given_args and isinstance(given_args["categories"], dict):
+            # sklearn requires a list of array-like to satisfy the `categories` arg
+            try:
+                given_args["categories"] = [given_args["categories"][input_col] for input_col in self.input_cols]
+            except KeyError as e:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=e,
+                )
+
         # replace 'sparse' with 'sparse_output' when scikit-learn>=1.2
         sklearn_version = sklearn.__version__
         if version.parse(sklearn_version) >= version.parse(_SKLEARN_DEPRECATED_KEYWORD_TO_VERSION_DICT["sparse"]):
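The conversion this last hunk adds, reduced to its core: scikit-learn's encoders accept `categories` only as a list ordered like the columns, so a dict keyed by column name is reordered through `input_cols`. Column names and values here are illustrative:

    import numpy as np

    input_cols = ["SIZE", "COLOR"]
    categories = {
        "COLOR": np.array(["blue", "green", "red"]),
        "SIZE": np.array(["S", "M", "L"]),
    }
    # Reorder dict values positionally; a missing key raises KeyError,
    # which the wrapped code surfaces as a SnowflakeMLException.
    sklearn_categories = [categories[col] for col in input_cols]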
snowflake/ml/modeling/preprocessing/ordinal_encoder.py CHANGED
@@ -45,9 +45,11 @@ class OrdinalEncoder(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html).
 
     Args:
-        categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]], default="auto"
+        categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]],
+            default="auto"
            The string 'auto' (the default) causes the categories to be extracted from the input columns.
-           To specify the categories yourself, pass a dictionary mapping the column name to an ndarray containing the
+           To specify the categories yourself, pass either (1) a list of ndarrays containing the categories or
+           (2) a dictionary mapping the column name to an ndarray containing the
            categories.
 
         handle_unknown: str, default="error"
@@ -67,11 +69,14 @@ class OrdinalEncoder(base.BaseTransformer):
            The value to be used to encode unknown categories.
 
         input_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be encoded.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
 
         output_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The prefix to be used for encoded output for each input column. The number of
+            output column prefixes specified must equal the number of input columns. Output column prefixes must be
+            specified before transform with this argument or after initialization with the `set_output_cols` method.
 
         passthrough_cols: Optional[Union[str, List[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any
@@ -93,7 +98,7 @@ class OrdinalEncoder(base.BaseTransformer):
     def __init__(
         self,
         *,
-        categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
+        categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
         handle_unknown: str = "error",
         unknown_value: Optional[Union[int, float]] = None,
         encoded_missing_value: Union[int, float] = np.nan,
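`OrdinalEncoder` gains the same positional list form as `OneHotEncoder`. A minimal sketch with illustrative values:

    import numpy as np

    from snowflake.ml.modeling.preprocessing import OrdinalEncoder

    enc = OrdinalEncoder(
        categories=[np.array(["S", "M", "L"])],  # aligned with input_cols order
        input_cols=["SIZE"],
        output_cols=["SIZE_ORD"],
    )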
@@ -111,9 +116,13 @@ class OrdinalEncoder(base.BaseTransformer):
         a single column of integers (0 to n_categories - 1) per feature.
 
         Args:
-            categories: 'auto' or dict {column_name: ndarray([category])}, default='auto'
+            categories: 'auto', list of array-like, or dict {column_name: ndarray([category])}, default='auto'
                Categories (unique values) per feature:
                - 'auto': Determine categories automatically from the training data.
+                - list: ``categories[i]`` holds the categories expected in the ith
+                  column. The passed categories should not mix strings and numeric
+                  values within a single feature, and should be sorted in case of
+                  numeric values.
                - dict: ``categories[column_name]`` holds the categories expected in
                  the column provided. The passed categories should not mix strings
                  and numeric values within a single feature, and should be sorted in
@@ -247,7 +256,7 @@ class OrdinalEncoder(base.BaseTransformer):
         # columns: COLUMN_NAME, CATEGORY, INDEX
         state_df = self._get_category_index_state_df(dataset)
         # save the dataframe on server side so that transform doesn't need to upload
-        state_df.write.save_as_table(  # type: ignore[call-overload]
+        state_df.write.save_as_table(
             self._vocab_table_name,
             mode="overwrite",
             table_type="temporary",
@@ -314,8 +323,19 @@ class OrdinalEncoder(base.BaseTransformer):
         assert found_state_df is not None
         if self.categories != "auto":
             state_data = []
-            assert isinstance(self.categories, dict)
-            for input_col, cats in self.categories.items():
+            if isinstance(self.categories, list):
+                categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
+            elif isinstance(self.categories, dict):
+                categories_map = self.categories
+            else:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=ValueError(
+                        f"Invalid type {type(self.categories)} provided for argument `categories`"
+                    ),
+                )
+
+            for input_col, cats in categories_map.items():
                 for idx, cat in enumerate(cats.tolist()):
                     state_data.append([input_col, cat, idx])
             # states of given categories
@@ -365,6 +385,8 @@ class OrdinalEncoder(base.BaseTransformer):
                 for col_name, cats in grouped_categories.items()
             }
             self.categories_ = categories
+        elif isinstance(self.categories, list):
+            self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
         else:
             self.categories_ = self.categories
 
@@ -520,7 +542,7 @@ class OrdinalEncoder(base.BaseTransformer):
         )
 
         batch_table_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
-        transformed_dataset.write.save_as_table(  # type: ignore[call-overload]
+        transformed_dataset.write.save_as_table(
             batch_table_name,
             mode="overwrite",
             table_type="temporary",
@@ -545,6 +567,15 @@ class OrdinalEncoder(base.BaseTransformer):
             snowml_only_keywords=_SNOWML_ONLY_KEYWORDS,
             sklearn_added_keyword_to_version_dict=_SKLEARN_ADDED_KEYWORD_TO_VERSION_DICT,
         )
+        if "categories" in sklearn_args and isinstance(sklearn_args["categories"], dict):
+            # sklearn requires a list of array-like to satisfy the `categories` arg
+            try:
+                sklearn_args["categories"] = [sklearn_args["categories"][input_col] for input_col in self.input_cols]
+            except KeyError as e:
+                raise exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_ARGUMENT,
+                    original_exception=e,
+                )
         return preprocessing.OrdinalEncoder(**sklearn_args)
 
     def _create_sklearn_object(self) -> preprocessing.OrdinalEncoder:
@@ -567,7 +598,7 @@ class OrdinalEncoder(base.BaseTransformer):
                 error_code=error_codes.INVALID_ATTRIBUTE,
                 original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
             )
-        elif isinstance(self.categories, dict):
+        elif isinstance(self.categories, (dict, list)):
             if len(self.categories) != len(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
@@ -576,7 +607,7 @@ class OrdinalEncoder(base.BaseTransformer):
                         f"({len(self.input_cols)})."
                     ),
                 )
-            elif set(self.categories.keys()) != set(self.input_cols):
+            elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
                 raise exceptions.SnowflakeMLException(
                     error_code=error_codes.INVALID_ATTRIBUTE,
                     original_exception=ValueError(
snowflake/ml/modeling/preprocessing/polynomial_features.py CHANGED
@@ -76,8 +76,10 @@ class PolynomialFeatures(BaseTransformer):
        initialization with the `set_input_cols` method.
 
    label_cols: Optional[Union[str, List[str]]]
-       This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
-
+       A string or list of strings representing column names that contain labels.
+       Label columns must be specified with this parameter during initialization
+       or with the `set_label_cols` method before fitting.
+
    output_cols: Optional[Union[str, List[str]]]
        A string or list of strings representing column names that will store the
        output of predict and transform operations. The length of output_cols must
@@ -251,7 +253,7 @@ class PolynomialFeatures(BaseTransformer):
                inspect.currentframe(), PolynomialFeatures.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/preprocessing/robust_scaler.py CHANGED
@@ -37,12 +37,15 @@ class RobustScaler(base.BaseTransformer):
            the dataset is scaled down. If less than 1, the dataset is scaled up.
 
        input_cols: Optional[Union[str, List[str]]], default=None
-           The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
+           The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+           columns must be specified before fit with this argument or after initialization with the
+           `set_input_cols` method. This argument is optional for API consistency.
 
        output_cols: Optional[Union[str, List[str]]], default=None
-           The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-           columns specified must match the number of input columns. For dense output, the column names specified are
-           used as base names for the columns created for each category.
+           The name(s) to assign output columns in the output DataFrame. The number of
+           columns specified must equal the number of input columns. Output columns must be specified before transform
+           with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+           API consistency.
 
        passthrough_cols: Optional[Union[str, List[str]]], default=None
            A string or a list of strings indicating column names to be excluded from any
snowflake/ml/modeling/preprocessing/standard_scaler.py CHANGED
@@ -26,11 +26,15 @@ class StandardScaler(base.BaseTransformer):
            If True, scale the data unit variance (i.e. unit standard deviation).
 
        input_cols: Optional[Union[str, List[str]]], default=None
-           The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
+           The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+           columns must be specified before fit with this argument or after initialization with the
+           `set_input_cols` method. This argument is optional for API consistency.
 
        output_cols: Optional[Union[str, List[str]]], default=None
-           The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-           columns specified must match the number of input columns.
+           The name(s) to assign output columns in the output DataFrame. The number of
+           columns specified must equal the number of input columns. Output columns must be specified before transform
+           with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+           API consistency.
 
        passthrough_cols: Optional[Union[str, List[str]]], default=None
            A string or a list of strings indicating column names to be excluded from any
snowflake/ml/modeling/semi_supervised/label_propagation.py CHANGED
@@ -257,7 +257,7 @@ class LabelPropagation(BaseTransformer):
                inspect.currentframe(), LabelPropagation.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/semi_supervised/label_spreading.py CHANGED
@@ -266,7 +266,7 @@ class LabelSpreading(BaseTransformer):
                inspect.currentframe(), LabelSpreading.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/svm/linear_svc.py CHANGED
@@ -322,7 +322,7 @@ class LinearSVC(BaseTransformer):
                inspect.currentframe(), LinearSVC.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/svm/linear_svr.py CHANGED
@@ -294,7 +294,7 @@ class LinearSVR(BaseTransformer):
                inspect.currentframe(), LinearSVR.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/svm/nu_svc.py CHANGED
@@ -328,7 +328,7 @@ class NuSVC(BaseTransformer):
                inspect.currentframe(), NuSVC.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/svm/nu_svr.py CHANGED
@@ -289,7 +289,7 @@ class NuSVR(BaseTransformer):
                inspect.currentframe(), NuSVR.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/svm/svc.py CHANGED
@@ -331,7 +331,7 @@ class SVC(BaseTransformer):
                inspect.currentframe(), SVC.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/svm/svr.py CHANGED
@@ -292,7 +292,7 @@ class SVR(BaseTransformer):
                inspect.currentframe(), SVR.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/tree/decision_tree_classifier.py CHANGED
@@ -359,7 +359,7 @@ class DecisionTreeClassifier(BaseTransformer):
                inspect.currentframe(), DecisionTreeClassifier.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/tree/decision_tree_regressor.py CHANGED
@@ -341,7 +341,7 @@ class DecisionTreeRegressor(BaseTransformer):
                inspect.currentframe(), DecisionTreeRegressor.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/tree/extra_tree_classifier.py CHANGED
@@ -351,7 +351,7 @@ class ExtraTreeClassifier(BaseTransformer):
                inspect.currentframe(), ExtraTreeClassifier.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/tree/extra_tree_regressor.py CHANGED
@@ -333,7 +333,7 @@ class ExtraTreeRegressor(BaseTransformer):
                inspect.currentframe(), ExtraTreeRegressor.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/xgboost/xgb_classifier.py CHANGED
@@ -451,7 +451,7 @@ class XGBClassifier(BaseTransformer):
                inspect.currentframe(), XGBClassifier.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/xgboost/xgb_regressor.py CHANGED
@@ -450,7 +450,7 @@ class XGBRegressor(BaseTransformer):
                inspect.currentframe(), XGBRegressor.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/xgboost/xgbrf_classifier.py CHANGED
@@ -455,7 +455,7 @@ class XGBRFClassifier(BaseTransformer):
                inspect.currentframe(), XGBRFClassifier.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
snowflake/ml/modeling/xgboost/xgbrf_regressor.py CHANGED
@@ -455,7 +455,7 @@ class XGBRFRegressor(BaseTransformer):
                inspect.currentframe(), XGBRFRegressor.__class__.__name__
            ),
            api_calls=[Session.call],
-           custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+           custom_tags={"autogen": True} if self._autogenerated else None,
        )
        pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
        pd_df.columns = dataset.columns
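The repeated one-line change across these autogenerated estimators swaps a dict built from a list of pairs for the equivalent literal; both produce the same mapping:

    # Purely a readability/idiom change; behavior is identical.
    assert dict([("autogen", True)]) == {"autogen": True}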
snowflake/ml/registry/_manager/model_manager.py CHANGED
@@ -4,12 +4,14 @@ from typing import Any, Dict, List, Optional, Union
 import pandas as pd
 from absl.logging import logging
 
+from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.human_readable_id import hrid_generator
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._client.model import model_impl, model_version_impl
 from snowflake.ml.model._client.ops import metadata_ops, model_ops
 from snowflake.ml.model._model_composer import model_composer
+from snowflake.ml.model._packager.model_meta import model_meta
 from snowflake.snowpark import session
 
 logger = logging.getLogger(__name__)
@@ -124,7 +126,10 @@ class ModelManager:
            version_name=version_name_id,
            statement_params=statement_params,
        ):
-           raise ValueError(f"Model {model_name} version {version_name} already existed.")
+           raise ValueError(
+               f"Model {model_name} version {version_name} already existed. "
+               + "To auto-generate `version_name`, skip that argument."
+           )
 
        stage_path = self._model_ops.prepare_model_stage_path(
            database_name=database_name_id,
@@ -134,8 +139,10 @@ class ModelManager:
 
        logger.info("Start packaging and uploading your model. It might take some time based on the size of the model.")
 
-       mc = model_composer.ModelComposer(self._model_ops._session, stage_path=stage_path)
-       mc.save(
+       mc = model_composer.ModelComposer(
+           self._model_ops._session, stage_path=stage_path, statement_params=statement_params
+       )
+       model_metadata: model_meta.ModelMetadata = mc.save(
            name=model_name_id.resolved(),
            model=model,
            signatures=signatures,
@@ -147,6 +154,12 @@ class ModelManager:
            ext_modules=ext_modules,
            options=options,
        )
+       statement_params = telemetry.add_statement_params_custom_tags(
+           statement_params, model_metadata.telemetry_metadata()
+       )
+       statement_params = telemetry.add_statement_params_custom_tags(
+           statement_params, {"model_version_name": version_name_id}
+       )
 
        logger.info("Start creating MODEL object for you in the Snowflake.")
 
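The reworded error hints at the fix: omit `version_name` and the registry generates one (via the human-readable ID generator imported in this module). A sketch assuming an active Snowpark `session`, a `Registry` instance `reg`, and an already-fitted `model`:

    from snowflake.ml.registry import Registry

    reg = Registry(session=session)

    # Logging to an existing version_name now raises the clearer error above;
    # omitting the argument lets the registry auto-generate a fresh one.
    mv = reg.log_model(model, model_name="MY_MODEL")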
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
-VERSION="1.5.2"
+VERSION="1.5.4"