PyPI - snowflake-ml-python - Versions diffs - 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl - Mend

snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (196) hide show

snowflake/ml/modeling/linear_model/ridge_classifier_cv.py CHANGED Viewed

@@ -277,7 +277,7 @@ class RidgeClassifierCV(BaseTransformer):
                         inspect.currentframe(), RidgeClassifierCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/ridge_cv.py CHANGED Viewed

@@ -298,7 +298,7 @@ class RidgeCV(BaseTransformer):
                         inspect.currentframe(), RidgeCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/sgd_classifier.py CHANGED Viewed

@@ -417,7 +417,7 @@ class SGDClassifier(BaseTransformer):
                         inspect.currentframe(), SGDClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/sgd_one_class_svm.py CHANGED Viewed

@@ -315,7 +315,7 @@ class SGDOneClassSVM(BaseTransformer):
                         inspect.currentframe(), SGDOneClassSVM.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/sgd_regressor.py CHANGED Viewed

@@ -383,7 +383,7 @@ class SGDRegressor(BaseTransformer):
                         inspect.currentframe(), SGDRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/theil_sen_regressor.py CHANGED Viewed

@@ -285,7 +285,7 @@ class TheilSenRegressor(BaseTransformer):
                         inspect.currentframe(), TheilSenRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/tweedie_regressor.py CHANGED Viewed

@@ -311,7 +311,7 @@ class TweedieRegressor(BaseTransformer):
                         inspect.currentframe(), TweedieRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/isomap.py CHANGED Viewed

@@ -307,7 +307,7 @@ class Isomap(BaseTransformer):
                         inspect.currentframe(), Isomap.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/mds.py CHANGED Viewed

@@ -290,7 +290,7 @@ class MDS(BaseTransformer):
                         inspect.currentframe(), MDS.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/spectral_embedding.py CHANGED Viewed

@@ -292,7 +292,7 @@ class SpectralEmbedding(BaseTransformer):
                         inspect.currentframe(), SpectralEmbedding.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/tsne.py CHANGED Viewed

@@ -351,7 +351,7 @@ class TSNE(BaseTransformer):
                         inspect.currentframe(), TSNE.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py CHANGED Viewed

@@ -354,7 +354,7 @@ class BayesianGaussianMixture(BaseTransformer):
                         inspect.currentframe(), BayesianGaussianMixture.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/mixture/gaussian_mixture.py CHANGED Viewed

@@ -327,7 +327,7 @@ class GaussianMixture(BaseTransformer):
                         inspect.currentframe(), GaussianMixture.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/model_selection/grid_search_cv.py CHANGED Viewed

@@ -285,11 +285,7 @@ class GridSearchCV(BaseTransformer):
         )
         return selected_cols
-    @telemetry.send_api_usage_telemetry(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
         """Run fit with all sets of parameters
         For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)

snowflake/ml/modeling/model_selection/randomized_search_cv.py CHANGED Viewed

@@ -298,11 +298,7 @@ class RandomizedSearchCV(BaseTransformer):
         )
         return selected_cols
-    @telemetry.send_api_usage_telemetry(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
         """Run fit with all sets of parameters
         For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)

snowflake/ml/modeling/multiclass/one_vs_one_classifier.py CHANGED Viewed

@@ -239,7 +239,7 @@ class OneVsOneClassifier(BaseTransformer):
                         inspect.currentframe(), OneVsOneClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py CHANGED Viewed

@@ -248,7 +248,7 @@ class OneVsRestClassifier(BaseTransformer):
                         inspect.currentframe(), OneVsRestClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/multiclass/output_code_classifier.py CHANGED Viewed

@@ -251,7 +251,7 @@ class OutputCodeClassifier(BaseTransformer):
                         inspect.currentframe(), OutputCodeClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/bernoulli_nb.py CHANGED Viewed

@@ -251,7 +251,7 @@ class BernoulliNB(BaseTransformer):
                         inspect.currentframe(), BernoulliNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/categorical_nb.py CHANGED Viewed

@@ -257,7 +257,7 @@ class CategoricalNB(BaseTransformer):
                         inspect.currentframe(), CategoricalNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/complement_nb.py CHANGED Viewed

@@ -251,7 +251,7 @@ class ComplementNB(BaseTransformer):
                         inspect.currentframe(), ComplementNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/gaussian_nb.py CHANGED Viewed

@@ -232,7 +232,7 @@ class GaussianNB(BaseTransformer):
                         inspect.currentframe(), GaussianNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/multinomial_nb.py CHANGED Viewed

@@ -245,7 +245,7 @@ class MultinomialNB(BaseTransformer):
                         inspect.currentframe(), MultinomialNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/k_neighbors_classifier.py CHANGED Viewed

@@ -302,7 +302,7 @@ class KNeighborsClassifier(BaseTransformer):
                         inspect.currentframe(), KNeighborsClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/k_neighbors_regressor.py CHANGED Viewed

@@ -304,7 +304,7 @@ class KNeighborsRegressor(BaseTransformer):
                         inspect.currentframe(), KNeighborsRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/kernel_density.py CHANGED Viewed

@@ -281,7 +281,7 @@ class KernelDensity(BaseTransformer):
                         inspect.currentframe(), KernelDensity.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/local_outlier_factor.py CHANGED Viewed

@@ -309,7 +309,7 @@ class LocalOutlierFactor(BaseTransformer):
                         inspect.currentframe(), LocalOutlierFactor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/nearest_centroid.py CHANGED Viewed

@@ -242,7 +242,7 @@ class NearestCentroid(BaseTransformer):
                         inspect.currentframe(), NearestCentroid.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/nearest_neighbors.py CHANGED Viewed

@@ -292,7 +292,7 @@ class NearestNeighbors(BaseTransformer):
                         inspect.currentframe(), NearestNeighbors.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py CHANGED Viewed

@@ -313,7 +313,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
                         inspect.currentframe(), NeighborhoodComponentsAnalysis.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py CHANGED Viewed

@@ -314,7 +314,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
                         inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py CHANGED Viewed

@@ -304,7 +304,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
                         inspect.currentframe(), RadiusNeighborsRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neural_network/bernoulli_rbm.py CHANGED Viewed

@@ -261,7 +261,7 @@ class BernoulliRBM(BaseTransformer):
                         inspect.currentframe(), BernoulliRBM.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neural_network/mlp_classifier.py CHANGED Viewed

@@ -416,7 +416,7 @@ class MLPClassifier(BaseTransformer):
                         inspect.currentframe(), MLPClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neural_network/mlp_regressor.py CHANGED Viewed

@@ -412,7 +412,7 @@ class MLPRegressor(BaseTransformer):
                         inspect.currentframe(), MLPRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/pipeline/pipeline.py CHANGED Viewed

@@ -17,6 +17,7 @@ from sklearn.utils import metaestimators
 from snowflake import snowpark
 from snowflake.ml._internal import file_utils, telemetry
 from snowflake.ml._internal.exceptions import error_codes, exceptions
+from snowflake.ml._internal.lineage import lineage_utils
 from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
 from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
 from snowflake.ml.modeling._internal.model_transformer_builder import (
@@ -427,6 +428,10 @@ class Pipeline(base.BaseTransformer):
             else dataset
         )
+        # Extract lineage information here since we're overriding fit() directly
+        data_sources = lineage_utils.get_data_sources(dataset)
+        lineage_utils.set_data_sources(self, data_sources)
         if self._can_be_trained_in_ml_runtime(dataset):
             if not self._is_convertible_to_sklearn:
                 raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")

snowflake/ml/modeling/preprocessing/binarizer.py CHANGED Viewed

@@ -25,11 +25,15 @@ class Binarizer(base.BaseTransformer):
             Feature values below or equal to this are replaced by 0, above it by 1. Default value is 0.0.
         input_cols: Optional[Union[str, Iterable[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be binarized.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be binarized. Input
+            columns must be specified before transform with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
         output_cols: Optional[Union[str, Iterable[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
         passthrough_cols:  Optional[Union[str, Iterable[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any

snowflake/ml/modeling/preprocessing/k_bins_discretizer.py CHANGED Viewed

@@ -74,10 +74,15 @@ class KBinsDiscretizer(base.BaseTransformer):
             - 'quantile': All bins in each feature have the same number of points.
         input_cols: str or Iterable [column_name], default=None
-            Single or multiple input columns.
+           The name(s) of one or more columns in the input DataFrame containing feature(s) to be discretized.
+           Input columns must be specified before fit with this argument or after initialization with the
+           `set_input_cols` method. This argument is optional for API consistency.
         output_cols: str or Iterable [column_name], default=None
-            Single or multiple output columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
         passthrough_cols: A string or a list of strings indicating column names to be excluded from any
             operations (such as train, transform, or inference). These specified column(s)

snowflake/ml/modeling/preprocessing/label_encoder.py CHANGED Viewed

@@ -25,11 +25,12 @@ class LabelEncoder(base.BaseTransformer):
     Args:
         input_cols: Optional[Union[str, List[str]]]
-            The name of a column in a DataFrame to be encoded. May be a string or a list containing one string.
+            The name of a column or a list containing one column name to be encoded in the input DataFrame. There must
+            be exactly one input column specified before fit. This argument is optional for API consistency.
         output_cols: Optional[Union[str, List[str]]]
-            The name of a column in a DataFrame where the results will be stored. May be a string or a list
-            containing one string.
+            The name of a column or a list containing one column name where the results will be stored. There must be
+            exactly one output column specified before transform. This argument is optional for API consistency.
         passthrough_cols: Optional[Union[str, List[str]]]
             A string or a list of strings indicating column names to be excluded from any
@@ -54,11 +55,11 @@ class LabelEncoder(base.BaseTransformer):
         Args:
             input_cols: Optional[Union[str, List[str]]]
-                The name of a column in a DataFrame to be encoded. May be a string or a list containing one
-                string.
+                The name of a column or a list containing one column name to be encoded in the input DataFrame. There
+                must be exactly one input column specified before fit. This argument is optional for API consistency.
             output_cols: Optional[Union[str, List[str]]]
-                The name of a column in a DataFrame where the results will be stored. May be a string or a list
-                containing one string.
+                The name of a column or a list containing one column name where the results will be stored. There must
+                be exactly one output column specified before transform. This argument is optional for API consistency.
             passthrough_cols: Optional[Union[str, List[str]]]
                 A string or a list of strings indicating column names to be excluded from any
                 operations (such as train, transform, or inference). These specified column(s)

snowflake/ml/modeling/preprocessing/max_abs_scaler.py CHANGED Viewed

@@ -28,11 +28,15 @@ class MaxAbsScaler(base.BaseTransformer):
     Args:
         input_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
         output_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
         passthrough_cols: Optional[Union[str, List[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any

snowflake/ml/modeling/preprocessing/min_max_scaler.py CHANGED Viewed

@@ -29,12 +29,15 @@ class MinMaxScaler(base.BaseTransformer):
             Whether to clip transformed values of held-out data to the specified feature range (default is True).
         input_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be scaled. Each specified
-            input column is scaled independently and stored in the corresponding output column.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
         output_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
         passthrough_cols: Optional[Union[str, List[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any

snowflake/ml/modeling/preprocessing/normalizer.py CHANGED Viewed

@@ -28,11 +28,15 @@ class Normalizer(base.BaseTransformer):
             values. It must be one of 'l1', 'l2', or 'max'.
         input_cols: Optional[Union[str, List[str]]]
-            Columns to use as inputs during transform.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be normalized. Input
+            columns must be specified before transform with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
         output_cols: Optional[Union[str, List[str]]]
-            A string or list of strings representing column names that will store the output of transform operation.
-            The length of `output_cols` must equal the length of `input_cols`.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
         passthrough_cols: Optional[Union[str, List[str]]]
             A string or a list of strings indicating column names to be excluded from any

snowflake/ml/modeling/preprocessing/one_hot_encoder.py CHANGED Viewed

@@ -157,10 +157,18 @@ class OneHotEncoder(base.BaseTransformer):
             there is no limit to the number of output features.
         input_cols: Optional[Union[str, List[str]]], default=None
-            Single or multiple input columns.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
         output_cols: Optional[Union[str, List[str]]], default=None
-            Single or multiple output columns.
+            The prefix to be used for encoded output for each input column. The number of
+            output column prefixes specified must match the number of input columns. Output column prefixes must be
+            specified before transform with this argument or after initialization with the `set_output_cols` method.
+            Note: Dense output column names are case-sensitive and resolve identifiers following Snowflake rules, e.g.
+            `"PREFIX_a"`, `PREFIX_A`, `"prefix_A"`. Therefore, there is no need to provide double-quoted column names
+            as that would result in invalid identifiers.
         passthrough_cols: Optional[Union[str, List[str]]]
             A string or a list of strings indicating column names to be excluded from any

snowflake/ml/modeling/preprocessing/ordinal_encoder.py CHANGED Viewed

@@ -67,11 +67,14 @@ class OrdinalEncoder(base.BaseTransformer):
             The value to be used to encode unknown categories.
         input_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be encoded.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
         output_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The prefix to be used for encoded output for each input column. The number of
+            output column prefixes specified must equal the number of input columns. Output column prefixes must be
+            specified before transform with this argument or after initialization with the `set_output_cols` method.
         passthrough_cols: Optional[Union[str, List[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any
@@ -247,7 +250,7 @@ class OrdinalEncoder(base.BaseTransformer):
         # columns: COLUMN_NAME, CATEGORY, INDEX
         state_df = self._get_category_index_state_df(dataset)
         # save the dataframe on server side so that transform doesn't need to upload
-        state_df.write.save_as_table(  # type: ignore[call-overload]
+        state_df.write.save_as_table(
             self._vocab_table_name,
             mode="overwrite",
             table_type="temporary",
@@ -520,7 +523,7 @@ class OrdinalEncoder(base.BaseTransformer):
                 )
             batch_table_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
-            transformed_dataset.write.save_as_table(  # type: ignore[call-overload]
+            transformed_dataset.write.save_as_table(
                 batch_table_name,
                 mode="overwrite",
                 table_type="temporary",

snowflake/ml/modeling/preprocessing/polynomial_features.py CHANGED Viewed

@@ -251,7 +251,7 @@ class PolynomialFeatures(BaseTransformer):
                         inspect.currentframe(), PolynomialFeatures.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl

snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl