PyPI - snowflake-ml-python - Versions diffs - 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl - Mend

snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (250) hide show

snowflake/ml/modeling/linear_model/sgd_regressor.py CHANGED Viewed

@@ -383,7 +383,7 @@ class SGDRegressor(BaseTransformer):
                         inspect.currentframe(), SGDRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/theil_sen_regressor.py CHANGED Viewed

@@ -285,7 +285,7 @@ class TheilSenRegressor(BaseTransformer):
                         inspect.currentframe(), TheilSenRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/linear_model/tweedie_regressor.py CHANGED Viewed

@@ -311,7 +311,7 @@ class TweedieRegressor(BaseTransformer):
                         inspect.currentframe(), TweedieRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/isomap.py CHANGED Viewed

@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -307,7 +309,7 @@ class Isomap(BaseTransformer):
                         inspect.currentframe(), Isomap.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/mds.py CHANGED Viewed

@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -290,7 +292,7 @@ class MDS(BaseTransformer):
                         inspect.currentframe(), MDS.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/spectral_embedding.py CHANGED Viewed

@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -292,7 +294,7 @@ class SpectralEmbedding(BaseTransformer):
                         inspect.currentframe(), SpectralEmbedding.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/manifold/tsne.py CHANGED Viewed

@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -351,7 +353,7 @@ class TSNE(BaseTransformer):
                         inspect.currentframe(), TSNE.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/metrics/ranking.py CHANGED Viewed

@@ -102,6 +102,7 @@ def precision_recall_curve(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -249,6 +250,7 @@ def roc_auc_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -352,6 +354,7 @@ def roc_curve(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:

snowflake/ml/modeling/metrics/regression.py CHANGED Viewed

@@ -87,6 +87,7 @@ def d2_absolute_error_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -184,6 +185,7 @@ def d2_pinball_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:
@@ -299,6 +301,7 @@ def explained_variance_score(
         ],
         statement_params=statement_params,
         anonymous=True,
+        execute_as="caller",
     )
     def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
         for query in queries[:-1]:

snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py CHANGED Viewed

@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -354,7 +356,7 @@ class BayesianGaussianMixture(BaseTransformer):
                         inspect.currentframe(), BayesianGaussianMixture.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/mixture/gaussian_mixture.py CHANGED Viewed

@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -327,7 +329,7 @@ class GaussianMixture(BaseTransformer):
                         inspect.currentframe(), GaussianMixture.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/model_selection/grid_search_cv.py CHANGED Viewed

@@ -285,11 +285,7 @@ class GridSearchCV(BaseTransformer):
         )
         return selected_cols
-    @telemetry.send_api_usage_telemetry(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
         """Run fit with all sets of parameters
         For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)

snowflake/ml/modeling/model_selection/randomized_search_cv.py CHANGED Viewed

@@ -298,11 +298,7 @@ class RandomizedSearchCV(BaseTransformer):
         )
         return selected_cols
-    @telemetry.send_api_usage_telemetry(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
         """Run fit with all sets of parameters
         For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)

snowflake/ml/modeling/multiclass/one_vs_one_classifier.py CHANGED Viewed

@@ -239,7 +239,7 @@ class OneVsOneClassifier(BaseTransformer):
                         inspect.currentframe(), OneVsOneClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py CHANGED Viewed

@@ -248,7 +248,7 @@ class OneVsRestClassifier(BaseTransformer):
                         inspect.currentframe(), OneVsRestClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/multiclass/output_code_classifier.py CHANGED Viewed

@@ -251,7 +251,7 @@ class OutputCodeClassifier(BaseTransformer):
                         inspect.currentframe(), OutputCodeClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/bernoulli_nb.py CHANGED Viewed

@@ -251,7 +251,7 @@ class BernoulliNB(BaseTransformer):
                         inspect.currentframe(), BernoulliNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/categorical_nb.py CHANGED Viewed

@@ -257,7 +257,7 @@ class CategoricalNB(BaseTransformer):
                         inspect.currentframe(), CategoricalNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/complement_nb.py CHANGED Viewed

@@ -251,7 +251,7 @@ class ComplementNB(BaseTransformer):
                         inspect.currentframe(), ComplementNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/gaussian_nb.py CHANGED Viewed

@@ -232,7 +232,7 @@ class GaussianNB(BaseTransformer):
                         inspect.currentframe(), GaussianNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/naive_bayes/multinomial_nb.py CHANGED Viewed

@@ -245,7 +245,7 @@ class MultinomialNB(BaseTransformer):
                         inspect.currentframe(), MultinomialNB.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/k_neighbors_classifier.py CHANGED Viewed

@@ -302,7 +302,7 @@ class KNeighborsClassifier(BaseTransformer):
                         inspect.currentframe(), KNeighborsClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/k_neighbors_regressor.py CHANGED Viewed

@@ -304,7 +304,7 @@ class KNeighborsRegressor(BaseTransformer):
                         inspect.currentframe(), KNeighborsRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/kernel_density.py CHANGED Viewed

@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -281,7 +283,7 @@ class KernelDensity(BaseTransformer):
                         inspect.currentframe(), KernelDensity.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/local_outlier_factor.py CHANGED Viewed

@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -309,7 +311,7 @@ class LocalOutlierFactor(BaseTransformer):
                         inspect.currentframe(), LocalOutlierFactor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/nearest_centroid.py CHANGED Viewed

@@ -242,7 +242,7 @@ class NearestCentroid(BaseTransformer):
                         inspect.currentframe(), NearestCentroid.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/nearest_neighbors.py CHANGED Viewed

@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -292,7 +294,7 @@ class NearestNeighbors(BaseTransformer):
                         inspect.currentframe(), NearestNeighbors.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py CHANGED Viewed

@@ -313,7 +313,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
                         inspect.currentframe(), NeighborhoodComponentsAnalysis.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py CHANGED Viewed

@@ -314,7 +314,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
                         inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py CHANGED Viewed

@@ -304,7 +304,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
                         inspect.currentframe(), RadiusNeighborsRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neural_network/bernoulli_rbm.py CHANGED Viewed

@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
         initialization with the `set_input_cols` method.
     label_cols: Optional[Union[str, List[str]]]
-        This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
+        A string or list of strings representing column names that contain labels.
+        Label columns must be specified with this parameter during initialization
+        or with the `set_label_cols` method before fitting.
     output_cols: Optional[Union[str, List[str]]]
         A string or list of strings representing column names that will store the
         output of predict and transform operations. The length of output_cols must
@@ -261,7 +263,7 @@ class BernoulliRBM(BaseTransformer):
                         inspect.currentframe(), BernoulliRBM.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neural_network/mlp_classifier.py CHANGED Viewed

@@ -416,7 +416,7 @@ class MLPClassifier(BaseTransformer):
                         inspect.currentframe(), MLPClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/neural_network/mlp_regressor.py CHANGED Viewed

@@ -412,7 +412,7 @@ class MLPRegressor(BaseTransformer):
                         inspect.currentframe(), MLPRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns

snowflake/ml/modeling/pipeline/pipeline.py CHANGED Viewed

@@ -17,6 +17,7 @@ from sklearn.utils import metaestimators
 from snowflake import snowpark
 from snowflake.ml._internal import file_utils, telemetry
 from snowflake.ml._internal.exceptions import error_codes, exceptions
+from snowflake.ml._internal.lineage import lineage_utils
 from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
 from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
 from snowflake.ml.modeling._internal.model_transformer_builder import (
@@ -377,6 +378,7 @@ class Pipeline(base.BaseTransformer):
                 anonymous=True,
                 imports=imports,  # type: ignore[arg-type]
                 statement_params=sproc_statement_params,
+                execute_as="caller",
             )
             sproc_export_file_name: str = pipeline_within_one_sproc(
@@ -427,6 +429,10 @@ class Pipeline(base.BaseTransformer):
             else dataset
         )
+        # Extract lineage information here since we're overriding fit() directly
+        data_sources = lineage_utils.get_data_sources(dataset)
+        lineage_utils.set_data_sources(self, data_sources)
         if self._can_be_trained_in_ml_runtime(dataset):
             if not self._is_convertible_to_sklearn:
                 raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")

snowflake/ml/modeling/preprocessing/binarizer.py CHANGED Viewed

@@ -25,11 +25,15 @@ class Binarizer(base.BaseTransformer):
             Feature values below or equal to this are replaced by 0, above it by 1. Default value is 0.0.
         input_cols: Optional[Union[str, Iterable[str]]], default=None
-            The name(s) of one or more columns in a DataFrame containing a feature to be binarized.
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be binarized. Input
+            columns must be specified before transform with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.
         output_cols: Optional[Union[str, Iterable[str]]], default=None
-            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-            columns specified must match the number of input columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
         passthrough_cols:  Optional[Union[str, Iterable[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any

snowflake/ml/modeling/preprocessing/k_bins_discretizer.py CHANGED Viewed

@@ -74,10 +74,15 @@ class KBinsDiscretizer(base.BaseTransformer):
             - 'quantile': All bins in each feature have the same number of points.
         input_cols: str or Iterable [column_name], default=None
-            Single or multiple input columns.
+           The name(s) of one or more columns in the input DataFrame containing feature(s) to be discretized.
+           Input columns must be specified before fit with this argument or after initialization with the
+           `set_input_cols` method. This argument is optional for API consistency.
         output_cols: str or Iterable [column_name], default=None
-            Single or multiple output columns.
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.
         passthrough_cols: A string or a list of strings indicating column names to be excluded from any
             operations (such as train, transform, or inference). These specified column(s)

snowflake/ml/modeling/preprocessing/label_encoder.py CHANGED Viewed

@@ -25,11 +25,12 @@ class LabelEncoder(base.BaseTransformer):
     Args:
         input_cols: Optional[Union[str, List[str]]]
-            The name of a column in a DataFrame to be encoded. May be a string or a list containing one string.
+            The name of a column or a list containing one column name to be encoded in the input DataFrame. There must
+            be exactly one input column specified before fit. This argument is optional for API consistency.
         output_cols: Optional[Union[str, List[str]]]
-            The name of a column in a DataFrame where the results will be stored. May be a string or a list
-            containing one string.
+            The name of a column or a list containing one column name where the results will be stored. There must be
+            exactly one output column specified before transform. This argument is optional for API consistency.
         passthrough_cols: Optional[Union[str, List[str]]]
             A string or a list of strings indicating column names to be excluded from any
@@ -54,11 +55,11 @@ class LabelEncoder(base.BaseTransformer):
         Args:
             input_cols: Optional[Union[str, List[str]]]
-                The name of a column in a DataFrame to be encoded. May be a string or a list containing one
-                string.
+                The name of a column or a list containing one column name to be encoded in the input DataFrame. There
+                must be exactly one input column specified before fit. This argument is optional for API consistency.
             output_cols: Optional[Union[str, List[str]]]
-                The name of a column in a DataFrame where the results will be stored. May be a string or a list
-                containing one string.
+                The name of a column or a list containing one column name where the results will be stored. There must
+                be exactly one output column specified before transform. This argument is optional for API consistency.
             passthrough_cols: Optional[Union[str, List[str]]]
                 A string or a list of strings indicating column names to be excluded from any
                 operations (such as train, transform, or inference). These specified column(s)

snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl

snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl