snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +26 -5
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_util.py +105 -8
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/dataset/dataset.py +15 -12
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/feature_store.py +2 -2
- snowflake/ml/model/_client/sql/model_version.py +2 -2
- snowflake/ml/model/_model_composer/model_composer.py +2 -2
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +156 -121
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +1 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +1 -1
- snowflake/ml/modeling/cluster/birch.py +1 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +1 -1
- snowflake/ml/modeling/cluster/dbscan.py +1 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +1 -1
- snowflake/ml/modeling/cluster/k_means.py +1 -1
- snowflake/ml/modeling/cluster/mean_shift.py +1 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +1 -1
- snowflake/ml/modeling/cluster/optics.py +1 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +1 -1
- snowflake/ml/modeling/compose/column_transformer.py +1 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +1 -1
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +1 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +1 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +1 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/pca.py +1 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +1 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +1 -1
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +1 -1
- snowflake/ml/modeling/impute/knn_imputer.py +1 -1
- snowflake/ml/modeling/impute/missing_indicator.py +1 -1
- snowflake/ml/modeling/impute/simple_imputer.py +8 -4
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +1 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +1 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +1 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +1 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ridge.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +1 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +1 -1
- snowflake/ml/modeling/manifold/tsne.py +1 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +1 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +1 -1
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +1 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
- snowflake/ml/modeling/pipeline/pipeline.py +5 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +1 -1
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
- snowflake/ml/modeling/svm/linear_svc.py +1 -1
- snowflake/ml/modeling/svm/linear_svr.py +1 -1
- snowflake/ml/modeling/svm/nu_svc.py +1 -1
- snowflake/ml/modeling/svm/nu_svr.py +1 -1
- snowflake/ml/modeling/svm/svc.py +1 -1
- snowflake/ml/modeling/svm/svr.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +21 -5
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +196 -195
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -277,7 +277,7 @@ class RidgeClassifierCV(BaseTransformer):
|
|
277
277
|
inspect.currentframe(), RidgeClassifierCV.__class__.__name__
|
278
278
|
),
|
279
279
|
api_calls=[Session.call],
|
280
|
-
custom_tags=
|
280
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
281
281
|
)
|
282
282
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
283
283
|
pd_df.columns = dataset.columns
|
@@ -298,7 +298,7 @@ class RidgeCV(BaseTransformer):
|
|
298
298
|
inspect.currentframe(), RidgeCV.__class__.__name__
|
299
299
|
),
|
300
300
|
api_calls=[Session.call],
|
301
|
-
custom_tags=
|
301
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
302
302
|
)
|
303
303
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
304
304
|
pd_df.columns = dataset.columns
|
@@ -417,7 +417,7 @@ class SGDClassifier(BaseTransformer):
|
|
417
417
|
inspect.currentframe(), SGDClassifier.__class__.__name__
|
418
418
|
),
|
419
419
|
api_calls=[Session.call],
|
420
|
-
custom_tags=
|
420
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
421
421
|
)
|
422
422
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
423
423
|
pd_df.columns = dataset.columns
|
@@ -315,7 +315,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
315
315
|
inspect.currentframe(), SGDOneClassSVM.__class__.__name__
|
316
316
|
),
|
317
317
|
api_calls=[Session.call],
|
318
|
-
custom_tags=
|
318
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
319
319
|
)
|
320
320
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
321
321
|
pd_df.columns = dataset.columns
|
@@ -383,7 +383,7 @@ class SGDRegressor(BaseTransformer):
|
|
383
383
|
inspect.currentframe(), SGDRegressor.__class__.__name__
|
384
384
|
),
|
385
385
|
api_calls=[Session.call],
|
386
|
-
custom_tags=
|
386
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
387
387
|
)
|
388
388
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
389
389
|
pd_df.columns = dataset.columns
|
@@ -285,7 +285,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
285
285
|
inspect.currentframe(), TheilSenRegressor.__class__.__name__
|
286
286
|
),
|
287
287
|
api_calls=[Session.call],
|
288
|
-
custom_tags=
|
288
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
289
289
|
)
|
290
290
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
291
291
|
pd_df.columns = dataset.columns
|
@@ -311,7 +311,7 @@ class TweedieRegressor(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), TweedieRegressor.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -307,7 +307,7 @@ class Isomap(BaseTransformer):
|
|
307
307
|
inspect.currentframe(), Isomap.__class__.__name__
|
308
308
|
),
|
309
309
|
api_calls=[Session.call],
|
310
|
-
custom_tags=
|
310
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
311
311
|
)
|
312
312
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
313
313
|
pd_df.columns = dataset.columns
|
@@ -290,7 +290,7 @@ class MDS(BaseTransformer):
|
|
290
290
|
inspect.currentframe(), MDS.__class__.__name__
|
291
291
|
),
|
292
292
|
api_calls=[Session.call],
|
293
|
-
custom_tags=
|
293
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
294
294
|
)
|
295
295
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
296
296
|
pd_df.columns = dataset.columns
|
@@ -292,7 +292,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
292
292
|
inspect.currentframe(), SpectralEmbedding.__class__.__name__
|
293
293
|
),
|
294
294
|
api_calls=[Session.call],
|
295
|
-
custom_tags=
|
295
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
296
296
|
)
|
297
297
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
298
298
|
pd_df.columns = dataset.columns
|
@@ -351,7 +351,7 @@ class TSNE(BaseTransformer):
|
|
351
351
|
inspect.currentframe(), TSNE.__class__.__name__
|
352
352
|
),
|
353
353
|
api_calls=[Session.call],
|
354
|
-
custom_tags=
|
354
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
355
355
|
)
|
356
356
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
357
357
|
pd_df.columns = dataset.columns
|
@@ -354,7 +354,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
354
354
|
inspect.currentframe(), BayesianGaussianMixture.__class__.__name__
|
355
355
|
),
|
356
356
|
api_calls=[Session.call],
|
357
|
-
custom_tags=
|
357
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
358
358
|
)
|
359
359
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
360
360
|
pd_df.columns = dataset.columns
|
@@ -327,7 +327,7 @@ class GaussianMixture(BaseTransformer):
|
|
327
327
|
inspect.currentframe(), GaussianMixture.__class__.__name__
|
328
328
|
),
|
329
329
|
api_calls=[Session.call],
|
330
|
-
custom_tags=
|
330
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
331
331
|
)
|
332
332
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
333
333
|
pd_df.columns = dataset.columns
|
@@ -285,11 +285,7 @@ class GridSearchCV(BaseTransformer):
|
|
285
285
|
)
|
286
286
|
return selected_cols
|
287
287
|
|
288
|
-
|
289
|
-
project=_PROJECT,
|
290
|
-
subproject=_SUBPROJECT,
|
291
|
-
)
|
292
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
|
288
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
|
293
289
|
"""Run fit with all sets of parameters
|
294
290
|
For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
|
295
291
|
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)
|
@@ -298,11 +298,7 @@ class RandomizedSearchCV(BaseTransformer):
|
|
298
298
|
)
|
299
299
|
return selected_cols
|
300
300
|
|
301
|
-
|
302
|
-
project=_PROJECT,
|
303
|
-
subproject=_SUBPROJECT,
|
304
|
-
)
|
305
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
|
301
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
|
306
302
|
"""Run fit with all sets of parameters
|
307
303
|
For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
|
308
304
|
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)
|
@@ -239,7 +239,7 @@ class OneVsOneClassifier(BaseTransformer):
|
|
239
239
|
inspect.currentframe(), OneVsOneClassifier.__class__.__name__
|
240
240
|
),
|
241
241
|
api_calls=[Session.call],
|
242
|
-
custom_tags=
|
242
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
243
243
|
)
|
244
244
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
245
245
|
pd_df.columns = dataset.columns
|
@@ -248,7 +248,7 @@ class OneVsRestClassifier(BaseTransformer):
|
|
248
248
|
inspect.currentframe(), OneVsRestClassifier.__class__.__name__
|
249
249
|
),
|
250
250
|
api_calls=[Session.call],
|
251
|
-
custom_tags=
|
251
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
252
252
|
)
|
253
253
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
254
254
|
pd_df.columns = dataset.columns
|
@@ -251,7 +251,7 @@ class OutputCodeClassifier(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), OutputCodeClassifier.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -251,7 +251,7 @@ class BernoulliNB(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), BernoulliNB.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -257,7 +257,7 @@ class CategoricalNB(BaseTransformer):
|
|
257
257
|
inspect.currentframe(), CategoricalNB.__class__.__name__
|
258
258
|
),
|
259
259
|
api_calls=[Session.call],
|
260
|
-
custom_tags=
|
260
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
261
261
|
)
|
262
262
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
263
263
|
pd_df.columns = dataset.columns
|
@@ -251,7 +251,7 @@ class ComplementNB(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), ComplementNB.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -232,7 +232,7 @@ class GaussianNB(BaseTransformer):
|
|
232
232
|
inspect.currentframe(), GaussianNB.__class__.__name__
|
233
233
|
),
|
234
234
|
api_calls=[Session.call],
|
235
|
-
custom_tags=
|
235
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
236
236
|
)
|
237
237
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
238
238
|
pd_df.columns = dataset.columns
|
@@ -245,7 +245,7 @@ class MultinomialNB(BaseTransformer):
|
|
245
245
|
inspect.currentframe(), MultinomialNB.__class__.__name__
|
246
246
|
),
|
247
247
|
api_calls=[Session.call],
|
248
|
-
custom_tags=
|
248
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
249
249
|
)
|
250
250
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
251
251
|
pd_df.columns = dataset.columns
|
@@ -302,7 +302,7 @@ class KNeighborsClassifier(BaseTransformer):
|
|
302
302
|
inspect.currentframe(), KNeighborsClassifier.__class__.__name__
|
303
303
|
),
|
304
304
|
api_calls=[Session.call],
|
305
|
-
custom_tags=
|
305
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
306
306
|
)
|
307
307
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
308
308
|
pd_df.columns = dataset.columns
|
@@ -304,7 +304,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
304
304
|
inspect.currentframe(), KNeighborsRegressor.__class__.__name__
|
305
305
|
),
|
306
306
|
api_calls=[Session.call],
|
307
|
-
custom_tags=
|
307
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
308
308
|
)
|
309
309
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
310
310
|
pd_df.columns = dataset.columns
|
@@ -281,7 +281,7 @@ class KernelDensity(BaseTransformer):
|
|
281
281
|
inspect.currentframe(), KernelDensity.__class__.__name__
|
282
282
|
),
|
283
283
|
api_calls=[Session.call],
|
284
|
-
custom_tags=
|
284
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
285
285
|
)
|
286
286
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
287
287
|
pd_df.columns = dataset.columns
|
@@ -309,7 +309,7 @@ class LocalOutlierFactor(BaseTransformer):
|
|
309
309
|
inspect.currentframe(), LocalOutlierFactor.__class__.__name__
|
310
310
|
),
|
311
311
|
api_calls=[Session.call],
|
312
|
-
custom_tags=
|
312
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
313
313
|
)
|
314
314
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
315
315
|
pd_df.columns = dataset.columns
|
@@ -242,7 +242,7 @@ class NearestCentroid(BaseTransformer):
|
|
242
242
|
inspect.currentframe(), NearestCentroid.__class__.__name__
|
243
243
|
),
|
244
244
|
api_calls=[Session.call],
|
245
|
-
custom_tags=
|
245
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
246
246
|
)
|
247
247
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
248
248
|
pd_df.columns = dataset.columns
|
@@ -292,7 +292,7 @@ class NearestNeighbors(BaseTransformer):
|
|
292
292
|
inspect.currentframe(), NearestNeighbors.__class__.__name__
|
293
293
|
),
|
294
294
|
api_calls=[Session.call],
|
295
|
-
custom_tags=
|
295
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
296
296
|
)
|
297
297
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
298
298
|
pd_df.columns = dataset.columns
|
@@ -313,7 +313,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
|
|
313
313
|
inspect.currentframe(), NeighborhoodComponentsAnalysis.__class__.__name__
|
314
314
|
),
|
315
315
|
api_calls=[Session.call],
|
316
|
-
custom_tags=
|
316
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
317
317
|
)
|
318
318
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
319
319
|
pd_df.columns = dataset.columns
|
@@ -314,7 +314,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
314
314
|
inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
|
315
315
|
),
|
316
316
|
api_calls=[Session.call],
|
317
|
-
custom_tags=
|
317
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
318
318
|
)
|
319
319
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
320
320
|
pd_df.columns = dataset.columns
|
@@ -304,7 +304,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
304
304
|
inspect.currentframe(), RadiusNeighborsRegressor.__class__.__name__
|
305
305
|
),
|
306
306
|
api_calls=[Session.call],
|
307
|
-
custom_tags=
|
307
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
308
308
|
)
|
309
309
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
310
310
|
pd_df.columns = dataset.columns
|
@@ -261,7 +261,7 @@ class BernoulliRBM(BaseTransformer):
|
|
261
261
|
inspect.currentframe(), BernoulliRBM.__class__.__name__
|
262
262
|
),
|
263
263
|
api_calls=[Session.call],
|
264
|
-
custom_tags=
|
264
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
265
265
|
)
|
266
266
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
267
267
|
pd_df.columns = dataset.columns
|
@@ -416,7 +416,7 @@ class MLPClassifier(BaseTransformer):
|
|
416
416
|
inspect.currentframe(), MLPClassifier.__class__.__name__
|
417
417
|
),
|
418
418
|
api_calls=[Session.call],
|
419
|
-
custom_tags=
|
419
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
420
420
|
)
|
421
421
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
422
422
|
pd_df.columns = dataset.columns
|
@@ -412,7 +412,7 @@ class MLPRegressor(BaseTransformer):
|
|
412
412
|
inspect.currentframe(), MLPRegressor.__class__.__name__
|
413
413
|
),
|
414
414
|
api_calls=[Session.call],
|
415
|
-
custom_tags=
|
415
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
416
416
|
)
|
417
417
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
418
418
|
pd_df.columns = dataset.columns
|
@@ -17,6 +17,7 @@ from sklearn.utils import metaestimators
|
|
17
17
|
from snowflake import snowpark
|
18
18
|
from snowflake.ml._internal import file_utils, telemetry
|
19
19
|
from snowflake.ml._internal.exceptions import error_codes, exceptions
|
20
|
+
from snowflake.ml._internal.lineage import lineage_utils
|
20
21
|
from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
|
21
22
|
from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
|
22
23
|
from snowflake.ml.modeling._internal.model_transformer_builder import (
|
@@ -427,6 +428,10 @@ class Pipeline(base.BaseTransformer):
|
|
427
428
|
else dataset
|
428
429
|
)
|
429
430
|
|
431
|
+
# Extract lineage information here since we're overriding fit() directly
|
432
|
+
data_sources = lineage_utils.get_data_sources(dataset)
|
433
|
+
lineage_utils.set_data_sources(self, data_sources)
|
434
|
+
|
430
435
|
if self._can_be_trained_in_ml_runtime(dataset):
|
431
436
|
if not self._is_convertible_to_sklearn:
|
432
437
|
raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
|
@@ -25,11 +25,15 @@ class Binarizer(base.BaseTransformer):
|
|
25
25
|
Feature values below or equal to this are replaced by 0, above it by 1. Default value is 0.0.
|
26
26
|
|
27
27
|
input_cols: Optional[Union[str, Iterable[str]]], default=None
|
28
|
-
The name(s) of one or more columns in
|
28
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be binarized. Input
|
29
|
+
columns must be specified before transform with this argument or after initialization with the
|
30
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
29
31
|
|
30
32
|
output_cols: Optional[Union[str, Iterable[str]]], default=None
|
31
|
-
The name(s)
|
32
|
-
columns specified must
|
33
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
34
|
+
columns specified must equal the number of input columns. Output columns must be specified before transform
|
35
|
+
with this argument or after initialization with the `set_output_cols` method. This argument is optional for
|
36
|
+
API consistency.
|
33
37
|
|
34
38
|
passthrough_cols: Optional[Union[str, Iterable[str]]], default=None
|
35
39
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -74,10 +74,15 @@ class KBinsDiscretizer(base.BaseTransformer):
|
|
74
74
|
- 'quantile': All bins in each feature have the same number of points.
|
75
75
|
|
76
76
|
input_cols: str or Iterable [column_name], default=None
|
77
|
-
|
77
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be discretized.
|
78
|
+
Input columns must be specified before fit with this argument or after initialization with the
|
79
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
78
80
|
|
79
81
|
output_cols: str or Iterable [column_name], default=None
|
80
|
-
|
82
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
83
|
+
columns specified must equal the number of input columns. Output columns must be specified before transform
|
84
|
+
with this argument or after initialization with the `set_output_cols` method. This argument is optional for
|
85
|
+
API consistency.
|
81
86
|
|
82
87
|
passthrough_cols: A string or a list of strings indicating column names to be excluded from any
|
83
88
|
operations (such as train, transform, or inference). These specified column(s)
|
@@ -25,11 +25,12 @@ class LabelEncoder(base.BaseTransformer):
|
|
25
25
|
|
26
26
|
Args:
|
27
27
|
input_cols: Optional[Union[str, List[str]]]
|
28
|
-
The name of a column
|
28
|
+
The name of a column or a list containing one column name to be encoded in the input DataFrame. There must
|
29
|
+
be exactly one input column specified before fit. This argument is optional for API consistency.
|
29
30
|
|
30
31
|
output_cols: Optional[Union[str, List[str]]]
|
31
|
-
The name of a column
|
32
|
-
|
32
|
+
The name of a column or a list containing one column name where the results will be stored. There must be
|
33
|
+
exactly one output column specified before transform. This argument is optional for API consistency.
|
33
34
|
|
34
35
|
passthrough_cols: Optional[Union[str, List[str]]]
|
35
36
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -54,11 +55,11 @@ class LabelEncoder(base.BaseTransformer):
|
|
54
55
|
|
55
56
|
Args:
|
56
57
|
input_cols: Optional[Union[str, List[str]]]
|
57
|
-
The name of a column
|
58
|
-
|
58
|
+
The name of a column or a list containing one column name to be encoded in the input DataFrame. There
|
59
|
+
must be exactly one input column specified before fit. This argument is optional for API consistency.
|
59
60
|
output_cols: Optional[Union[str, List[str]]]
|
60
|
-
The name of a column
|
61
|
-
|
61
|
+
The name of a column or a list containing one column name where the results will be stored. There must
|
62
|
+
be exactly one output column specified before transform. This argument is optional for API consistency.
|
62
63
|
passthrough_cols: Optional[Union[str, List[str]]]
|
63
64
|
A string or a list of strings indicating column names to be excluded from any
|
64
65
|
operations (such as train, transform, or inference). These specified column(s)
|
@@ -28,11 +28,15 @@ class MaxAbsScaler(base.BaseTransformer):
|
|
28
28
|
|
29
29
|
Args:
|
30
30
|
input_cols: Optional[Union[str, List[str]]], default=None
|
31
|
-
The name(s) of one or more columns in
|
31
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
|
32
|
+
columns must be specified before fit with this argument or after initialization with the
|
33
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
32
34
|
|
33
35
|
output_cols: Optional[Union[str, List[str]]], default=None
|
34
|
-
The name(s)
|
35
|
-
columns specified must
|
36
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
37
|
+
columns specified must equal the number of input columns. Output columns must be specified before transform
|
38
|
+
with this argument or after initialization with the `set_output_cols` method. This argument is optional for
|
39
|
+
API consistency.
|
36
40
|
|
37
41
|
passthrough_cols: Optional[Union[str, List[str]]], default=None
|
38
42
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -29,12 +29,15 @@ class MinMaxScaler(base.BaseTransformer):
|
|
29
29
|
Whether to clip transformed values of held-out data to the specified feature range (default is True).
|
30
30
|
|
31
31
|
input_cols: Optional[Union[str, List[str]]], default=None
|
32
|
-
The name(s) of one or more columns in
|
33
|
-
|
32
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
|
33
|
+
columns must be specified before fit with this argument or after initialization with the
|
34
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
34
35
|
|
35
36
|
output_cols: Optional[Union[str, List[str]]], default=None
|
36
|
-
The name(s)
|
37
|
-
columns specified must
|
37
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
38
|
+
columns specified must equal the number of input columns. Output columns must be specified before transform
|
39
|
+
with this argument or after initialization with the `set_output_cols` method. This argument is optional for
|
40
|
+
API consistency.
|
38
41
|
|
39
42
|
passthrough_cols: Optional[Union[str, List[str]]], default=None
|
40
43
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -28,11 +28,15 @@ class Normalizer(base.BaseTransformer):
|
|
28
28
|
values. It must be one of 'l1', 'l2', or 'max'.
|
29
29
|
|
30
30
|
input_cols: Optional[Union[str, List[str]]]
|
31
|
-
|
31
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be normalized. Input
|
32
|
+
columns must be specified before transform with this argument or after initialization with the
|
33
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
32
34
|
|
33
35
|
output_cols: Optional[Union[str, List[str]]]
|
34
|
-
|
35
|
-
|
36
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
37
|
+
columns specified must equal the number of input columns. Output columns must be specified before transform
|
38
|
+
with this argument or after initialization with the `set_output_cols` method. This argument is optional for
|
39
|
+
API consistency.
|
36
40
|
|
37
41
|
passthrough_cols: Optional[Union[str, List[str]]]
|
38
42
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -157,10 +157,18 @@ class OneHotEncoder(base.BaseTransformer):
|
|
157
157
|
there is no limit to the number of output features.
|
158
158
|
|
159
159
|
input_cols: Optional[Union[str, List[str]]], default=None
|
160
|
-
|
160
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
|
161
|
+
columns must be specified before fit with this argument or after initialization with the
|
162
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
161
163
|
|
162
164
|
output_cols: Optional[Union[str, List[str]]], default=None
|
163
|
-
|
165
|
+
The prefix to be used for encoded output for each input column. The number of
|
166
|
+
output column prefixes specified must match the number of input columns. Output column prefixes must be
|
167
|
+
specified before transform with this argument or after initialization with the `set_output_cols` method.
|
168
|
+
|
169
|
+
Note: Dense output column names are case-sensitive and resolve identifiers following Snowflake rules, e.g.
|
170
|
+
`"PREFIX_a"`, `PREFIX_A`, `"prefix_A"`. Therefore, there is no need to provide double-quoted column names
|
171
|
+
as that would result in invalid identifiers.
|
164
172
|
|
165
173
|
passthrough_cols: Optional[Union[str, List[str]]]
|
166
174
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -67,11 +67,14 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
67
67
|
The value to be used to encode unknown categories.
|
68
68
|
|
69
69
|
input_cols: Optional[Union[str, List[str]]], default=None
|
70
|
-
The name(s) of one or more columns in
|
70
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
|
71
|
+
columns must be specified before fit with this argument or after initialization with the
|
72
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
71
73
|
|
72
74
|
output_cols: Optional[Union[str, List[str]]], default=None
|
73
|
-
The
|
74
|
-
|
75
|
+
The prefix to be used for encoded output for each input column. The number of
|
76
|
+
output column prefixes specified must equal the number of input columns. Output column prefixes must be
|
77
|
+
specified before transform with this argument or after initialization with the `set_output_cols` method.
|
75
78
|
|
76
79
|
passthrough_cols: Optional[Union[str, List[str]]], default=None
|
77
80
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -247,7 +250,7 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
247
250
|
# columns: COLUMN_NAME, CATEGORY, INDEX
|
248
251
|
state_df = self._get_category_index_state_df(dataset)
|
249
252
|
# save the dataframe on server side so that transform doesn't need to upload
|
250
|
-
state_df.write.save_as_table(
|
253
|
+
state_df.write.save_as_table(
|
251
254
|
self._vocab_table_name,
|
252
255
|
mode="overwrite",
|
253
256
|
table_type="temporary",
|
@@ -520,7 +523,7 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
520
523
|
)
|
521
524
|
|
522
525
|
batch_table_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
|
523
|
-
transformed_dataset.write.save_as_table(
|
526
|
+
transformed_dataset.write.save_as_table(
|
524
527
|
batch_table_name,
|
525
528
|
mode="overwrite",
|
526
529
|
table_type="temporary",
|
@@ -251,7 +251,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), PolynomialFeatures.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|