snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +26 -5
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_util.py +105 -8
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/dataset/dataset.py +15 -12
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/feature_store.py +2 -2
- snowflake/ml/model/_client/sql/model_version.py +2 -2
- snowflake/ml/model/_model_composer/model_composer.py +2 -2
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +156 -121
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +1 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +1 -1
- snowflake/ml/modeling/cluster/birch.py +1 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +1 -1
- snowflake/ml/modeling/cluster/dbscan.py +1 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +1 -1
- snowflake/ml/modeling/cluster/k_means.py +1 -1
- snowflake/ml/modeling/cluster/mean_shift.py +1 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +1 -1
- snowflake/ml/modeling/cluster/optics.py +1 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +1 -1
- snowflake/ml/modeling/compose/column_transformer.py +1 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +1 -1
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +1 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +1 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +1 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/pca.py +1 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +1 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +1 -1
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +1 -1
- snowflake/ml/modeling/impute/knn_imputer.py +1 -1
- snowflake/ml/modeling/impute/missing_indicator.py +1 -1
- snowflake/ml/modeling/impute/simple_imputer.py +8 -4
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +1 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +1 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +1 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +1 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ridge.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +1 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +1 -1
- snowflake/ml/modeling/manifold/tsne.py +1 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +1 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +1 -1
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +1 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
- snowflake/ml/modeling/pipeline/pipeline.py +5 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +1 -1
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
- snowflake/ml/modeling/svm/linear_svc.py +1 -1
- snowflake/ml/modeling/svm/linear_svr.py +1 -1
- snowflake/ml/modeling/svm/nu_svc.py +1 -1
- snowflake/ml/modeling/svm/nu_svr.py +1 -1
- snowflake/ml/modeling/svm/svc.py +1 -1
- snowflake/ml/modeling/svm/svr.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +21 -5
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +196 -195
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -284,7 +284,7 @@ class StackingRegressor(BaseTransformer):
|
|
284
284
|
inspect.currentframe(), StackingRegressor.__class__.__name__
|
285
285
|
),
|
286
286
|
api_calls=[Session.call],
|
287
|
-
custom_tags=
|
287
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
288
288
|
)
|
289
289
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
290
290
|
pd_df.columns = dataset.columns
|
@@ -266,7 +266,7 @@ class VotingClassifier(BaseTransformer):
|
|
266
266
|
inspect.currentframe(), VotingClassifier.__class__.__name__
|
267
267
|
),
|
268
268
|
api_calls=[Session.call],
|
269
|
-
custom_tags=
|
269
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
270
270
|
)
|
271
271
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
272
272
|
pd_df.columns = dataset.columns
|
@@ -248,7 +248,7 @@ class VotingRegressor(BaseTransformer):
|
|
248
248
|
inspect.currentframe(), VotingRegressor.__class__.__name__
|
249
249
|
),
|
250
250
|
api_calls=[Session.call],
|
251
|
-
custom_tags=
|
251
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
252
252
|
)
|
253
253
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
254
254
|
pd_df.columns = dataset.columns
|
@@ -238,7 +238,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
238
238
|
inspect.currentframe(), GenericUnivariateSelect.__class__.__name__
|
239
239
|
),
|
240
240
|
api_calls=[Session.call],
|
241
|
-
custom_tags=
|
241
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
242
242
|
)
|
243
243
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
244
244
|
pd_df.columns = dataset.columns
|
@@ -234,7 +234,7 @@ class SelectFdr(BaseTransformer):
|
|
234
234
|
inspect.currentframe(), SelectFdr.__class__.__name__
|
235
235
|
),
|
236
236
|
api_calls=[Session.call],
|
237
|
-
custom_tags=
|
237
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
238
238
|
)
|
239
239
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
240
240
|
pd_df.columns = dataset.columns
|
@@ -234,7 +234,7 @@ class SelectFpr(BaseTransformer):
|
|
234
234
|
inspect.currentframe(), SelectFpr.__class__.__name__
|
235
235
|
),
|
236
236
|
api_calls=[Session.call],
|
237
|
-
custom_tags=
|
237
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
238
238
|
)
|
239
239
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
240
240
|
pd_df.columns = dataset.columns
|
@@ -234,7 +234,7 @@ class SelectFwe(BaseTransformer):
|
|
234
234
|
inspect.currentframe(), SelectFwe.__class__.__name__
|
235
235
|
),
|
236
236
|
api_calls=[Session.call],
|
237
|
-
custom_tags=
|
237
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
238
238
|
)
|
239
239
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
240
240
|
pd_df.columns = dataset.columns
|
@@ -235,7 +235,7 @@ class SelectKBest(BaseTransformer):
|
|
235
235
|
inspect.currentframe(), SelectKBest.__class__.__name__
|
236
236
|
),
|
237
237
|
api_calls=[Session.call],
|
238
|
-
custom_tags=
|
238
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
239
239
|
)
|
240
240
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
241
241
|
pd_df.columns = dataset.columns
|
@@ -234,7 +234,7 @@ class SelectPercentile(BaseTransformer):
|
|
234
234
|
inspect.currentframe(), SelectPercentile.__class__.__name__
|
235
235
|
),
|
236
236
|
api_calls=[Session.call],
|
237
|
-
custom_tags=
|
237
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
238
238
|
)
|
239
239
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
240
240
|
pd_df.columns = dataset.columns
|
@@ -292,7 +292,7 @@ class SequentialFeatureSelector(BaseTransformer):
|
|
292
292
|
inspect.currentframe(), SequentialFeatureSelector.__class__.__name__
|
293
293
|
),
|
294
294
|
api_calls=[Session.call],
|
295
|
-
custom_tags=
|
295
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
296
296
|
)
|
297
297
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
298
298
|
pd_df.columns = dataset.columns
|
@@ -225,7 +225,7 @@ class VarianceThreshold(BaseTransformer):
|
|
225
225
|
inspect.currentframe(), VarianceThreshold.__class__.__name__
|
226
226
|
),
|
227
227
|
api_calls=[Session.call],
|
228
|
-
custom_tags=
|
228
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
229
229
|
)
|
230
230
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
231
231
|
pd_df.columns = dataset.columns
|
@@ -16,7 +16,7 @@ from snowflake.ml._internal.exceptions import (
|
|
16
16
|
exceptions,
|
17
17
|
modeling_error_messages,
|
18
18
|
)
|
19
|
-
from snowflake.ml._internal.lineage import
|
19
|
+
from snowflake.ml._internal.lineage import lineage_utils
|
20
20
|
from snowflake.ml._internal.utils import identifier, parallelize
|
21
21
|
from snowflake.ml.modeling.framework import _utils
|
22
22
|
from snowflake.snowpark import functions as F
|
@@ -386,7 +386,6 @@ class BaseEstimator(Base):
|
|
386
386
|
self.file_names = file_names
|
387
387
|
self.custom_states = custom_states
|
388
388
|
self.sample_weight_col = sample_weight_col
|
389
|
-
self._data_sources: Optional[List[data_source.DataSource]] = None
|
390
389
|
|
391
390
|
self.start_time = datetime.now().strftime(_utils.DATETIME_FORMAT)[:-3]
|
392
391
|
|
@@ -421,18 +420,14 @@ class BaseEstimator(Base):
|
|
421
420
|
"""
|
422
421
|
return []
|
423
422
|
|
424
|
-
def _get_data_sources(self) -> Optional[List[data_source.DataSource]]:
|
425
|
-
return self._data_sources
|
426
|
-
|
427
423
|
@telemetry.send_api_usage_telemetry(
|
428
424
|
project=PROJECT,
|
429
425
|
subproject=SUBPROJECT,
|
430
426
|
)
|
431
427
|
def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "BaseEstimator":
|
432
428
|
"""Runs universal logics for all fit implementations."""
|
433
|
-
|
434
|
-
|
435
|
-
assert all(isinstance(ds, data_source.DataSource) for ds in self._data_sources)
|
429
|
+
data_sources = lineage_utils.get_data_sources(dataset)
|
430
|
+
lineage_utils.set_data_sources(self, data_sources)
|
436
431
|
return self._fit(dataset)
|
437
432
|
|
438
433
|
@abstractmethod
|
@@ -320,7 +320,7 @@ class GaussianProcessClassifier(BaseTransformer):
|
|
320
320
|
inspect.currentframe(), GaussianProcessClassifier.__class__.__name__
|
321
321
|
),
|
322
322
|
api_calls=[Session.call],
|
323
|
-
custom_tags=
|
323
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
324
324
|
)
|
325
325
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
326
326
|
pd_df.columns = dataset.columns
|
@@ -311,7 +311,7 @@ class GaussianProcessRegressor(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), GaussianProcessRegressor.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -353,7 +353,7 @@ class IterativeImputer(BaseTransformer):
|
|
353
353
|
inspect.currentframe(), IterativeImputer.__class__.__name__
|
354
354
|
),
|
355
355
|
api_calls=[Session.call],
|
356
|
-
custom_tags=
|
356
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
357
357
|
)
|
358
358
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
359
359
|
pd_df.columns = dataset.columns
|
@@ -279,7 +279,7 @@ class KNNImputer(BaseTransformer):
|
|
279
279
|
inspect.currentframe(), KNNImputer.__class__.__name__
|
280
280
|
),
|
281
281
|
api_calls=[Session.call],
|
282
|
-
custom_tags=
|
282
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
283
283
|
)
|
284
284
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
285
285
|
pd_df.columns = dataset.columns
|
@@ -253,7 +253,7 @@ class MissingIndicator(BaseTransformer):
|
|
253
253
|
inspect.currentframe(), MissingIndicator.__class__.__name__
|
254
254
|
),
|
255
255
|
api_calls=[Session.call],
|
256
|
-
custom_tags=
|
256
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
257
257
|
)
|
258
258
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
259
259
|
pd_df.columns = dataset.columns
|
@@ -102,10 +102,14 @@ class SimpleImputer(base.BaseTransformer):
|
|
102
102
|
For string or object data types, `fill_value` must be a string. If `None`, `fill_value` will be 0 when
|
103
103
|
imputing numerical data and `missing_value` for strings and object data types.
|
104
104
|
input_cols: Optional[Union[str, List[str]]]
|
105
|
-
|
105
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be imputed. Input
|
106
|
+
columns must be specified before fit with this argument or after initialization with the
|
107
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
106
108
|
output_cols: Optional[Union[str, List[str]]]
|
107
|
-
|
108
|
-
|
109
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
110
|
+
output columns specified must equal the number of input columns. Output columns must be specified before
|
111
|
+
transform with this argument or after initialization with the `set_output_cols` method. This argument is
|
112
|
+
optional for API consistency.
|
109
113
|
passthrough_cols: A string or a list of strings indicating column names to be excluded from any
|
110
114
|
operations (such as train, transform, or inference). These specified column(s)
|
111
115
|
will remain untouched throughout the process. This option is helpful in scenarios
|
@@ -230,7 +234,7 @@ class SimpleImputer(base.BaseTransformer):
|
|
230
234
|
|
231
235
|
return input_col_datatypes
|
232
236
|
|
233
|
-
def
|
237
|
+
def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "SimpleImputer":
|
234
238
|
if isinstance(dataset, snowpark.DataFrame):
|
235
239
|
return self._fit_snowpark(dataset)
|
236
240
|
else:
|
@@ -228,7 +228,7 @@ class AdditiveChi2Sampler(BaseTransformer):
|
|
228
228
|
inspect.currentframe(), AdditiveChi2Sampler.__class__.__name__
|
229
229
|
),
|
230
230
|
api_calls=[Session.call],
|
231
|
-
custom_tags=
|
231
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
232
232
|
)
|
233
233
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
234
234
|
pd_df.columns = dataset.columns
|
@@ -276,7 +276,7 @@ class Nystroem(BaseTransformer):
|
|
276
276
|
inspect.currentframe(), Nystroem.__class__.__name__
|
277
277
|
),
|
278
278
|
api_calls=[Session.call],
|
279
|
-
custom_tags=
|
279
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
280
280
|
)
|
281
281
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
282
282
|
pd_df.columns = dataset.columns
|
@@ -252,7 +252,7 @@ class PolynomialCountSketch(BaseTransformer):
|
|
252
252
|
inspect.currentframe(), PolynomialCountSketch.__class__.__name__
|
253
253
|
),
|
254
254
|
api_calls=[Session.call],
|
255
|
-
custom_tags=
|
255
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
256
256
|
)
|
257
257
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
258
258
|
pd_df.columns = dataset.columns
|
@@ -239,7 +239,7 @@ class RBFSampler(BaseTransformer):
|
|
239
239
|
inspect.currentframe(), RBFSampler.__class__.__name__
|
240
240
|
),
|
241
241
|
api_calls=[Session.call],
|
242
|
-
custom_tags=
|
242
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
243
243
|
)
|
244
244
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
245
245
|
pd_df.columns = dataset.columns
|
@@ -237,7 +237,7 @@ class SkewedChi2Sampler(BaseTransformer):
|
|
237
237
|
inspect.currentframe(), SkewedChi2Sampler.__class__.__name__
|
238
238
|
),
|
239
239
|
api_calls=[Session.call],
|
240
|
-
custom_tags=
|
240
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
241
241
|
)
|
242
242
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
243
243
|
pd_df.columns = dataset.columns
|
@@ -273,7 +273,7 @@ class KernelRidge(BaseTransformer):
|
|
273
273
|
inspect.currentframe(), KernelRidge.__class__.__name__
|
274
274
|
),
|
275
275
|
api_calls=[Session.call],
|
276
|
-
custom_tags=
|
276
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
277
277
|
)
|
278
278
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
279
279
|
pd_df.columns = dataset.columns
|
@@ -262,7 +262,7 @@ class LGBMClassifier(BaseTransformer):
|
|
262
262
|
inspect.currentframe(), LGBMClassifier.__class__.__name__
|
263
263
|
),
|
264
264
|
api_calls=[Session.call],
|
265
|
-
custom_tags=
|
265
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
266
266
|
)
|
267
267
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
268
268
|
pd_df.columns = dataset.columns
|
@@ -262,7 +262,7 @@ class LGBMRegressor(BaseTransformer):
|
|
262
262
|
inspect.currentframe(), LGBMRegressor.__class__.__name__
|
263
263
|
),
|
264
264
|
api_calls=[Session.call],
|
265
|
-
custom_tags=
|
265
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
266
266
|
)
|
267
267
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
268
268
|
pd_df.columns = dataset.columns
|
@@ -287,7 +287,7 @@ class ARDRegression(BaseTransformer):
|
|
287
287
|
inspect.currentframe(), ARDRegression.__class__.__name__
|
288
288
|
),
|
289
289
|
api_calls=[Session.call],
|
290
|
-
custom_tags=
|
290
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
291
291
|
)
|
292
292
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
293
293
|
pd_df.columns = dataset.columns
|
@@ -298,7 +298,7 @@ class BayesianRidge(BaseTransformer):
|
|
298
298
|
inspect.currentframe(), BayesianRidge.__class__.__name__
|
299
299
|
),
|
300
300
|
api_calls=[Session.call],
|
301
|
-
custom_tags=
|
301
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
302
302
|
)
|
303
303
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
304
304
|
pd_df.columns = dataset.columns
|
@@ -297,7 +297,7 @@ class ElasticNet(BaseTransformer):
|
|
297
297
|
inspect.currentframe(), ElasticNet.__class__.__name__
|
298
298
|
),
|
299
299
|
api_calls=[Session.call],
|
300
|
-
custom_tags=
|
300
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
301
301
|
)
|
302
302
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
303
303
|
pd_df.columns = dataset.columns
|
@@ -333,7 +333,7 @@ class ElasticNetCV(BaseTransformer):
|
|
333
333
|
inspect.currentframe(), ElasticNetCV.__class__.__name__
|
334
334
|
),
|
335
335
|
api_calls=[Session.call],
|
336
|
-
custom_tags=
|
336
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
337
337
|
)
|
338
338
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
339
339
|
pd_df.columns = dataset.columns
|
@@ -278,7 +278,7 @@ class GammaRegressor(BaseTransformer):
|
|
278
278
|
inspect.currentframe(), GammaRegressor.__class__.__name__
|
279
279
|
),
|
280
280
|
api_calls=[Session.call],
|
281
|
-
custom_tags=
|
281
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
282
282
|
)
|
283
283
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
284
284
|
pd_df.columns = dataset.columns
|
@@ -261,7 +261,7 @@ class HuberRegressor(BaseTransformer):
|
|
261
261
|
inspect.currentframe(), HuberRegressor.__class__.__name__
|
262
262
|
),
|
263
263
|
api_calls=[Session.call],
|
264
|
-
custom_tags=
|
264
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
265
265
|
)
|
266
266
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
267
267
|
pd_df.columns = dataset.columns
|
@@ -290,7 +290,7 @@ class Lars(BaseTransformer):
|
|
290
290
|
inspect.currentframe(), Lars.__class__.__name__
|
291
291
|
),
|
292
292
|
api_calls=[Session.call],
|
293
|
-
custom_tags=
|
293
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
294
294
|
)
|
295
295
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
296
296
|
pd_df.columns = dataset.columns
|
@@ -298,7 +298,7 @@ class LarsCV(BaseTransformer):
|
|
298
298
|
inspect.currentframe(), LarsCV.__class__.__name__
|
299
299
|
),
|
300
300
|
api_calls=[Session.call],
|
301
|
-
custom_tags=
|
301
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
302
302
|
)
|
303
303
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
304
304
|
pd_df.columns = dataset.columns
|
@@ -291,7 +291,7 @@ class Lasso(BaseTransformer):
|
|
291
291
|
inspect.currentframe(), Lasso.__class__.__name__
|
292
292
|
),
|
293
293
|
api_calls=[Session.call],
|
294
|
-
custom_tags=
|
294
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
295
295
|
)
|
296
296
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
297
297
|
pd_df.columns = dataset.columns
|
@@ -319,7 +319,7 @@ class LassoCV(BaseTransformer):
|
|
319
319
|
inspect.currentframe(), LassoCV.__class__.__name__
|
320
320
|
),
|
321
321
|
api_calls=[Session.call],
|
322
|
-
custom_tags=
|
322
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
323
323
|
)
|
324
324
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
325
325
|
pd_df.columns = dataset.columns
|
@@ -311,7 +311,7 @@ class LassoLars(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), LassoLars.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -312,7 +312,7 @@ class LassoLarsCV(BaseTransformer):
|
|
312
312
|
inspect.currentframe(), LassoLarsCV.__class__.__name__
|
313
313
|
),
|
314
314
|
api_calls=[Session.call],
|
315
|
-
custom_tags=
|
315
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
316
316
|
)
|
317
317
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
318
318
|
pd_df.columns = dataset.columns
|
@@ -295,7 +295,7 @@ class LassoLarsIC(BaseTransformer):
|
|
295
295
|
inspect.currentframe(), LassoLarsIC.__class__.__name__
|
296
296
|
),
|
297
297
|
api_calls=[Session.call],
|
298
|
-
custom_tags=
|
298
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
299
299
|
)
|
300
300
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
301
301
|
pd_df.columns = dataset.columns
|
@@ -248,7 +248,7 @@ class LinearRegression(BaseTransformer):
|
|
248
248
|
inspect.currentframe(), LinearRegression.__class__.__name__
|
249
249
|
),
|
250
250
|
api_calls=[Session.call],
|
251
|
-
custom_tags=
|
251
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
252
252
|
)
|
253
253
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
254
254
|
pd_df.columns = dataset.columns
|
@@ -362,7 +362,7 @@ class LogisticRegression(BaseTransformer):
|
|
362
362
|
inspect.currentframe(), LogisticRegression.__class__.__name__
|
363
363
|
),
|
364
364
|
api_calls=[Session.call],
|
365
|
-
custom_tags=
|
365
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
366
366
|
)
|
367
367
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
368
368
|
pd_df.columns = dataset.columns
|
@@ -383,7 +383,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
383
383
|
inspect.currentframe(), LogisticRegressionCV.__class__.__name__
|
384
384
|
),
|
385
385
|
api_calls=[Session.call],
|
386
|
-
custom_tags=
|
386
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
387
387
|
)
|
388
388
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
389
389
|
pd_df.columns = dataset.columns
|
@@ -281,7 +281,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
281
281
|
inspect.currentframe(), MultiTaskElasticNet.__class__.__name__
|
282
282
|
),
|
283
283
|
api_calls=[Session.call],
|
284
|
-
custom_tags=
|
284
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
285
285
|
)
|
286
286
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
287
287
|
pd_df.columns = dataset.columns
|
@@ -322,7 +322,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
322
322
|
inspect.currentframe(), MultiTaskElasticNetCV.__class__.__name__
|
323
323
|
),
|
324
324
|
api_calls=[Session.call],
|
325
|
-
custom_tags=
|
325
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
326
326
|
)
|
327
327
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
328
328
|
pd_df.columns = dataset.columns
|
@@ -273,7 +273,7 @@ class MultiTaskLasso(BaseTransformer):
|
|
273
273
|
inspect.currentframe(), MultiTaskLasso.__class__.__name__
|
274
274
|
),
|
275
275
|
api_calls=[Session.call],
|
276
|
-
custom_tags=
|
276
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
277
277
|
)
|
278
278
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
279
279
|
pd_df.columns = dataset.columns
|
@@ -308,7 +308,7 @@ class MultiTaskLassoCV(BaseTransformer):
|
|
308
308
|
inspect.currentframe(), MultiTaskLassoCV.__class__.__name__
|
309
309
|
),
|
310
310
|
api_calls=[Session.call],
|
311
|
-
custom_tags=
|
311
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
312
312
|
)
|
313
313
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
314
314
|
pd_df.columns = dataset.columns
|
@@ -256,7 +256,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
|
|
256
256
|
inspect.currentframe(), OrthogonalMatchingPursuit.__class__.__name__
|
257
257
|
),
|
258
258
|
api_calls=[Session.call],
|
259
|
-
custom_tags=
|
259
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
260
260
|
)
|
261
261
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
262
262
|
pd_df.columns = dataset.columns
|
@@ -330,7 +330,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
330
330
|
inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__
|
331
331
|
),
|
332
332
|
api_calls=[Session.call],
|
333
|
-
custom_tags=
|
333
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
334
334
|
)
|
335
335
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
336
336
|
pd_df.columns = dataset.columns
|
@@ -316,7 +316,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
316
316
|
inspect.currentframe(), PassiveAggressiveRegressor.__class__.__name__
|
317
317
|
),
|
318
318
|
api_calls=[Session.call],
|
319
|
-
custom_tags=
|
319
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
320
320
|
)
|
321
321
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
322
322
|
pd_df.columns = dataset.columns
|
@@ -329,7 +329,7 @@ class Perceptron(BaseTransformer):
|
|
329
329
|
inspect.currentframe(), Perceptron.__class__.__name__
|
330
330
|
),
|
331
331
|
api_calls=[Session.call],
|
332
|
-
custom_tags=
|
332
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
333
333
|
)
|
334
334
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
335
335
|
pd_df.columns = dataset.columns
|
@@ -278,7 +278,7 @@ class PoissonRegressor(BaseTransformer):
|
|
278
278
|
inspect.currentframe(), PoissonRegressor.__class__.__name__
|
279
279
|
),
|
280
280
|
api_calls=[Session.call],
|
281
|
-
custom_tags=
|
281
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
282
282
|
)
|
283
283
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
284
284
|
pd_df.columns = dataset.columns
|
@@ -334,7 +334,7 @@ class RANSACRegressor(BaseTransformer):
|
|
334
334
|
inspect.currentframe(), RANSACRegressor.__class__.__name__
|
335
335
|
),
|
336
336
|
api_calls=[Session.call],
|
337
|
-
custom_tags=
|
337
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
338
338
|
)
|
339
339
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
340
340
|
pd_df.columns = dataset.columns
|
@@ -326,7 +326,7 @@ class Ridge(BaseTransformer):
|
|
326
326
|
inspect.currentframe(), Ridge.__class__.__name__
|
327
327
|
),
|
328
328
|
api_calls=[Session.call],
|
329
|
-
custom_tags=
|
329
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
330
330
|
)
|
331
331
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
332
332
|
pd_df.columns = dataset.columns
|
@@ -326,7 +326,7 @@ class RidgeClassifier(BaseTransformer):
|
|
326
326
|
inspect.currentframe(), RidgeClassifier.__class__.__name__
|
327
327
|
),
|
328
328
|
api_calls=[Session.call],
|
329
|
-
custom_tags=
|
329
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
330
330
|
)
|
331
331
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
332
332
|
pd_df.columns = dataset.columns
|