snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/ml/_internal/env_utils.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
- snowflake/ml/_internal/telemetry.py +1 -0
- snowflake/ml/_internal/utils/identifier.py +1 -1
- snowflake/ml/_internal/utils/sql_identifier.py +14 -1
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/__init__.py +2 -1
- snowflake/ml/dataset/dataset.py +4 -3
- snowflake/ml/dataset/dataset_reader.py +5 -8
- snowflake/ml/feature_store/__init__.py +6 -0
- snowflake/ml/feature_store/access_manager.py +283 -0
- snowflake/ml/feature_store/feature_store.py +160 -100
- snowflake/ml/feature_store/feature_view.py +30 -19
- snowflake/ml/fileset/embedded_stage_fs.py +15 -12
- snowflake/ml/fileset/snowfs.py +2 -30
- snowflake/ml/fileset/stage_fs.py +25 -7
- snowflake/ml/model/_client/model/model_impl.py +46 -39
- snowflake/ml/model/_client/model/model_version_impl.py +24 -2
- snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
- snowflake/ml/model/_client/ops/model_ops.py +174 -16
- snowflake/ml/model/_client/sql/_base.py +34 -0
- snowflake/ml/model/_client/sql/model.py +32 -39
- snowflake/ml/model/_client/sql/model_version.py +111 -42
- snowflake/ml/model/_client/sql/stage.py +6 -32
- snowflake/ml/model/_client/sql/tag.py +32 -56
- snowflake/ml/model/_model_composer/model_composer.py +8 -4
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
- snowflake/ml/modeling/cluster/birch.py +8 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
- snowflake/ml/modeling/cluster/dbscan.py +8 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
- snowflake/ml/modeling/cluster/k_means.py +8 -1
- snowflake/ml/modeling/cluster/mean_shift.py +8 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
- snowflake/ml/modeling/cluster/optics.py +8 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
- snowflake/ml/modeling/compose/column_transformer.py +8 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
- snowflake/ml/modeling/covariance/oas.py +8 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/pca.py +8 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
- snowflake/ml/modeling/framework/base.py +4 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
- snowflake/ml/modeling/impute/knn_imputer.py +8 -1
- snowflake/ml/modeling/impute/missing_indicator.py +8 -1
- snowflake/ml/modeling/impute/simple_imputer.py +21 -2
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/lars.py +8 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/perceptron.py +8 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ridge.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
- snowflake/ml/modeling/manifold/isomap.py +8 -1
- snowflake/ml/modeling/manifold/mds.py +8 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
- snowflake/ml/modeling/manifold/tsne.py +8 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +27 -7
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
- snowflake/ml/modeling/svm/linear_svc.py +8 -1
- snowflake/ml/modeling/svm/linear_svr.py +8 -1
- snowflake/ml/modeling/svm/nu_svc.py +8 -1
- snowflake/ml/modeling/svm/nu_svr.py +8 -1
- snowflake/ml/modeling/svm/svc.py +8 -1
- snowflake/ml/modeling/svm/svr.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
- snowflake/ml/registry/_manager/model_manager.py +95 -8
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
- snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -618,7 +618,14 @@ class TheilSenRegressor(BaseTransformer):
|
|
618
618
|
) -> List[str]:
|
619
619
|
# in case the inferred output column names dimension is different
|
620
620
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
621
|
-
|
621
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
622
|
+
|
623
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
624
|
+
# seen during the fit.
|
625
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
626
|
+
sample_pd_df.columns = snowpark_column_names
|
627
|
+
|
628
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
622
629
|
output_df_columns = list(output_df_pd.columns)
|
623
630
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
624
631
|
if self.sample_weight_col:
|
@@ -644,7 +644,14 @@ class TweedieRegressor(BaseTransformer):
|
|
644
644
|
) -> List[str]:
|
645
645
|
# in case the inferred output column names dimension is different
|
646
646
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
647
|
-
|
647
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
648
|
+
|
649
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
650
|
+
# seen during the fit.
|
651
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
652
|
+
sample_pd_df.columns = snowpark_column_names
|
653
|
+
|
654
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
648
655
|
output_df_columns = list(output_df_pd.columns)
|
649
656
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
650
657
|
if self.sample_weight_col:
|
@@ -642,7 +642,14 @@ class Isomap(BaseTransformer):
|
|
642
642
|
) -> List[str]:
|
643
643
|
# in case the inferred output column names dimension is different
|
644
644
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
645
|
-
|
645
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
646
|
+
|
647
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
648
|
+
# seen during the fit.
|
649
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
650
|
+
sample_pd_df.columns = snowpark_column_names
|
651
|
+
|
652
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
646
653
|
output_df_columns = list(output_df_pd.columns)
|
647
654
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
648
655
|
if self.sample_weight_col:
|
@@ -623,7 +623,14 @@ class MDS(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -625,7 +625,14 @@ class SpectralEmbedding(BaseTransformer):
|
|
625
625
|
) -> List[str]:
|
626
626
|
# in case the inferred output column names dimension is different
|
627
627
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
628
|
-
|
628
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
629
|
+
|
630
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
631
|
+
# seen during the fit.
|
632
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
633
|
+
sample_pd_df.columns = snowpark_column_names
|
634
|
+
|
635
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
629
636
|
output_df_columns = list(output_df_pd.columns)
|
630
637
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
631
638
|
if self.sample_weight_col:
|
@@ -684,7 +684,14 @@ class TSNE(BaseTransformer):
|
|
684
684
|
) -> List[str]:
|
685
685
|
# in case the inferred output column names dimension is different
|
686
686
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
687
|
-
|
687
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
688
|
+
|
689
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
690
|
+
# seen during the fit.
|
691
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
692
|
+
sample_pd_df.columns = snowpark_column_names
|
693
|
+
|
694
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
688
695
|
output_df_columns = list(output_df_pd.columns)
|
689
696
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
690
697
|
if self.sample_weight_col:
|
@@ -689,7 +689,14 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
689
689
|
) -> List[str]:
|
690
690
|
# in case the inferred output column names dimension is different
|
691
691
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
692
|
-
|
692
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
693
|
+
|
694
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
695
|
+
# seen during the fit.
|
696
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
697
|
+
sample_pd_df.columns = snowpark_column_names
|
698
|
+
|
699
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
693
700
|
output_df_columns = list(output_df_pd.columns)
|
694
701
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
695
702
|
if self.sample_weight_col:
|
@@ -662,7 +662,14 @@ class GaussianMixture(BaseTransformer):
|
|
662
662
|
) -> List[str]:
|
663
663
|
# in case the inferred output column names dimension is different
|
664
664
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
665
|
-
|
665
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
666
|
+
|
667
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
668
|
+
# seen during the fit.
|
669
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
670
|
+
sample_pd_df.columns = snowpark_column_names
|
671
|
+
|
672
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
666
673
|
output_df_columns = list(output_df_pd.columns)
|
667
674
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
668
675
|
if self.sample_weight_col:
|
@@ -572,7 +572,14 @@ class OneVsOneClassifier(BaseTransformer):
|
|
572
572
|
) -> List[str]:
|
573
573
|
# in case the inferred output column names dimension is different
|
574
574
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
575
|
-
|
575
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
576
|
+
|
577
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
578
|
+
# seen during the fit.
|
579
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
580
|
+
sample_pd_df.columns = snowpark_column_names
|
581
|
+
|
582
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
576
583
|
output_df_columns = list(output_df_pd.columns)
|
577
584
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
578
585
|
if self.sample_weight_col:
|
@@ -581,7 +581,14 @@ class OneVsRestClassifier(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -584,7 +584,14 @@ class OutputCodeClassifier(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|
@@ -584,7 +584,14 @@ class BernoulliNB(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|
@@ -590,7 +590,14 @@ class CategoricalNB(BaseTransformer):
|
|
590
590
|
) -> List[str]:
|
591
591
|
# in case the inferred output column names dimension is different
|
592
592
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
593
|
-
|
593
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
594
|
+
|
595
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
596
|
+
# seen during the fit.
|
597
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
598
|
+
sample_pd_df.columns = snowpark_column_names
|
599
|
+
|
600
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
594
601
|
output_df_columns = list(output_df_pd.columns)
|
595
602
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
596
603
|
if self.sample_weight_col:
|
@@ -584,7 +584,14 @@ class ComplementNB(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|
@@ -565,7 +565,14 @@ class GaussianNB(BaseTransformer):
|
|
565
565
|
) -> List[str]:
|
566
566
|
# in case the inferred output column names dimension is different
|
567
567
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
568
|
-
|
568
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
569
|
+
|
570
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
571
|
+
# seen during the fit.
|
572
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
573
|
+
sample_pd_df.columns = snowpark_column_names
|
574
|
+
|
575
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
569
576
|
output_df_columns = list(output_df_pd.columns)
|
570
577
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
571
578
|
if self.sample_weight_col:
|
@@ -578,7 +578,14 @@ class MultinomialNB(BaseTransformer):
|
|
578
578
|
) -> List[str]:
|
579
579
|
# in case the inferred output column names dimension is different
|
580
580
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
581
|
-
|
581
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
582
|
+
|
583
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
584
|
+
# seen during the fit.
|
585
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
586
|
+
sample_pd_df.columns = snowpark_column_names
|
587
|
+
|
588
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
582
589
|
output_df_columns = list(output_df_pd.columns)
|
583
590
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
584
591
|
if self.sample_weight_col:
|
@@ -635,7 +635,14 @@ class KNeighborsClassifier(BaseTransformer):
|
|
635
635
|
) -> List[str]:
|
636
636
|
# in case the inferred output column names dimension is different
|
637
637
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
638
|
-
|
638
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
639
|
+
|
640
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
641
|
+
# seen during the fit.
|
642
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
643
|
+
sample_pd_df.columns = snowpark_column_names
|
644
|
+
|
645
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
639
646
|
output_df_columns = list(output_df_pd.columns)
|
640
647
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
641
648
|
if self.sample_weight_col:
|
@@ -637,7 +637,14 @@ class KNeighborsRegressor(BaseTransformer):
|
|
637
637
|
) -> List[str]:
|
638
638
|
# in case the inferred output column names dimension is different
|
639
639
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
640
|
-
|
640
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
641
|
+
|
642
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
643
|
+
# seen during the fit.
|
644
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
645
|
+
sample_pd_df.columns = snowpark_column_names
|
646
|
+
|
647
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
641
648
|
output_df_columns = list(output_df_pd.columns)
|
642
649
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
643
650
|
if self.sample_weight_col:
|
@@ -612,7 +612,14 @@ class KernelDensity(BaseTransformer):
|
|
612
612
|
) -> List[str]:
|
613
613
|
# in case the inferred output column names dimension is different
|
614
614
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
615
|
-
|
615
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
616
|
+
|
617
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
618
|
+
# seen during the fit.
|
619
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
620
|
+
sample_pd_df.columns = snowpark_column_names
|
621
|
+
|
622
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
616
623
|
output_df_columns = list(output_df_pd.columns)
|
617
624
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
618
625
|
if self.sample_weight_col:
|
@@ -644,7 +644,14 @@ class LocalOutlierFactor(BaseTransformer):
|
|
644
644
|
) -> List[str]:
|
645
645
|
# in case the inferred output column names dimension is different
|
646
646
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
647
|
-
|
647
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
648
|
+
|
649
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
650
|
+
# seen during the fit.
|
651
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
652
|
+
sample_pd_df.columns = snowpark_column_names
|
653
|
+
|
654
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
648
655
|
output_df_columns = list(output_df_pd.columns)
|
649
656
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
650
657
|
if self.sample_weight_col:
|
@@ -575,7 +575,14 @@ class NearestCentroid(BaseTransformer):
|
|
575
575
|
) -> List[str]:
|
576
576
|
# in case the inferred output column names dimension is different
|
577
577
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
578
|
-
|
578
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
579
|
+
|
580
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
581
|
+
# seen during the fit.
|
582
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
583
|
+
sample_pd_df.columns = snowpark_column_names
|
584
|
+
|
585
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
579
586
|
output_df_columns = list(output_df_pd.columns)
|
580
587
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
581
588
|
if self.sample_weight_col:
|
@@ -623,7 +623,14 @@ class NearestNeighbors(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -648,7 +648,14 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
|
|
648
648
|
) -> List[str]:
|
649
649
|
# in case the inferred output column names dimension is different
|
650
650
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
651
|
-
|
651
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
652
|
+
|
653
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
654
|
+
# seen during the fit.
|
655
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
656
|
+
sample_pd_df.columns = snowpark_column_names
|
657
|
+
|
658
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
652
659
|
output_df_columns = list(output_df_pd.columns)
|
653
660
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
654
661
|
if self.sample_weight_col:
|
@@ -647,7 +647,14 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
647
647
|
) -> List[str]:
|
648
648
|
# in case the inferred output column names dimension is different
|
649
649
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
650
|
-
|
650
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
651
|
+
|
652
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
653
|
+
# seen during the fit.
|
654
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
655
|
+
sample_pd_df.columns = snowpark_column_names
|
656
|
+
|
657
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
651
658
|
output_df_columns = list(output_df_pd.columns)
|
652
659
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
653
660
|
if self.sample_weight_col:
|
@@ -637,7 +637,14 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
637
637
|
) -> List[str]:
|
638
638
|
# in case the inferred output column names dimension is different
|
639
639
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
640
|
-
|
640
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
641
|
+
|
642
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
643
|
+
# seen during the fit.
|
644
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
645
|
+
sample_pd_df.columns = snowpark_column_names
|
646
|
+
|
647
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
641
648
|
output_df_columns = list(output_df_pd.columns)
|
642
649
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
643
650
|
if self.sample_weight_col:
|
@@ -596,7 +596,14 @@ class BernoulliRBM(BaseTransformer):
|
|
596
596
|
) -> List[str]:
|
597
597
|
# in case the inferred output column names dimension is different
|
598
598
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
599
|
-
|
599
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
600
|
+
|
601
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
602
|
+
# seen during the fit.
|
603
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
604
|
+
sample_pd_df.columns = snowpark_column_names
|
605
|
+
|
606
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
600
607
|
output_df_columns = list(output_df_pd.columns)
|
601
608
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
602
609
|
if self.sample_weight_col:
|
@@ -749,7 +749,14 @@ class MLPClassifier(BaseTransformer):
|
|
749
749
|
) -> List[str]:
|
750
750
|
# in case the inferred output column names dimension is different
|
751
751
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
752
|
-
|
752
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
753
|
+
|
754
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
755
|
+
# seen during the fit.
|
756
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
757
|
+
sample_pd_df.columns = snowpark_column_names
|
758
|
+
|
759
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
753
760
|
output_df_columns = list(output_df_pd.columns)
|
754
761
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
755
762
|
if self.sample_weight_col:
|
@@ -745,7 +745,14 @@ class MLPRegressor(BaseTransformer):
|
|
745
745
|
) -> List[str]:
|
746
746
|
# in case the inferred output column names dimension is different
|
747
747
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
748
|
-
|
748
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
749
|
+
|
750
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
751
|
+
# seen during the fit.
|
752
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
753
|
+
sample_pd_df.columns = snowpark_column_names
|
754
|
+
|
755
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
749
756
|
output_df_columns = list(output_df_pd.columns)
|
750
757
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
751
758
|
if self.sample_weight_col:
|
@@ -115,7 +115,7 @@ class Pipeline(base.BaseTransformer):
|
|
115
115
|
self._feature_names_in: List[np.ndarray[Any, np.dtype[Any]]] = []
|
116
116
|
self._n_features_in: List[int] = []
|
117
117
|
self._transformers_to_input_indices: Dict[str, List[int]] = {}
|
118
|
-
self.
|
118
|
+
self._modifies_label_or_sample_weight = True
|
119
119
|
|
120
120
|
self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
|
121
121
|
|
@@ -126,6 +126,9 @@ class Pipeline(base.BaseTransformer):
|
|
126
126
|
self._deps = list(deps)
|
127
127
|
self._sklearn_object = None
|
128
128
|
self.label_cols = self._get_label_cols()
|
129
|
+
self._is_convertible_to_sklearn = self._is_convertible_to_sklearn_object()
|
130
|
+
|
131
|
+
self._send_pipeline_configuration_telemetry()
|
129
132
|
|
130
133
|
@staticmethod
|
131
134
|
def _is_estimator(obj: object) -> bool:
|
@@ -228,7 +231,7 @@ class Pipeline(base.BaseTransformer):
|
|
228
231
|
return [c for c in columns if c not in target_cols]
|
229
232
|
|
230
233
|
def _append_step_feature_consumption_info(self, step_name: str, all_cols: List[str], input_cols: List[str]) -> None:
|
231
|
-
if self.
|
234
|
+
if self._modifies_label_or_sample_weight:
|
232
235
|
all_cols = self._get_sanitized_list_of_columns(all_cols)
|
233
236
|
self._feature_names_in.append(np.asarray(all_cols, dtype=object))
|
234
237
|
self._n_features_in.append(len(all_cols))
|
@@ -248,7 +251,7 @@ class Pipeline(base.BaseTransformer):
|
|
248
251
|
self, dataset: Union[snowpark.DataFrame, pd.DataFrame]
|
249
252
|
) -> Union[snowpark.DataFrame, pd.DataFrame]:
|
250
253
|
self._reset()
|
251
|
-
self.
|
254
|
+
self._modifies_label_or_sample_weight = not self._is_pipeline_modifying_label_or_sample_weight()
|
252
255
|
transformed_dataset = dataset
|
253
256
|
for name, trans in self._get_transformers():
|
254
257
|
self._append_step_feature_consumption_info(
|
@@ -425,7 +428,7 @@ class Pipeline(base.BaseTransformer):
|
|
425
428
|
)
|
426
429
|
|
427
430
|
if self._can_be_trained_in_ml_runtime(dataset):
|
428
|
-
if not self.
|
431
|
+
if not self._is_convertible_to_sklearn:
|
429
432
|
raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
|
430
433
|
self._fit_ml_runtime(dataset)
|
431
434
|
|
@@ -947,7 +950,7 @@ class Pipeline(base.BaseTransformer):
|
|
947
950
|
if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
|
948
951
|
return False
|
949
952
|
|
950
|
-
return self.
|
953
|
+
return self._is_convertible_to_sklearn
|
951
954
|
|
952
955
|
@staticmethod
|
953
956
|
def _wrap_transformer_in_column_transformer(
|
@@ -1003,7 +1006,7 @@ class Pipeline(base.BaseTransformer):
|
|
1003
1006
|
if not self._is_fitted:
|
1004
1007
|
return self._create_unfitted_sklearn_object()
|
1005
1008
|
|
1006
|
-
if not self.
|
1009
|
+
if not self._modifies_label_or_sample_weight:
|
1007
1010
|
raise exceptions.SnowflakeMLException(
|
1008
1011
|
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1009
1012
|
original_exception=ValueError(
|
@@ -1109,7 +1112,24 @@ class Pipeline(base.BaseTransformer):
|
|
1109
1112
|
else:
|
1110
1113
|
return self._create_sklearn_object()
|
1111
1114
|
else:
|
1112
|
-
if self.
|
1115
|
+
if self._is_convertible_to_sklearn:
|
1113
1116
|
return self._create_unfitted_sklearn_object()
|
1114
1117
|
else:
|
1115
1118
|
raise ValueError("This pipeline can not be converted to an sklearn pipeline.")
|
1119
|
+
|
1120
|
+
def _send_pipeline_configuration_telemetry(self) -> None:
|
1121
|
+
"""Track information about the pipeline setup. Currently, we want to track:
|
1122
|
+
- Whether the pipeline is convertible to an sklearn pipeline
|
1123
|
+
- Whether the pipeline is being used in the SPCS ml runtime.
|
1124
|
+
"""
|
1125
|
+
|
1126
|
+
telemetry_data = {
|
1127
|
+
"pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
|
1128
|
+
"in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
|
1129
|
+
}
|
1130
|
+
telemetry.send_custom_usage(
|
1131
|
+
project=_PROJECT,
|
1132
|
+
subproject=_SUBPROJECT,
|
1133
|
+
telemetry_type=telemetry.TelemetryField.TYPE_SNOWML_PIPELINE_USAGE.value,
|
1134
|
+
data=telemetry_data,
|
1135
|
+
)
|
@@ -586,7 +586,14 @@ class PolynomialFeatures(BaseTransformer):
|
|
586
586
|
) -> List[str]:
|
587
587
|
# in case the inferred output column names dimension is different
|
588
588
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
589
|
-
|
589
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
590
|
+
|
591
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
592
|
+
# seen during the fit.
|
593
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
594
|
+
sample_pd_df.columns = snowpark_column_names
|
595
|
+
|
596
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
590
597
|
output_df_columns = list(output_df_pd.columns)
|
591
598
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
592
599
|
if self.sample_weight_col:
|
@@ -590,7 +590,14 @@ class LabelPropagation(BaseTransformer):
|
|
590
590
|
) -> List[str]:
|
591
591
|
# in case the inferred output column names dimension is different
|
592
592
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
593
|
-
|
593
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
594
|
+
|
595
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
596
|
+
# seen during the fit.
|
597
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
598
|
+
sample_pd_df.columns = snowpark_column_names
|
599
|
+
|
600
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
594
601
|
output_df_columns = list(output_df_pd.columns)
|
595
602
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
596
603
|
if self.sample_weight_col:
|
@@ -599,7 +599,14 @@ class LabelSpreading(BaseTransformer):
|
|
599
599
|
) -> List[str]:
|
600
600
|
# in case the inferred output column names dimension is different
|
601
601
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
602
|
-
|
602
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
603
|
+
|
604
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
605
|
+
# seen during the fit.
|
606
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
607
|
+
sample_pd_df.columns = snowpark_column_names
|
608
|
+
|
609
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
603
610
|
output_df_columns = list(output_df_pd.columns)
|
604
611
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
605
612
|
if self.sample_weight_col:
|