snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -621,26 +621,37 @@ class SelectKBest(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
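The same hunk is applied to every modeling wrapper in this release: before inference, the wrapper now resolves each column the fitted estimator expects by trying the configured name as given, its unquoted form, and its quoted form, and raises a ValueError listing any features it cannot find; the selected columns are then renamed to the estimator's feature_names_in_ before the inference method is called. Below is a minimal standalone sketch of that resolution logic, assuming simplified quoting rules; resolve_columns, _quote, and _unquote are hypothetical helpers for illustration only and are not part of the snowflake-ml-python API.

    # Hypothetical sketch of the quoted/unquoted column resolution shown in the hunks above.
    from typing import List, Sequence

    import pandas as pd


    def _unquote(name: str) -> str:
        # Simplified stand-in for identifier.get_unescaped_names: strip one layer of double quotes.
        if name.startswith('"') and name.endswith('"'):
            return name[1:-1].replace('""', '"')
        return name


    def _quote(name: str) -> str:
        # Simplified stand-in for identifier.get_escaped_names: wrap the name in double quotes.
        return '"' + name.replace('"', '""') + '"'


    def resolve_columns(required: Sequence[str], input_cols: Sequence[str], df: pd.DataFrame) -> List[str]:
        """Pick, for each feature the estimator was fitted on, whichever spelling exists in df."""
        present = set(df.columns)
        missing, selected = [], []
        for i, feature in enumerate(required):
            candidates = []
            if i < len(input_cols):
                unquoted = _unquote(input_cols[i])
                candidates = [input_cols[i], unquoted, _quote(unquoted)]
            hit = next((c for c in candidates if c in present), None)
            if hit is None or feature not in candidates:
                missing.append(feature)
            else:
                selected.append(hit)
        if missing:
            raise ValueError(f"Features seen during fit but not present in the input: {missing}")
        return selected


    df = pd.DataFrame({"AGE": [1, 2], '"Income"': [3, 4]})
    print(resolve_columns(["AGE", "Income"], ["AGE", '"Income"'], df))  # ['AGE', '"Income"']

In the wrappers themselves, the resulting frame is additionally reassigned the estimator's expected feature names (input_df.columns = features_required_by_estimator) so that sklearn's own feature-name check passes regardless of Snowflake quoting.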
@@ -620,26 +620,37 @@ class SelectPercentile(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -680,26 +680,37 @@ class SequentialFeatureSelector(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -613,26 +613,37 @@ class VarianceThreshold(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -706,26 +706,37 @@ class GaussianProcessClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -689,26 +689,37 @@ class GaussianProcessRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -732,26 +732,37 @@ class IterativeImputer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -667,26 +667,37 @@ class KNNImputer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -641,26 +641,37 @@ class MissingIndicator(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -616,26 +616,37 @@ class AdditiveChi2Sampler(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -664,26 +664,37 @@ class Nystroem(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
- ... 17 lines removed (original 1.0.1 content not captured in this view) ...
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df