snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -672,26 +672,37 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
672
672
|
# input cols need to match unquoted / quoted
|
673
673
|
input_cols = self.input_cols
|
674
674
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
675
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
675
676
|
|
676
677
|
estimator = self._sklearn_object
|
677
678
|
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
679
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
680
|
+
missing_features = []
|
681
|
+
features_in_dataset = set(dataset.columns)
|
682
|
+
columns_to_select = []
|
683
|
+
for i, f in enumerate(features_required_by_estimator):
|
684
|
+
if (
|
685
|
+
i >= len(input_cols)
|
686
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
687
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
688
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
689
|
+
):
|
690
|
+
missing_features.append(f)
|
691
|
+
elif input_cols[i] in features_in_dataset:
|
692
|
+
columns_to_select.append(input_cols[i])
|
693
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
694
|
+
columns_to_select.append(unquoted_input_cols[i])
|
695
|
+
else:
|
696
|
+
columns_to_select.append(quoted_input_cols[i])
|
697
|
+
|
698
|
+
if len(missing_features) > 0:
|
699
|
+
raise ValueError(
|
700
|
+
"The feature names should match with those that were passed during fit.\n"
|
701
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
702
|
+
f"Features in the input dataframe : {input_cols}\n"
|
703
|
+
)
|
704
|
+
input_df = dataset[columns_to_select]
|
705
|
+
input_df.columns = features_required_by_estimator
|
695
706
|
|
696
707
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
697
708
|
input_df
|
@@ -634,26 +634,37 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
634
634
|
# input cols need to match unquoted / quoted
|
635
635
|
input_cols = self.input_cols
|
636
636
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
637
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
637
638
|
|
638
639
|
estimator = self._sklearn_object
|
639
640
|
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
641
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
642
|
+
missing_features = []
|
643
|
+
features_in_dataset = set(dataset.columns)
|
644
|
+
columns_to_select = []
|
645
|
+
for i, f in enumerate(features_required_by_estimator):
|
646
|
+
if (
|
647
|
+
i >= len(input_cols)
|
648
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
649
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
650
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
651
|
+
):
|
652
|
+
missing_features.append(f)
|
653
|
+
elif input_cols[i] in features_in_dataset:
|
654
|
+
columns_to_select.append(input_cols[i])
|
655
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
656
|
+
columns_to_select.append(unquoted_input_cols[i])
|
657
|
+
else:
|
658
|
+
columns_to_select.append(quoted_input_cols[i])
|
659
|
+
|
660
|
+
if len(missing_features) > 0:
|
661
|
+
raise ValueError(
|
662
|
+
"The feature names should match with those that were passed during fit.\n"
|
663
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
664
|
+
f"Features in the input dataframe : {input_cols}\n"
|
665
|
+
)
|
666
|
+
input_df = dataset[columns_to_select]
|
667
|
+
input_df.columns = features_required_by_estimator
|
657
668
|
|
658
669
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
659
670
|
input_df
|
@@ -659,26 +659,37 @@ class AdaBoostClassifier(BaseTransformer):
|
|
659
659
|
# input cols need to match unquoted / quoted
|
660
660
|
input_cols = self.input_cols
|
661
661
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
662
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
662
663
|
|
663
664
|
estimator = self._sklearn_object
|
664
665
|
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
666
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
667
|
+
missing_features = []
|
668
|
+
features_in_dataset = set(dataset.columns)
|
669
|
+
columns_to_select = []
|
670
|
+
for i, f in enumerate(features_required_by_estimator):
|
671
|
+
if (
|
672
|
+
i >= len(input_cols)
|
673
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
674
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
675
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
676
|
+
):
|
677
|
+
missing_features.append(f)
|
678
|
+
elif input_cols[i] in features_in_dataset:
|
679
|
+
columns_to_select.append(input_cols[i])
|
680
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
681
|
+
columns_to_select.append(unquoted_input_cols[i])
|
682
|
+
else:
|
683
|
+
columns_to_select.append(quoted_input_cols[i])
|
684
|
+
|
685
|
+
if len(missing_features) > 0:
|
686
|
+
raise ValueError(
|
687
|
+
"The feature names should match with those that were passed during fit.\n"
|
688
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
689
|
+
f"Features in the input dataframe : {input_cols}\n"
|
690
|
+
)
|
691
|
+
input_df = dataset[columns_to_select]
|
692
|
+
input_df.columns = features_required_by_estimator
|
682
693
|
|
683
694
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
684
695
|
input_df
|
@@ -656,26 +656,37 @@ class AdaBoostRegressor(BaseTransformer):
|
|
656
656
|
# input cols need to match unquoted / quoted
|
657
657
|
input_cols = self.input_cols
|
658
658
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
659
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
659
660
|
|
660
661
|
estimator = self._sklearn_object
|
661
662
|
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
663
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
664
|
+
missing_features = []
|
665
|
+
features_in_dataset = set(dataset.columns)
|
666
|
+
columns_to_select = []
|
667
|
+
for i, f in enumerate(features_required_by_estimator):
|
668
|
+
if (
|
669
|
+
i >= len(input_cols)
|
670
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
671
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
672
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
673
|
+
):
|
674
|
+
missing_features.append(f)
|
675
|
+
elif input_cols[i] in features_in_dataset:
|
676
|
+
columns_to_select.append(input_cols[i])
|
677
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
678
|
+
columns_to_select.append(unquoted_input_cols[i])
|
679
|
+
else:
|
680
|
+
columns_to_select.append(quoted_input_cols[i])
|
681
|
+
|
682
|
+
if len(missing_features) > 0:
|
683
|
+
raise ValueError(
|
684
|
+
"The feature names should match with those that were passed during fit.\n"
|
685
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
686
|
+
f"Features in the input dataframe : {input_cols}\n"
|
687
|
+
)
|
688
|
+
input_df = dataset[columns_to_select]
|
689
|
+
input_df.columns = features_required_by_estimator
|
679
690
|
|
680
691
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
681
692
|
input_df
|
@@ -691,26 +691,37 @@ class BaggingClassifier(BaseTransformer):
|
|
691
691
|
# input cols need to match unquoted / quoted
|
692
692
|
input_cols = self.input_cols
|
693
693
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
694
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
694
695
|
|
695
696
|
estimator = self._sklearn_object
|
696
697
|
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
698
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
699
|
+
missing_features = []
|
700
|
+
features_in_dataset = set(dataset.columns)
|
701
|
+
columns_to_select = []
|
702
|
+
for i, f in enumerate(features_required_by_estimator):
|
703
|
+
if (
|
704
|
+
i >= len(input_cols)
|
705
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
706
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
707
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
708
|
+
):
|
709
|
+
missing_features.append(f)
|
710
|
+
elif input_cols[i] in features_in_dataset:
|
711
|
+
columns_to_select.append(input_cols[i])
|
712
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
713
|
+
columns_to_select.append(unquoted_input_cols[i])
|
714
|
+
else:
|
715
|
+
columns_to_select.append(quoted_input_cols[i])
|
716
|
+
|
717
|
+
if len(missing_features) > 0:
|
718
|
+
raise ValueError(
|
719
|
+
"The feature names should match with those that were passed during fit.\n"
|
720
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
721
|
+
f"Features in the input dataframe : {input_cols}\n"
|
722
|
+
)
|
723
|
+
input_df = dataset[columns_to_select]
|
724
|
+
input_df.columns = features_required_by_estimator
|
714
725
|
|
715
726
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
716
727
|
input_df
|
@@ -691,26 +691,37 @@ class BaggingRegressor(BaseTransformer):
|
|
691
691
|
# input cols need to match unquoted / quoted
|
692
692
|
input_cols = self.input_cols
|
693
693
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
694
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
694
695
|
|
695
696
|
estimator = self._sklearn_object
|
696
697
|
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
698
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
699
|
+
missing_features = []
|
700
|
+
features_in_dataset = set(dataset.columns)
|
701
|
+
columns_to_select = []
|
702
|
+
for i, f in enumerate(features_required_by_estimator):
|
703
|
+
if (
|
704
|
+
i >= len(input_cols)
|
705
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
706
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
707
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
708
|
+
):
|
709
|
+
missing_features.append(f)
|
710
|
+
elif input_cols[i] in features_in_dataset:
|
711
|
+
columns_to_select.append(input_cols[i])
|
712
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
713
|
+
columns_to_select.append(unquoted_input_cols[i])
|
714
|
+
else:
|
715
|
+
columns_to_select.append(quoted_input_cols[i])
|
716
|
+
|
717
|
+
if len(missing_features) > 0:
|
718
|
+
raise ValueError(
|
719
|
+
"The feature names should match with those that were passed during fit.\n"
|
720
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
721
|
+
f"Features in the input dataframe : {input_cols}\n"
|
722
|
+
)
|
723
|
+
input_df = dataset[columns_to_select]
|
724
|
+
input_df.columns = features_required_by_estimator
|
714
725
|
|
715
726
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
716
727
|
input_df
|
@@ -793,26 +793,37 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
793
793
|
# input cols need to match unquoted / quoted
|
794
794
|
input_cols = self.input_cols
|
795
795
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
796
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
796
797
|
|
797
798
|
estimator = self._sklearn_object
|
798
799
|
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
800
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
801
|
+
missing_features = []
|
802
|
+
features_in_dataset = set(dataset.columns)
|
803
|
+
columns_to_select = []
|
804
|
+
for i, f in enumerate(features_required_by_estimator):
|
805
|
+
if (
|
806
|
+
i >= len(input_cols)
|
807
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
808
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
809
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
810
|
+
):
|
811
|
+
missing_features.append(f)
|
812
|
+
elif input_cols[i] in features_in_dataset:
|
813
|
+
columns_to_select.append(input_cols[i])
|
814
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
815
|
+
columns_to_select.append(unquoted_input_cols[i])
|
816
|
+
else:
|
817
|
+
columns_to_select.append(quoted_input_cols[i])
|
818
|
+
|
819
|
+
if len(missing_features) > 0:
|
820
|
+
raise ValueError(
|
821
|
+
"The feature names should match with those that were passed during fit.\n"
|
822
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
823
|
+
f"Features in the input dataframe : {input_cols}\n"
|
824
|
+
)
|
825
|
+
input_df = dataset[columns_to_select]
|
826
|
+
input_df.columns = features_required_by_estimator
|
816
827
|
|
817
828
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
818
829
|
input_df
|
@@ -772,26 +772,37 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
772
772
|
# input cols need to match unquoted / quoted
|
773
773
|
input_cols = self.input_cols
|
774
774
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
775
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
775
776
|
|
776
777
|
estimator = self._sklearn_object
|
777
778
|
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
779
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
780
|
+
missing_features = []
|
781
|
+
features_in_dataset = set(dataset.columns)
|
782
|
+
columns_to_select = []
|
783
|
+
for i, f in enumerate(features_required_by_estimator):
|
784
|
+
if (
|
785
|
+
i >= len(input_cols)
|
786
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
787
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
788
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
789
|
+
):
|
790
|
+
missing_features.append(f)
|
791
|
+
elif input_cols[i] in features_in_dataset:
|
792
|
+
columns_to_select.append(input_cols[i])
|
793
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
794
|
+
columns_to_select.append(unquoted_input_cols[i])
|
795
|
+
else:
|
796
|
+
columns_to_select.append(quoted_input_cols[i])
|
797
|
+
|
798
|
+
if len(missing_features) > 0:
|
799
|
+
raise ValueError(
|
800
|
+
"The feature names should match with those that were passed during fit.\n"
|
801
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
802
|
+
f"Features in the input dataframe : {input_cols}\n"
|
803
|
+
)
|
804
|
+
input_df = dataset[columns_to_select]
|
805
|
+
input_df.columns = features_required_by_estimator
|
795
806
|
|
796
807
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
797
808
|
input_df
|
@@ -807,26 +807,37 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
807
807
|
# input cols need to match unquoted / quoted
|
808
808
|
input_cols = self.input_cols
|
809
809
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
810
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
810
811
|
|
811
812
|
estimator = self._sklearn_object
|
812
813
|
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
814
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
815
|
+
missing_features = []
|
816
|
+
features_in_dataset = set(dataset.columns)
|
817
|
+
columns_to_select = []
|
818
|
+
for i, f in enumerate(features_required_by_estimator):
|
819
|
+
if (
|
820
|
+
i >= len(input_cols)
|
821
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
822
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
823
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
824
|
+
):
|
825
|
+
missing_features.append(f)
|
826
|
+
elif input_cols[i] in features_in_dataset:
|
827
|
+
columns_to_select.append(input_cols[i])
|
828
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
829
|
+
columns_to_select.append(unquoted_input_cols[i])
|
830
|
+
else:
|
831
|
+
columns_to_select.append(quoted_input_cols[i])
|
832
|
+
|
833
|
+
if len(missing_features) > 0:
|
834
|
+
raise ValueError(
|
835
|
+
"The feature names should match with those that were passed during fit.\n"
|
836
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
837
|
+
f"Features in the input dataframe : {input_cols}\n"
|
838
|
+
)
|
839
|
+
input_df = dataset[columns_to_select]
|
840
|
+
input_df.columns = features_required_by_estimator
|
830
841
|
|
831
842
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
832
843
|
input_df
|
@@ -816,26 +816,37 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
816
816
|
# input cols need to match unquoted / quoted
|
817
817
|
input_cols = self.input_cols
|
818
818
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
819
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
819
820
|
|
820
821
|
estimator = self._sklearn_object
|
821
822
|
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
823
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
824
|
+
missing_features = []
|
825
|
+
features_in_dataset = set(dataset.columns)
|
826
|
+
columns_to_select = []
|
827
|
+
for i, f in enumerate(features_required_by_estimator):
|
828
|
+
if (
|
829
|
+
i >= len(input_cols)
|
830
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
831
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
832
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
833
|
+
):
|
834
|
+
missing_features.append(f)
|
835
|
+
elif input_cols[i] in features_in_dataset:
|
836
|
+
columns_to_select.append(input_cols[i])
|
837
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
838
|
+
columns_to_select.append(unquoted_input_cols[i])
|
839
|
+
else:
|
840
|
+
columns_to_select.append(quoted_input_cols[i])
|
841
|
+
|
842
|
+
if len(missing_features) > 0:
|
843
|
+
raise ValueError(
|
844
|
+
"The feature names should match with those that were passed during fit.\n"
|
845
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
846
|
+
f"Features in the input dataframe : {input_cols}\n"
|
847
|
+
)
|
848
|
+
input_df = dataset[columns_to_select]
|
849
|
+
input_df.columns = features_required_by_estimator
|
839
850
|
|
840
851
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
841
852
|
input_df
|
@@ -785,26 +785,37 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
785
785
|
# input cols need to match unquoted / quoted
|
786
786
|
input_cols = self.input_cols
|
787
787
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
788
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
788
789
|
|
789
790
|
estimator = self._sklearn_object
|
790
791
|
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
792
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
793
|
+
missing_features = []
|
794
|
+
features_in_dataset = set(dataset.columns)
|
795
|
+
columns_to_select = []
|
796
|
+
for i, f in enumerate(features_required_by_estimator):
|
797
|
+
if (
|
798
|
+
i >= len(input_cols)
|
799
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
800
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
801
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
802
|
+
):
|
803
|
+
missing_features.append(f)
|
804
|
+
elif input_cols[i] in features_in_dataset:
|
805
|
+
columns_to_select.append(input_cols[i])
|
806
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
807
|
+
columns_to_select.append(unquoted_input_cols[i])
|
808
|
+
else:
|
809
|
+
columns_to_select.append(quoted_input_cols[i])
|
810
|
+
|
811
|
+
if len(missing_features) > 0:
|
812
|
+
raise ValueError(
|
813
|
+
"The feature names should match with those that were passed during fit.\n"
|
814
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
815
|
+
f"Features in the input dataframe : {input_cols}\n"
|
816
|
+
)
|
817
|
+
input_df = dataset[columns_to_select]
|
818
|
+
input_df.columns = features_required_by_estimator
|
808
819
|
|
809
820
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
810
821
|
input_df
|