snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -775,26 +775,37 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
775
775
|
# input cols need to match unquoted / quoted
|
776
776
|
input_cols = self.input_cols
|
777
777
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
778
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
778
779
|
|
779
780
|
estimator = self._sklearn_object
|
780
781
|
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
782
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
783
|
+
missing_features = []
|
784
|
+
features_in_dataset = set(dataset.columns)
|
785
|
+
columns_to_select = []
|
786
|
+
for i, f in enumerate(features_required_by_estimator):
|
787
|
+
if (
|
788
|
+
i >= len(input_cols)
|
789
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
790
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
791
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
792
|
+
):
|
793
|
+
missing_features.append(f)
|
794
|
+
elif input_cols[i] in features_in_dataset:
|
795
|
+
columns_to_select.append(input_cols[i])
|
796
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
797
|
+
columns_to_select.append(unquoted_input_cols[i])
|
798
|
+
else:
|
799
|
+
columns_to_select.append(quoted_input_cols[i])
|
800
|
+
|
801
|
+
if len(missing_features) > 0:
|
802
|
+
raise ValueError(
|
803
|
+
"The feature names should match with those that were passed during fit.\n"
|
804
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
805
|
+
f"Features in the input dataframe : {input_cols}\n"
|
806
|
+
)
|
807
|
+
input_df = dataset[columns_to_select]
|
808
|
+
input_df.columns = features_required_by_estimator
|
798
809
|
|
799
810
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
800
811
|
input_df
|
@@ -680,26 +680,37 @@ class IsolationForest(BaseTransformer):
|
|
680
680
|
# input cols need to match unquoted / quoted
|
681
681
|
input_cols = self.input_cols
|
682
682
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
683
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
683
684
|
|
684
685
|
estimator = self._sklearn_object
|
685
686
|
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
687
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
688
|
+
missing_features = []
|
689
|
+
features_in_dataset = set(dataset.columns)
|
690
|
+
columns_to_select = []
|
691
|
+
for i, f in enumerate(features_required_by_estimator):
|
692
|
+
if (
|
693
|
+
i >= len(input_cols)
|
694
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
695
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
696
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
697
|
+
):
|
698
|
+
missing_features.append(f)
|
699
|
+
elif input_cols[i] in features_in_dataset:
|
700
|
+
columns_to_select.append(input_cols[i])
|
701
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
702
|
+
columns_to_select.append(unquoted_input_cols[i])
|
703
|
+
else:
|
704
|
+
columns_to_select.append(quoted_input_cols[i])
|
705
|
+
|
706
|
+
if len(missing_features) > 0:
|
707
|
+
raise ValueError(
|
708
|
+
"The feature names should match with those that were passed during fit.\n"
|
709
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
710
|
+
f"Features in the input dataframe : {input_cols}\n"
|
711
|
+
)
|
712
|
+
input_df = dataset[columns_to_select]
|
713
|
+
input_df.columns = features_required_by_estimator
|
703
714
|
|
704
715
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
705
716
|
input_df
|
@@ -789,26 +789,37 @@ class RandomForestClassifier(BaseTransformer):
|
|
789
789
|
# input cols need to match unquoted / quoted
|
790
790
|
input_cols = self.input_cols
|
791
791
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
792
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
792
793
|
|
793
794
|
estimator = self._sklearn_object
|
794
795
|
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
796
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
797
|
+
missing_features = []
|
798
|
+
features_in_dataset = set(dataset.columns)
|
799
|
+
columns_to_select = []
|
800
|
+
for i, f in enumerate(features_required_by_estimator):
|
801
|
+
if (
|
802
|
+
i >= len(input_cols)
|
803
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
804
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
805
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
806
|
+
):
|
807
|
+
missing_features.append(f)
|
808
|
+
elif input_cols[i] in features_in_dataset:
|
809
|
+
columns_to_select.append(input_cols[i])
|
810
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
811
|
+
columns_to_select.append(unquoted_input_cols[i])
|
812
|
+
else:
|
813
|
+
columns_to_select.append(quoted_input_cols[i])
|
814
|
+
|
815
|
+
if len(missing_features) > 0:
|
816
|
+
raise ValueError(
|
817
|
+
"The feature names should match with those that were passed during fit.\n"
|
818
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
819
|
+
f"Features in the input dataframe : {input_cols}\n"
|
820
|
+
)
|
821
|
+
input_df = dataset[columns_to_select]
|
822
|
+
input_df.columns = features_required_by_estimator
|
812
823
|
|
813
824
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
814
825
|
input_df
|
@@ -768,26 +768,37 @@ class RandomForestRegressor(BaseTransformer):
|
|
768
768
|
# input cols need to match unquoted / quoted
|
769
769
|
input_cols = self.input_cols
|
770
770
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
771
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
771
772
|
|
772
773
|
estimator = self._sklearn_object
|
773
774
|
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
775
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
776
|
+
missing_features = []
|
777
|
+
features_in_dataset = set(dataset.columns)
|
778
|
+
columns_to_select = []
|
779
|
+
for i, f in enumerate(features_required_by_estimator):
|
780
|
+
if (
|
781
|
+
i >= len(input_cols)
|
782
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
783
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
784
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
785
|
+
):
|
786
|
+
missing_features.append(f)
|
787
|
+
elif input_cols[i] in features_in_dataset:
|
788
|
+
columns_to_select.append(input_cols[i])
|
789
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
790
|
+
columns_to_select.append(unquoted_input_cols[i])
|
791
|
+
else:
|
792
|
+
columns_to_select.append(quoted_input_cols[i])
|
793
|
+
|
794
|
+
if len(missing_features) > 0:
|
795
|
+
raise ValueError(
|
796
|
+
"The feature names should match with those that were passed during fit.\n"
|
797
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
798
|
+
f"Features in the input dataframe : {input_cols}\n"
|
799
|
+
)
|
800
|
+
input_df = dataset[columns_to_select]
|
801
|
+
input_df.columns = features_required_by_estimator
|
791
802
|
|
792
803
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
793
804
|
input_df
|
@@ -670,26 +670,37 @@ class StackingRegressor(BaseTransformer):
|
|
670
670
|
# input cols need to match unquoted / quoted
|
671
671
|
input_cols = self.input_cols
|
672
672
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
673
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
673
674
|
|
674
675
|
estimator = self._sklearn_object
|
675
676
|
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
677
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
678
|
+
missing_features = []
|
679
|
+
features_in_dataset = set(dataset.columns)
|
680
|
+
columns_to_select = []
|
681
|
+
for i, f in enumerate(features_required_by_estimator):
|
682
|
+
if (
|
683
|
+
i >= len(input_cols)
|
684
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
685
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
686
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
687
|
+
):
|
688
|
+
missing_features.append(f)
|
689
|
+
elif input_cols[i] in features_in_dataset:
|
690
|
+
columns_to_select.append(input_cols[i])
|
691
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
692
|
+
columns_to_select.append(unquoted_input_cols[i])
|
693
|
+
else:
|
694
|
+
columns_to_select.append(quoted_input_cols[i])
|
695
|
+
|
696
|
+
if len(missing_features) > 0:
|
697
|
+
raise ValueError(
|
698
|
+
"The feature names should match with those that were passed during fit.\n"
|
699
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
700
|
+
f"Features in the input dataframe : {input_cols}\n"
|
701
|
+
)
|
702
|
+
input_df = dataset[columns_to_select]
|
703
|
+
input_df.columns = features_required_by_estimator
|
693
704
|
|
694
705
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
695
706
|
input_df
|
@@ -652,26 +652,37 @@ class VotingClassifier(BaseTransformer):
|
|
652
652
|
# input cols need to match unquoted / quoted
|
653
653
|
input_cols = self.input_cols
|
654
654
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
655
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
655
656
|
|
656
657
|
estimator = self._sklearn_object
|
657
658
|
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
659
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
660
|
+
missing_features = []
|
661
|
+
features_in_dataset = set(dataset.columns)
|
662
|
+
columns_to_select = []
|
663
|
+
for i, f in enumerate(features_required_by_estimator):
|
664
|
+
if (
|
665
|
+
i >= len(input_cols)
|
666
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
667
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
668
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
669
|
+
):
|
670
|
+
missing_features.append(f)
|
671
|
+
elif input_cols[i] in features_in_dataset:
|
672
|
+
columns_to_select.append(input_cols[i])
|
673
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
674
|
+
columns_to_select.append(unquoted_input_cols[i])
|
675
|
+
else:
|
676
|
+
columns_to_select.append(quoted_input_cols[i])
|
677
|
+
|
678
|
+
if len(missing_features) > 0:
|
679
|
+
raise ValueError(
|
680
|
+
"The feature names should match with those that were passed during fit.\n"
|
681
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
682
|
+
f"Features in the input dataframe : {input_cols}\n"
|
683
|
+
)
|
684
|
+
input_df = dataset[columns_to_select]
|
685
|
+
input_df.columns = features_required_by_estimator
|
675
686
|
|
676
687
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
677
688
|
input_df
|
@@ -634,26 +634,37 @@ class VotingRegressor(BaseTransformer):
|
|
634
634
|
# input cols need to match unquoted / quoted
|
635
635
|
input_cols = self.input_cols
|
636
636
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
637
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
637
638
|
|
638
639
|
estimator = self._sklearn_object
|
639
640
|
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
641
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
642
|
+
missing_features = []
|
643
|
+
features_in_dataset = set(dataset.columns)
|
644
|
+
columns_to_select = []
|
645
|
+
for i, f in enumerate(features_required_by_estimator):
|
646
|
+
if (
|
647
|
+
i >= len(input_cols)
|
648
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
649
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
650
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
651
|
+
):
|
652
|
+
missing_features.append(f)
|
653
|
+
elif input_cols[i] in features_in_dataset:
|
654
|
+
columns_to_select.append(input_cols[i])
|
655
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
656
|
+
columns_to_select.append(unquoted_input_cols[i])
|
657
|
+
else:
|
658
|
+
columns_to_select.append(quoted_input_cols[i])
|
659
|
+
|
660
|
+
if len(missing_features) > 0:
|
661
|
+
raise ValueError(
|
662
|
+
"The feature names should match with those that were passed during fit.\n"
|
663
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
664
|
+
f"Features in the input dataframe : {input_cols}\n"
|
665
|
+
)
|
666
|
+
input_df = dataset[columns_to_select]
|
667
|
+
input_df.columns = features_required_by_estimator
|
657
668
|
|
658
669
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
659
670
|
input_df
|
@@ -624,26 +624,37 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
624
624
|
# input cols need to match unquoted / quoted
|
625
625
|
input_cols = self.input_cols
|
626
626
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
627
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
627
628
|
|
628
629
|
estimator = self._sklearn_object
|
629
630
|
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
631
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
632
|
+
missing_features = []
|
633
|
+
features_in_dataset = set(dataset.columns)
|
634
|
+
columns_to_select = []
|
635
|
+
for i, f in enumerate(features_required_by_estimator):
|
636
|
+
if (
|
637
|
+
i >= len(input_cols)
|
638
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
639
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
640
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
641
|
+
):
|
642
|
+
missing_features.append(f)
|
643
|
+
elif input_cols[i] in features_in_dataset:
|
644
|
+
columns_to_select.append(input_cols[i])
|
645
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
646
|
+
columns_to_select.append(unquoted_input_cols[i])
|
647
|
+
else:
|
648
|
+
columns_to_select.append(quoted_input_cols[i])
|
649
|
+
|
650
|
+
if len(missing_features) > 0:
|
651
|
+
raise ValueError(
|
652
|
+
"The feature names should match with those that were passed during fit.\n"
|
653
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
654
|
+
f"Features in the input dataframe : {input_cols}\n"
|
655
|
+
)
|
656
|
+
input_df = dataset[columns_to_select]
|
657
|
+
input_df.columns = features_required_by_estimator
|
647
658
|
|
648
659
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
649
660
|
input_df
|
@@ -620,26 +620,37 @@ class SelectFdr(BaseTransformer):
|
|
620
620
|
# input cols need to match unquoted / quoted
|
621
621
|
input_cols = self.input_cols
|
622
622
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
623
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
623
624
|
|
624
625
|
estimator = self._sklearn_object
|
625
626
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
627
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
628
|
+
missing_features = []
|
629
|
+
features_in_dataset = set(dataset.columns)
|
630
|
+
columns_to_select = []
|
631
|
+
for i, f in enumerate(features_required_by_estimator):
|
632
|
+
if (
|
633
|
+
i >= len(input_cols)
|
634
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
635
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
636
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
637
|
+
):
|
638
|
+
missing_features.append(f)
|
639
|
+
elif input_cols[i] in features_in_dataset:
|
640
|
+
columns_to_select.append(input_cols[i])
|
641
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
642
|
+
columns_to_select.append(unquoted_input_cols[i])
|
643
|
+
else:
|
644
|
+
columns_to_select.append(quoted_input_cols[i])
|
645
|
+
|
646
|
+
if len(missing_features) > 0:
|
647
|
+
raise ValueError(
|
648
|
+
"The feature names should match with those that were passed during fit.\n"
|
649
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
650
|
+
f"Features in the input dataframe : {input_cols}\n"
|
651
|
+
)
|
652
|
+
input_df = dataset[columns_to_select]
|
653
|
+
input_df.columns = features_required_by_estimator
|
643
654
|
|
644
655
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
645
656
|
input_df
|
@@ -620,26 +620,37 @@ class SelectFpr(BaseTransformer):
|
|
620
620
|
# input cols need to match unquoted / quoted
|
621
621
|
input_cols = self.input_cols
|
622
622
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
623
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
623
624
|
|
624
625
|
estimator = self._sklearn_object
|
625
626
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
627
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
628
|
+
missing_features = []
|
629
|
+
features_in_dataset = set(dataset.columns)
|
630
|
+
columns_to_select = []
|
631
|
+
for i, f in enumerate(features_required_by_estimator):
|
632
|
+
if (
|
633
|
+
i >= len(input_cols)
|
634
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
635
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
636
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
637
|
+
):
|
638
|
+
missing_features.append(f)
|
639
|
+
elif input_cols[i] in features_in_dataset:
|
640
|
+
columns_to_select.append(input_cols[i])
|
641
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
642
|
+
columns_to_select.append(unquoted_input_cols[i])
|
643
|
+
else:
|
644
|
+
columns_to_select.append(quoted_input_cols[i])
|
645
|
+
|
646
|
+
if len(missing_features) > 0:
|
647
|
+
raise ValueError(
|
648
|
+
"The feature names should match with those that were passed during fit.\n"
|
649
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
650
|
+
f"Features in the input dataframe : {input_cols}\n"
|
651
|
+
)
|
652
|
+
input_df = dataset[columns_to_select]
|
653
|
+
input_df.columns = features_required_by_estimator
|
643
654
|
|
644
655
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
645
656
|
input_df
|
@@ -620,26 +620,37 @@ class SelectFwe(BaseTransformer):
|
|
620
620
|
# input cols need to match unquoted / quoted
|
621
621
|
input_cols = self.input_cols
|
622
622
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
623
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
623
624
|
|
624
625
|
estimator = self._sklearn_object
|
625
626
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
627
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
628
|
+
missing_features = []
|
629
|
+
features_in_dataset = set(dataset.columns)
|
630
|
+
columns_to_select = []
|
631
|
+
for i, f in enumerate(features_required_by_estimator):
|
632
|
+
if (
|
633
|
+
i >= len(input_cols)
|
634
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
635
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
636
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
637
|
+
):
|
638
|
+
missing_features.append(f)
|
639
|
+
elif input_cols[i] in features_in_dataset:
|
640
|
+
columns_to_select.append(input_cols[i])
|
641
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
642
|
+
columns_to_select.append(unquoted_input_cols[i])
|
643
|
+
else:
|
644
|
+
columns_to_select.append(quoted_input_cols[i])
|
645
|
+
|
646
|
+
if len(missing_features) > 0:
|
647
|
+
raise ValueError(
|
648
|
+
"The feature names should match with those that were passed during fit.\n"
|
649
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
650
|
+
f"Features in the input dataframe : {input_cols}\n"
|
651
|
+
)
|
652
|
+
input_df = dataset[columns_to_select]
|
653
|
+
input_df.columns = features_required_by_estimator
|
643
654
|
|
644
655
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
645
656
|
input_df
|