snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -688,26 +688,37 @@ class KNeighborsClassifier(BaseTransformer):
|
|
688
688
|
# input cols need to match unquoted / quoted
|
689
689
|
input_cols = self.input_cols
|
690
690
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
691
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
691
692
|
|
692
693
|
estimator = self._sklearn_object
|
693
694
|
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
695
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
696
|
+
missing_features = []
|
697
|
+
features_in_dataset = set(dataset.columns)
|
698
|
+
columns_to_select = []
|
699
|
+
for i, f in enumerate(features_required_by_estimator):
|
700
|
+
if (
|
701
|
+
i >= len(input_cols)
|
702
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
703
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
704
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
705
|
+
):
|
706
|
+
missing_features.append(f)
|
707
|
+
elif input_cols[i] in features_in_dataset:
|
708
|
+
columns_to_select.append(input_cols[i])
|
709
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
710
|
+
columns_to_select.append(unquoted_input_cols[i])
|
711
|
+
else:
|
712
|
+
columns_to_select.append(quoted_input_cols[i])
|
713
|
+
|
714
|
+
if len(missing_features) > 0:
|
715
|
+
raise ValueError(
|
716
|
+
"The feature names should match with those that were passed during fit.\n"
|
717
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
718
|
+
f"Features in the input dataframe : {input_cols}\n"
|
719
|
+
)
|
720
|
+
input_df = dataset[columns_to_select]
|
721
|
+
input_df.columns = features_required_by_estimator
|
711
722
|
|
712
723
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
713
724
|
input_df
|
@@ -690,26 +690,37 @@ class KNeighborsRegressor(BaseTransformer):
|
|
690
690
|
# input cols need to match unquoted / quoted
|
691
691
|
input_cols = self.input_cols
|
692
692
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
693
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
693
694
|
|
694
695
|
estimator = self._sklearn_object
|
695
696
|
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
697
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
698
|
+
missing_features = []
|
699
|
+
features_in_dataset = set(dataset.columns)
|
700
|
+
columns_to_select = []
|
701
|
+
for i, f in enumerate(features_required_by_estimator):
|
702
|
+
if (
|
703
|
+
i >= len(input_cols)
|
704
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
705
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
706
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
707
|
+
):
|
708
|
+
missing_features.append(f)
|
709
|
+
elif input_cols[i] in features_in_dataset:
|
710
|
+
columns_to_select.append(input_cols[i])
|
711
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
712
|
+
columns_to_select.append(unquoted_input_cols[i])
|
713
|
+
else:
|
714
|
+
columns_to_select.append(quoted_input_cols[i])
|
715
|
+
|
716
|
+
if len(missing_features) > 0:
|
717
|
+
raise ValueError(
|
718
|
+
"The feature names should match with those that were passed during fit.\n"
|
719
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
720
|
+
f"Features in the input dataframe : {input_cols}\n"
|
721
|
+
)
|
722
|
+
input_df = dataset[columns_to_select]
|
723
|
+
input_df.columns = features_required_by_estimator
|
713
724
|
|
714
725
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
715
726
|
input_df
|
@@ -669,26 +669,37 @@ class KernelDensity(BaseTransformer):
|
|
669
669
|
# input cols need to match unquoted / quoted
|
670
670
|
input_cols = self.input_cols
|
671
671
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
672
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
672
673
|
|
673
674
|
estimator = self._sklearn_object
|
674
675
|
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
676
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
677
|
+
missing_features = []
|
678
|
+
features_in_dataset = set(dataset.columns)
|
679
|
+
columns_to_select = []
|
680
|
+
for i, f in enumerate(features_required_by_estimator):
|
681
|
+
if (
|
682
|
+
i >= len(input_cols)
|
683
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
684
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
685
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
686
|
+
):
|
687
|
+
missing_features.append(f)
|
688
|
+
elif input_cols[i] in features_in_dataset:
|
689
|
+
columns_to_select.append(input_cols[i])
|
690
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
691
|
+
columns_to_select.append(unquoted_input_cols[i])
|
692
|
+
else:
|
693
|
+
columns_to_select.append(quoted_input_cols[i])
|
694
|
+
|
695
|
+
if len(missing_features) > 0:
|
696
|
+
raise ValueError(
|
697
|
+
"The feature names should match with those that were passed during fit.\n"
|
698
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
699
|
+
f"Features in the input dataframe : {input_cols}\n"
|
700
|
+
)
|
701
|
+
input_df = dataset[columns_to_select]
|
702
|
+
input_df.columns = features_required_by_estimator
|
692
703
|
|
693
704
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
694
705
|
input_df
|
@@ -697,26 +697,37 @@ class LocalOutlierFactor(BaseTransformer):
|
|
697
697
|
# input cols need to match unquoted / quoted
|
698
698
|
input_cols = self.input_cols
|
699
699
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
700
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
700
701
|
|
701
702
|
estimator = self._sklearn_object
|
702
703
|
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
704
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
705
|
+
missing_features = []
|
706
|
+
features_in_dataset = set(dataset.columns)
|
707
|
+
columns_to_select = []
|
708
|
+
for i, f in enumerate(features_required_by_estimator):
|
709
|
+
if (
|
710
|
+
i >= len(input_cols)
|
711
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
712
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
713
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
714
|
+
):
|
715
|
+
missing_features.append(f)
|
716
|
+
elif input_cols[i] in features_in_dataset:
|
717
|
+
columns_to_select.append(input_cols[i])
|
718
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
719
|
+
columns_to_select.append(unquoted_input_cols[i])
|
720
|
+
else:
|
721
|
+
columns_to_select.append(quoted_input_cols[i])
|
722
|
+
|
723
|
+
if len(missing_features) > 0:
|
724
|
+
raise ValueError(
|
725
|
+
"The feature names should match with those that were passed during fit.\n"
|
726
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
727
|
+
f"Features in the input dataframe : {input_cols}\n"
|
728
|
+
)
|
729
|
+
input_df = dataset[columns_to_select]
|
730
|
+
input_df.columns = features_required_by_estimator
|
720
731
|
|
721
732
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
722
733
|
input_df
|
@@ -628,26 +628,37 @@ class NearestCentroid(BaseTransformer):
|
|
628
628
|
# input cols need to match unquoted / quoted
|
629
629
|
input_cols = self.input_cols
|
630
630
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
631
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
631
632
|
|
632
633
|
estimator = self._sklearn_object
|
633
634
|
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
635
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
636
|
+
missing_features = []
|
637
|
+
features_in_dataset = set(dataset.columns)
|
638
|
+
columns_to_select = []
|
639
|
+
for i, f in enumerate(features_required_by_estimator):
|
640
|
+
if (
|
641
|
+
i >= len(input_cols)
|
642
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
643
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
644
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
645
|
+
):
|
646
|
+
missing_features.append(f)
|
647
|
+
elif input_cols[i] in features_in_dataset:
|
648
|
+
columns_to_select.append(input_cols[i])
|
649
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
650
|
+
columns_to_select.append(unquoted_input_cols[i])
|
651
|
+
else:
|
652
|
+
columns_to_select.append(quoted_input_cols[i])
|
653
|
+
|
654
|
+
if len(missing_features) > 0:
|
655
|
+
raise ValueError(
|
656
|
+
"The feature names should match with those that were passed during fit.\n"
|
657
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
658
|
+
f"Features in the input dataframe : {input_cols}\n"
|
659
|
+
)
|
660
|
+
input_df = dataset[columns_to_select]
|
661
|
+
input_df.columns = features_required_by_estimator
|
651
662
|
|
652
663
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
653
664
|
input_df
|
@@ -680,26 +680,37 @@ class NearestNeighbors(BaseTransformer):
|
|
680
680
|
# input cols need to match unquoted / quoted
|
681
681
|
input_cols = self.input_cols
|
682
682
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
683
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
683
684
|
|
684
685
|
estimator = self._sklearn_object
|
685
686
|
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
687
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
688
|
+
missing_features = []
|
689
|
+
features_in_dataset = set(dataset.columns)
|
690
|
+
columns_to_select = []
|
691
|
+
for i, f in enumerate(features_required_by_estimator):
|
692
|
+
if (
|
693
|
+
i >= len(input_cols)
|
694
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
695
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
696
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
697
|
+
):
|
698
|
+
missing_features.append(f)
|
699
|
+
elif input_cols[i] in features_in_dataset:
|
700
|
+
columns_to_select.append(input_cols[i])
|
701
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
702
|
+
columns_to_select.append(unquoted_input_cols[i])
|
703
|
+
else:
|
704
|
+
columns_to_select.append(quoted_input_cols[i])
|
705
|
+
|
706
|
+
if len(missing_features) > 0:
|
707
|
+
raise ValueError(
|
708
|
+
"The feature names should match with those that were passed during fit.\n"
|
709
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
710
|
+
f"Features in the input dataframe : {input_cols}\n"
|
711
|
+
)
|
712
|
+
input_df = dataset[columns_to_select]
|
713
|
+
input_df.columns = features_required_by_estimator
|
703
714
|
|
704
715
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
705
716
|
input_df
|
@@ -699,26 +699,37 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
|
|
699
699
|
# input cols need to match unquoted / quoted
|
700
700
|
input_cols = self.input_cols
|
701
701
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
702
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
702
703
|
|
703
704
|
estimator = self._sklearn_object
|
704
705
|
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
706
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
707
|
+
missing_features = []
|
708
|
+
features_in_dataset = set(dataset.columns)
|
709
|
+
columns_to_select = []
|
710
|
+
for i, f in enumerate(features_required_by_estimator):
|
711
|
+
if (
|
712
|
+
i >= len(input_cols)
|
713
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
714
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
715
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
716
|
+
):
|
717
|
+
missing_features.append(f)
|
718
|
+
elif input_cols[i] in features_in_dataset:
|
719
|
+
columns_to_select.append(input_cols[i])
|
720
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
721
|
+
columns_to_select.append(unquoted_input_cols[i])
|
722
|
+
else:
|
723
|
+
columns_to_select.append(quoted_input_cols[i])
|
724
|
+
|
725
|
+
if len(missing_features) > 0:
|
726
|
+
raise ValueError(
|
727
|
+
"The feature names should match with those that were passed during fit.\n"
|
728
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
729
|
+
f"Features in the input dataframe : {input_cols}\n"
|
730
|
+
)
|
731
|
+
input_df = dataset[columns_to_select]
|
732
|
+
input_df.columns = features_required_by_estimator
|
722
733
|
|
723
734
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
724
735
|
input_df
|
@@ -700,26 +700,37 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
700
700
|
# input cols need to match unquoted / quoted
|
701
701
|
input_cols = self.input_cols
|
702
702
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
703
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
703
704
|
|
704
705
|
estimator = self._sklearn_object
|
705
706
|
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
707
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
708
|
+
missing_features = []
|
709
|
+
features_in_dataset = set(dataset.columns)
|
710
|
+
columns_to_select = []
|
711
|
+
for i, f in enumerate(features_required_by_estimator):
|
712
|
+
if (
|
713
|
+
i >= len(input_cols)
|
714
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
715
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
716
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
717
|
+
):
|
718
|
+
missing_features.append(f)
|
719
|
+
elif input_cols[i] in features_in_dataset:
|
720
|
+
columns_to_select.append(input_cols[i])
|
721
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
722
|
+
columns_to_select.append(unquoted_input_cols[i])
|
723
|
+
else:
|
724
|
+
columns_to_select.append(quoted_input_cols[i])
|
725
|
+
|
726
|
+
if len(missing_features) > 0:
|
727
|
+
raise ValueError(
|
728
|
+
"The feature names should match with those that were passed during fit.\n"
|
729
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
730
|
+
f"Features in the input dataframe : {input_cols}\n"
|
731
|
+
)
|
732
|
+
input_df = dataset[columns_to_select]
|
733
|
+
input_df.columns = features_required_by_estimator
|
723
734
|
|
724
735
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
725
736
|
input_df
|
@@ -690,26 +690,37 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
690
690
|
# input cols need to match unquoted / quoted
|
691
691
|
input_cols = self.input_cols
|
692
692
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
693
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
693
694
|
|
694
695
|
estimator = self._sklearn_object
|
695
696
|
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
697
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
698
|
+
missing_features = []
|
699
|
+
features_in_dataset = set(dataset.columns)
|
700
|
+
columns_to_select = []
|
701
|
+
for i, f in enumerate(features_required_by_estimator):
|
702
|
+
if (
|
703
|
+
i >= len(input_cols)
|
704
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
705
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
706
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
707
|
+
):
|
708
|
+
missing_features.append(f)
|
709
|
+
elif input_cols[i] in features_in_dataset:
|
710
|
+
columns_to_select.append(input_cols[i])
|
711
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
712
|
+
columns_to_select.append(unquoted_input_cols[i])
|
713
|
+
else:
|
714
|
+
columns_to_select.append(quoted_input_cols[i])
|
715
|
+
|
716
|
+
if len(missing_features) > 0:
|
717
|
+
raise ValueError(
|
718
|
+
"The feature names should match with those that were passed during fit.\n"
|
719
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
720
|
+
f"Features in the input dataframe : {input_cols}\n"
|
721
|
+
)
|
722
|
+
input_df = dataset[columns_to_select]
|
723
|
+
input_df.columns = features_required_by_estimator
|
713
724
|
|
714
725
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
715
726
|
input_df
|
@@ -649,26 +649,37 @@ class BernoulliRBM(BaseTransformer):
|
|
649
649
|
# input cols need to match unquoted / quoted
|
650
650
|
input_cols = self.input_cols
|
651
651
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
652
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
652
653
|
|
653
654
|
estimator = self._sklearn_object
|
654
655
|
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
656
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
657
|
+
missing_features = []
|
658
|
+
features_in_dataset = set(dataset.columns)
|
659
|
+
columns_to_select = []
|
660
|
+
for i, f in enumerate(features_required_by_estimator):
|
661
|
+
if (
|
662
|
+
i >= len(input_cols)
|
663
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
664
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
665
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
666
|
+
):
|
667
|
+
missing_features.append(f)
|
668
|
+
elif input_cols[i] in features_in_dataset:
|
669
|
+
columns_to_select.append(input_cols[i])
|
670
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
671
|
+
columns_to_select.append(unquoted_input_cols[i])
|
672
|
+
else:
|
673
|
+
columns_to_select.append(quoted_input_cols[i])
|
674
|
+
|
675
|
+
if len(missing_features) > 0:
|
676
|
+
raise ValueError(
|
677
|
+
"The feature names should match with those that were passed during fit.\n"
|
678
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
679
|
+
f"Features in the input dataframe : {input_cols}\n"
|
680
|
+
)
|
681
|
+
input_df = dataset[columns_to_select]
|
682
|
+
input_df.columns = features_required_by_estimator
|
672
683
|
|
673
684
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
674
685
|
input_df
|
@@ -802,26 +802,37 @@ class MLPClassifier(BaseTransformer):
|
|
802
802
|
# input cols need to match unquoted / quoted
|
803
803
|
input_cols = self.input_cols
|
804
804
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
805
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
805
806
|
|
806
807
|
estimator = self._sklearn_object
|
807
808
|
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
809
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
810
|
+
missing_features = []
|
811
|
+
features_in_dataset = set(dataset.columns)
|
812
|
+
columns_to_select = []
|
813
|
+
for i, f in enumerate(features_required_by_estimator):
|
814
|
+
if (
|
815
|
+
i >= len(input_cols)
|
816
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
817
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
818
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
819
|
+
):
|
820
|
+
missing_features.append(f)
|
821
|
+
elif input_cols[i] in features_in_dataset:
|
822
|
+
columns_to_select.append(input_cols[i])
|
823
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
824
|
+
columns_to_select.append(unquoted_input_cols[i])
|
825
|
+
else:
|
826
|
+
columns_to_select.append(quoted_input_cols[i])
|
827
|
+
|
828
|
+
if len(missing_features) > 0:
|
829
|
+
raise ValueError(
|
830
|
+
"The feature names should match with those that were passed during fit.\n"
|
831
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
832
|
+
f"Features in the input dataframe : {input_cols}\n"
|
833
|
+
)
|
834
|
+
input_df = dataset[columns_to_select]
|
835
|
+
input_df.columns = features_required_by_estimator
|
825
836
|
|
826
837
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
827
838
|
input_df
|