snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -715,26 +715,37 @@ class GaussianMixture(BaseTransformer):
|
|
715
715
|
# input cols need to match unquoted / quoted
|
716
716
|
input_cols = self.input_cols
|
717
717
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
718
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
718
719
|
|
719
720
|
estimator = self._sklearn_object
|
720
721
|
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
722
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
723
|
+
missing_features = []
|
724
|
+
features_in_dataset = set(dataset.columns)
|
725
|
+
columns_to_select = []
|
726
|
+
for i, f in enumerate(features_required_by_estimator):
|
727
|
+
if (
|
728
|
+
i >= len(input_cols)
|
729
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
730
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
731
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
732
|
+
):
|
733
|
+
missing_features.append(f)
|
734
|
+
elif input_cols[i] in features_in_dataset:
|
735
|
+
columns_to_select.append(input_cols[i])
|
736
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
737
|
+
columns_to_select.append(unquoted_input_cols[i])
|
738
|
+
else:
|
739
|
+
columns_to_select.append(quoted_input_cols[i])
|
740
|
+
|
741
|
+
if len(missing_features) > 0:
|
742
|
+
raise ValueError(
|
743
|
+
"The feature names should match with those that were passed during fit.\n"
|
744
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
745
|
+
f"Features in the input dataframe : {input_cols}\n"
|
746
|
+
)
|
747
|
+
input_df = dataset[columns_to_select]
|
748
|
+
input_df.columns = features_required_by_estimator
|
738
749
|
|
739
750
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
740
751
|
input_df
|
@@ -751,26 +751,37 @@ class GridSearchCV(BaseTransformer):
|
|
751
751
|
# input cols need to match unquoted / quoted
|
752
752
|
input_cols = self.input_cols
|
753
753
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
754
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
754
755
|
|
755
756
|
estimator = self._sklearn_object
|
756
757
|
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
758
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
759
|
+
missing_features = []
|
760
|
+
features_in_dataset = set(dataset.columns)
|
761
|
+
columns_to_select = []
|
762
|
+
for i, f in enumerate(features_required_by_estimator):
|
763
|
+
if (
|
764
|
+
i >= len(input_cols)
|
765
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
766
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
767
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
768
|
+
):
|
769
|
+
missing_features.append(f)
|
770
|
+
elif input_cols[i] in features_in_dataset:
|
771
|
+
columns_to_select.append(input_cols[i])
|
772
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
773
|
+
columns_to_select.append(unquoted_input_cols[i])
|
774
|
+
else:
|
775
|
+
columns_to_select.append(quoted_input_cols[i])
|
776
|
+
|
777
|
+
if len(missing_features) > 0:
|
778
|
+
raise ValueError(
|
779
|
+
"The feature names should match with those that were passed during fit.\n"
|
780
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
781
|
+
f"Features in the input dataframe : {input_cols}\n"
|
782
|
+
)
|
783
|
+
input_df = dataset[columns_to_select]
|
784
|
+
input_df.columns = features_required_by_estimator
|
774
785
|
|
775
786
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
776
787
|
input_df
|
@@ -766,26 +766,37 @@ class RandomizedSearchCV(BaseTransformer):
|
|
766
766
|
# input cols need to match unquoted / quoted
|
767
767
|
input_cols = self.input_cols
|
768
768
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
769
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
769
770
|
|
770
771
|
estimator = self._sklearn_object
|
771
772
|
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
773
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
774
|
+
missing_features = []
|
775
|
+
features_in_dataset = set(dataset.columns)
|
776
|
+
columns_to_select = []
|
777
|
+
for i, f in enumerate(features_required_by_estimator):
|
778
|
+
if (
|
779
|
+
i >= len(input_cols)
|
780
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
781
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
782
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
783
|
+
):
|
784
|
+
missing_features.append(f)
|
785
|
+
elif input_cols[i] in features_in_dataset:
|
786
|
+
columns_to_select.append(input_cols[i])
|
787
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
788
|
+
columns_to_select.append(unquoted_input_cols[i])
|
789
|
+
else:
|
790
|
+
columns_to_select.append(quoted_input_cols[i])
|
791
|
+
|
792
|
+
if len(missing_features) > 0:
|
793
|
+
raise ValueError(
|
794
|
+
"The feature names should match with those that were passed during fit.\n"
|
795
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
796
|
+
f"Features in the input dataframe : {input_cols}\n"
|
797
|
+
)
|
798
|
+
input_df = dataset[columns_to_select]
|
799
|
+
input_df.columns = features_required_by_estimator
|
789
800
|
|
790
801
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
791
802
|
input_df
|
@@ -625,26 +625,37 @@ class OneVsOneClassifier(BaseTransformer):
|
|
625
625
|
# input cols need to match unquoted / quoted
|
626
626
|
input_cols = self.input_cols
|
627
627
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
628
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
628
629
|
|
629
630
|
estimator = self._sklearn_object
|
630
631
|
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
632
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
633
|
+
missing_features = []
|
634
|
+
features_in_dataset = set(dataset.columns)
|
635
|
+
columns_to_select = []
|
636
|
+
for i, f in enumerate(features_required_by_estimator):
|
637
|
+
if (
|
638
|
+
i >= len(input_cols)
|
639
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
640
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
641
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
642
|
+
):
|
643
|
+
missing_features.append(f)
|
644
|
+
elif input_cols[i] in features_in_dataset:
|
645
|
+
columns_to_select.append(input_cols[i])
|
646
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
647
|
+
columns_to_select.append(unquoted_input_cols[i])
|
648
|
+
else:
|
649
|
+
columns_to_select.append(quoted_input_cols[i])
|
650
|
+
|
651
|
+
if len(missing_features) > 0:
|
652
|
+
raise ValueError(
|
653
|
+
"The feature names should match with those that were passed during fit.\n"
|
654
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
655
|
+
f"Features in the input dataframe : {input_cols}\n"
|
656
|
+
)
|
657
|
+
input_df = dataset[columns_to_select]
|
658
|
+
input_df.columns = features_required_by_estimator
|
648
659
|
|
649
660
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
650
661
|
input_df
|
@@ -634,26 +634,37 @@ class OneVsRestClassifier(BaseTransformer):
|
|
634
634
|
# input cols need to match unquoted / quoted
|
635
635
|
input_cols = self.input_cols
|
636
636
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
637
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
637
638
|
|
638
639
|
estimator = self._sklearn_object
|
639
640
|
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
641
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
642
|
+
missing_features = []
|
643
|
+
features_in_dataset = set(dataset.columns)
|
644
|
+
columns_to_select = []
|
645
|
+
for i, f in enumerate(features_required_by_estimator):
|
646
|
+
if (
|
647
|
+
i >= len(input_cols)
|
648
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
649
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
650
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
651
|
+
):
|
652
|
+
missing_features.append(f)
|
653
|
+
elif input_cols[i] in features_in_dataset:
|
654
|
+
columns_to_select.append(input_cols[i])
|
655
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
656
|
+
columns_to_select.append(unquoted_input_cols[i])
|
657
|
+
else:
|
658
|
+
columns_to_select.append(quoted_input_cols[i])
|
659
|
+
|
660
|
+
if len(missing_features) > 0:
|
661
|
+
raise ValueError(
|
662
|
+
"The feature names should match with those that were passed during fit.\n"
|
663
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
664
|
+
f"Features in the input dataframe : {input_cols}\n"
|
665
|
+
)
|
666
|
+
input_df = dataset[columns_to_select]
|
667
|
+
input_df.columns = features_required_by_estimator
|
657
668
|
|
658
669
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
659
670
|
input_df
|
@@ -637,26 +637,37 @@ class OutputCodeClassifier(BaseTransformer):
|
|
637
637
|
# input cols need to match unquoted / quoted
|
638
638
|
input_cols = self.input_cols
|
639
639
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
640
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
640
641
|
|
641
642
|
estimator = self._sklearn_object
|
642
643
|
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
644
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
645
|
+
missing_features = []
|
646
|
+
features_in_dataset = set(dataset.columns)
|
647
|
+
columns_to_select = []
|
648
|
+
for i, f in enumerate(features_required_by_estimator):
|
649
|
+
if (
|
650
|
+
i >= len(input_cols)
|
651
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
652
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
653
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
654
|
+
):
|
655
|
+
missing_features.append(f)
|
656
|
+
elif input_cols[i] in features_in_dataset:
|
657
|
+
columns_to_select.append(input_cols[i])
|
658
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
659
|
+
columns_to_select.append(unquoted_input_cols[i])
|
660
|
+
else:
|
661
|
+
columns_to_select.append(quoted_input_cols[i])
|
662
|
+
|
663
|
+
if len(missing_features) > 0:
|
664
|
+
raise ValueError(
|
665
|
+
"The feature names should match with those that were passed during fit.\n"
|
666
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
667
|
+
f"Features in the input dataframe : {input_cols}\n"
|
668
|
+
)
|
669
|
+
input_df = dataset[columns_to_select]
|
670
|
+
input_df.columns = features_required_by_estimator
|
660
671
|
|
661
672
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
662
673
|
input_df
|
@@ -637,26 +637,37 @@ class BernoulliNB(BaseTransformer):
|
|
637
637
|
# input cols need to match unquoted / quoted
|
638
638
|
input_cols = self.input_cols
|
639
639
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
640
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
640
641
|
|
641
642
|
estimator = self._sklearn_object
|
642
643
|
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
644
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
645
|
+
missing_features = []
|
646
|
+
features_in_dataset = set(dataset.columns)
|
647
|
+
columns_to_select = []
|
648
|
+
for i, f in enumerate(features_required_by_estimator):
|
649
|
+
if (
|
650
|
+
i >= len(input_cols)
|
651
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
652
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
653
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
654
|
+
):
|
655
|
+
missing_features.append(f)
|
656
|
+
elif input_cols[i] in features_in_dataset:
|
657
|
+
columns_to_select.append(input_cols[i])
|
658
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
659
|
+
columns_to_select.append(unquoted_input_cols[i])
|
660
|
+
else:
|
661
|
+
columns_to_select.append(quoted_input_cols[i])
|
662
|
+
|
663
|
+
if len(missing_features) > 0:
|
664
|
+
raise ValueError(
|
665
|
+
"The feature names should match with those that were passed during fit.\n"
|
666
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
667
|
+
f"Features in the input dataframe : {input_cols}\n"
|
668
|
+
)
|
669
|
+
input_df = dataset[columns_to_select]
|
670
|
+
input_df.columns = features_required_by_estimator
|
660
671
|
|
661
672
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
662
673
|
input_df
|
@@ -643,26 +643,37 @@ class CategoricalNB(BaseTransformer):
|
|
643
643
|
# input cols need to match unquoted / quoted
|
644
644
|
input_cols = self.input_cols
|
645
645
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
646
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
646
647
|
|
647
648
|
estimator = self._sklearn_object
|
648
649
|
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
650
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
651
|
+
missing_features = []
|
652
|
+
features_in_dataset = set(dataset.columns)
|
653
|
+
columns_to_select = []
|
654
|
+
for i, f in enumerate(features_required_by_estimator):
|
655
|
+
if (
|
656
|
+
i >= len(input_cols)
|
657
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
658
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
659
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
660
|
+
):
|
661
|
+
missing_features.append(f)
|
662
|
+
elif input_cols[i] in features_in_dataset:
|
663
|
+
columns_to_select.append(input_cols[i])
|
664
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
665
|
+
columns_to_select.append(unquoted_input_cols[i])
|
666
|
+
else:
|
667
|
+
columns_to_select.append(quoted_input_cols[i])
|
668
|
+
|
669
|
+
if len(missing_features) > 0:
|
670
|
+
raise ValueError(
|
671
|
+
"The feature names should match with those that were passed during fit.\n"
|
672
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
673
|
+
f"Features in the input dataframe : {input_cols}\n"
|
674
|
+
)
|
675
|
+
input_df = dataset[columns_to_select]
|
676
|
+
input_df.columns = features_required_by_estimator
|
666
677
|
|
667
678
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
668
679
|
input_df
|
@@ -637,26 +637,37 @@ class ComplementNB(BaseTransformer):
|
|
637
637
|
# input cols need to match unquoted / quoted
|
638
638
|
input_cols = self.input_cols
|
639
639
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
640
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
640
641
|
|
641
642
|
estimator = self._sklearn_object
|
642
643
|
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
644
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
645
|
+
missing_features = []
|
646
|
+
features_in_dataset = set(dataset.columns)
|
647
|
+
columns_to_select = []
|
648
|
+
for i, f in enumerate(features_required_by_estimator):
|
649
|
+
if (
|
650
|
+
i >= len(input_cols)
|
651
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
652
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
653
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
654
|
+
):
|
655
|
+
missing_features.append(f)
|
656
|
+
elif input_cols[i] in features_in_dataset:
|
657
|
+
columns_to_select.append(input_cols[i])
|
658
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
659
|
+
columns_to_select.append(unquoted_input_cols[i])
|
660
|
+
else:
|
661
|
+
columns_to_select.append(quoted_input_cols[i])
|
662
|
+
|
663
|
+
if len(missing_features) > 0:
|
664
|
+
raise ValueError(
|
665
|
+
"The feature names should match with those that were passed during fit.\n"
|
666
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
667
|
+
f"Features in the input dataframe : {input_cols}\n"
|
668
|
+
)
|
669
|
+
input_df = dataset[columns_to_select]
|
670
|
+
input_df.columns = features_required_by_estimator
|
660
671
|
|
661
672
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
662
673
|
input_df
|
@@ -618,26 +618,37 @@ class GaussianNB(BaseTransformer):
|
|
618
618
|
# input cols need to match unquoted / quoted
|
619
619
|
input_cols = self.input_cols
|
620
620
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
621
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
621
622
|
|
622
623
|
estimator = self._sklearn_object
|
623
624
|
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
625
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
626
|
+
missing_features = []
|
627
|
+
features_in_dataset = set(dataset.columns)
|
628
|
+
columns_to_select = []
|
629
|
+
for i, f in enumerate(features_required_by_estimator):
|
630
|
+
if (
|
631
|
+
i >= len(input_cols)
|
632
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
633
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
634
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
635
|
+
):
|
636
|
+
missing_features.append(f)
|
637
|
+
elif input_cols[i] in features_in_dataset:
|
638
|
+
columns_to_select.append(input_cols[i])
|
639
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
640
|
+
columns_to_select.append(unquoted_input_cols[i])
|
641
|
+
else:
|
642
|
+
columns_to_select.append(quoted_input_cols[i])
|
643
|
+
|
644
|
+
if len(missing_features) > 0:
|
645
|
+
raise ValueError(
|
646
|
+
"The feature names should match with those that were passed during fit.\n"
|
647
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
648
|
+
f"Features in the input dataframe : {input_cols}\n"
|
649
|
+
)
|
650
|
+
input_df = dataset[columns_to_select]
|
651
|
+
input_df.columns = features_required_by_estimator
|
641
652
|
|
642
653
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
643
654
|
input_df
|
@@ -631,26 +631,37 @@ class MultinomialNB(BaseTransformer):
|
|
631
631
|
# input cols need to match unquoted / quoted
|
632
632
|
input_cols = self.input_cols
|
633
633
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
634
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
634
635
|
|
635
636
|
estimator = self._sklearn_object
|
636
637
|
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
638
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
639
|
+
missing_features = []
|
640
|
+
features_in_dataset = set(dataset.columns)
|
641
|
+
columns_to_select = []
|
642
|
+
for i, f in enumerate(features_required_by_estimator):
|
643
|
+
if (
|
644
|
+
i >= len(input_cols)
|
645
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
646
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
647
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
648
|
+
):
|
649
|
+
missing_features.append(f)
|
650
|
+
elif input_cols[i] in features_in_dataset:
|
651
|
+
columns_to_select.append(input_cols[i])
|
652
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
653
|
+
columns_to_select.append(unquoted_input_cols[i])
|
654
|
+
else:
|
655
|
+
columns_to_select.append(quoted_input_cols[i])
|
656
|
+
|
657
|
+
if len(missing_features) > 0:
|
658
|
+
raise ValueError(
|
659
|
+
"The feature names should match with those that were passed during fit.\n"
|
660
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
661
|
+
f"Features in the input dataframe : {input_cols}\n"
|
662
|
+
)
|
663
|
+
input_df = dataset[columns_to_select]
|
664
|
+
input_df.columns = features_required_by_estimator
|
654
665
|
|
655
666
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
656
667
|
input_df
|