snowflake-ml-python: snowflake_ml_python-1.0.1-py3-none-any.whl → snowflake_ml_python-1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/svm/svc.py
CHANGED
@@ -717,26 +717,37 @@ class SVC(BaseTransformer):
|
|
717
717
|
# input cols need to match unquoted / quoted
|
718
718
|
input_cols = self.input_cols
|
719
719
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
720
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
720
721
|
|
721
722
|
estimator = self._sklearn_object
|
722
723
|
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
724
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
725
|
+
missing_features = []
|
726
|
+
features_in_dataset = set(dataset.columns)
|
727
|
+
columns_to_select = []
|
728
|
+
for i, f in enumerate(features_required_by_estimator):
|
729
|
+
if (
|
730
|
+
i >= len(input_cols)
|
731
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
732
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
733
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
734
|
+
):
|
735
|
+
missing_features.append(f)
|
736
|
+
elif input_cols[i] in features_in_dataset:
|
737
|
+
columns_to_select.append(input_cols[i])
|
738
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
739
|
+
columns_to_select.append(unquoted_input_cols[i])
|
740
|
+
else:
|
741
|
+
columns_to_select.append(quoted_input_cols[i])
|
742
|
+
|
743
|
+
if len(missing_features) > 0:
|
744
|
+
raise ValueError(
|
745
|
+
"The feature names should match with those that were passed during fit.\n"
|
746
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
747
|
+
f"Features in the input dataframe : {input_cols}\n"
|
748
|
+
)
|
749
|
+
input_df = dataset[columns_to_select]
|
750
|
+
input_df.columns = features_required_by_estimator
|
740
751
|
|
741
752
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
742
753
|
input_df
|
snowflake/ml/modeling/svm/svr.py
CHANGED
@@ -678,26 +678,37 @@ class SVR(BaseTransformer):
|
|
678
678
|
# input cols need to match unquoted / quoted
|
679
679
|
input_cols = self.input_cols
|
680
680
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
681
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
681
682
|
|
682
683
|
estimator = self._sklearn_object
|
683
684
|
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
685
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
686
|
+
missing_features = []
|
687
|
+
features_in_dataset = set(dataset.columns)
|
688
|
+
columns_to_select = []
|
689
|
+
for i, f in enumerate(features_required_by_estimator):
|
690
|
+
if (
|
691
|
+
i >= len(input_cols)
|
692
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
693
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
694
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
695
|
+
):
|
696
|
+
missing_features.append(f)
|
697
|
+
elif input_cols[i] in features_in_dataset:
|
698
|
+
columns_to_select.append(input_cols[i])
|
699
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
700
|
+
columns_to_select.append(unquoted_input_cols[i])
|
701
|
+
else:
|
702
|
+
columns_to_select.append(quoted_input_cols[i])
|
703
|
+
|
704
|
+
if len(missing_features) > 0:
|
705
|
+
raise ValueError(
|
706
|
+
"The feature names should match with those that were passed during fit.\n"
|
707
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
708
|
+
f"Features in the input dataframe : {input_cols}\n"
|
709
|
+
)
|
710
|
+
input_df = dataset[columns_to_select]
|
711
|
+
input_df.columns = features_required_by_estimator
|
701
712
|
|
702
713
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
703
714
|
input_df
|
snowflake/ml/modeling/tree/decision_tree_classifier.py
CHANGED
@@ -746,26 +746,37 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
746
746
|
# input cols need to match unquoted / quoted
|
747
747
|
input_cols = self.input_cols
|
748
748
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
749
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
749
750
|
|
750
751
|
estimator = self._sklearn_object
|
751
752
|
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
753
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
754
|
+
missing_features = []
|
755
|
+
features_in_dataset = set(dataset.columns)
|
756
|
+
columns_to_select = []
|
757
|
+
for i, f in enumerate(features_required_by_estimator):
|
758
|
+
if (
|
759
|
+
i >= len(input_cols)
|
760
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
761
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
762
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
763
|
+
):
|
764
|
+
missing_features.append(f)
|
765
|
+
elif input_cols[i] in features_in_dataset:
|
766
|
+
columns_to_select.append(input_cols[i])
|
767
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
768
|
+
columns_to_select.append(unquoted_input_cols[i])
|
769
|
+
else:
|
770
|
+
columns_to_select.append(quoted_input_cols[i])
|
771
|
+
|
772
|
+
if len(missing_features) > 0:
|
773
|
+
raise ValueError(
|
774
|
+
"The feature names should match with those that were passed during fit.\n"
|
775
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
776
|
+
f"Features in the input dataframe : {input_cols}\n"
|
777
|
+
)
|
778
|
+
input_df = dataset[columns_to_select]
|
779
|
+
input_df.columns = features_required_by_estimator
|
769
780
|
|
770
781
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
771
782
|
input_df
|
snowflake/ml/modeling/tree/decision_tree_regressor.py
CHANGED
@@ -728,26 +728,37 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
728
728
|
# input cols need to match unquoted / quoted
|
729
729
|
input_cols = self.input_cols
|
730
730
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
731
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
731
732
|
|
732
733
|
estimator = self._sklearn_object
|
733
734
|
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
735
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
736
|
+
missing_features = []
|
737
|
+
features_in_dataset = set(dataset.columns)
|
738
|
+
columns_to_select = []
|
739
|
+
for i, f in enumerate(features_required_by_estimator):
|
740
|
+
if (
|
741
|
+
i >= len(input_cols)
|
742
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
743
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
744
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
745
|
+
):
|
746
|
+
missing_features.append(f)
|
747
|
+
elif input_cols[i] in features_in_dataset:
|
748
|
+
columns_to_select.append(input_cols[i])
|
749
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
750
|
+
columns_to_select.append(unquoted_input_cols[i])
|
751
|
+
else:
|
752
|
+
columns_to_select.append(quoted_input_cols[i])
|
753
|
+
|
754
|
+
if len(missing_features) > 0:
|
755
|
+
raise ValueError(
|
756
|
+
"The feature names should match with those that were passed during fit.\n"
|
757
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
758
|
+
f"Features in the input dataframe : {input_cols}\n"
|
759
|
+
)
|
760
|
+
input_df = dataset[columns_to_select]
|
761
|
+
input_df.columns = features_required_by_estimator
|
751
762
|
|
752
763
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
753
764
|
input_df
|
snowflake/ml/modeling/tree/extra_tree_classifier.py
CHANGED
@@ -738,26 +738,37 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
738
738
|
# input cols need to match unquoted / quoted
|
739
739
|
input_cols = self.input_cols
|
740
740
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
741
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
741
742
|
|
742
743
|
estimator = self._sklearn_object
|
743
744
|
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
745
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
746
|
+
missing_features = []
|
747
|
+
features_in_dataset = set(dataset.columns)
|
748
|
+
columns_to_select = []
|
749
|
+
for i, f in enumerate(features_required_by_estimator):
|
750
|
+
if (
|
751
|
+
i >= len(input_cols)
|
752
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
753
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
754
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
755
|
+
):
|
756
|
+
missing_features.append(f)
|
757
|
+
elif input_cols[i] in features_in_dataset:
|
758
|
+
columns_to_select.append(input_cols[i])
|
759
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
760
|
+
columns_to_select.append(unquoted_input_cols[i])
|
761
|
+
else:
|
762
|
+
columns_to_select.append(quoted_input_cols[i])
|
763
|
+
|
764
|
+
if len(missing_features) > 0:
|
765
|
+
raise ValueError(
|
766
|
+
"The feature names should match with those that were passed during fit.\n"
|
767
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
768
|
+
f"Features in the input dataframe : {input_cols}\n"
|
769
|
+
)
|
770
|
+
input_df = dataset[columns_to_select]
|
771
|
+
input_df.columns = features_required_by_estimator
|
761
772
|
|
762
773
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
763
774
|
input_df
|
snowflake/ml/modeling/tree/extra_tree_regressor.py
CHANGED
@@ -720,26 +720,37 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
720
720
|
# input cols need to match unquoted / quoted
|
721
721
|
input_cols = self.input_cols
|
722
722
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
723
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
723
724
|
|
724
725
|
estimator = self._sklearn_object
|
725
726
|
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
727
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
728
|
+
missing_features = []
|
729
|
+
features_in_dataset = set(dataset.columns)
|
730
|
+
columns_to_select = []
|
731
|
+
for i, f in enumerate(features_required_by_estimator):
|
732
|
+
if (
|
733
|
+
i >= len(input_cols)
|
734
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
735
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
736
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
737
|
+
):
|
738
|
+
missing_features.append(f)
|
739
|
+
elif input_cols[i] in features_in_dataset:
|
740
|
+
columns_to_select.append(input_cols[i])
|
741
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
742
|
+
columns_to_select.append(unquoted_input_cols[i])
|
743
|
+
else:
|
744
|
+
columns_to_select.append(quoted_input_cols[i])
|
745
|
+
|
746
|
+
if len(missing_features) > 0:
|
747
|
+
raise ValueError(
|
748
|
+
"The feature names should match with those that were passed during fit.\n"
|
749
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
750
|
+
f"Features in the input dataframe : {input_cols}\n"
|
751
|
+
)
|
752
|
+
input_df = dataset[columns_to_select]
|
753
|
+
input_df.columns = features_required_by_estimator
|
743
754
|
|
744
755
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
745
756
|
input_df
|
snowflake/ml/modeling/xgboost/xgb_classifier.py
CHANGED
@@ -820,26 +820,37 @@ class XGBClassifier(BaseTransformer):
|
|
820
820
|
# input cols need to match unquoted / quoted
|
821
821
|
input_cols = self.input_cols
|
822
822
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
823
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
823
824
|
|
824
825
|
estimator = self._sklearn_object
|
825
826
|
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
827
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
828
|
+
missing_features = []
|
829
|
+
features_in_dataset = set(dataset.columns)
|
830
|
+
columns_to_select = []
|
831
|
+
for i, f in enumerate(features_required_by_estimator):
|
832
|
+
if (
|
833
|
+
i >= len(input_cols)
|
834
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
835
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
836
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
837
|
+
):
|
838
|
+
missing_features.append(f)
|
839
|
+
elif input_cols[i] in features_in_dataset:
|
840
|
+
columns_to_select.append(input_cols[i])
|
841
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
842
|
+
columns_to_select.append(unquoted_input_cols[i])
|
843
|
+
else:
|
844
|
+
columns_to_select.append(quoted_input_cols[i])
|
845
|
+
|
846
|
+
if len(missing_features) > 0:
|
847
|
+
raise ValueError(
|
848
|
+
"The feature names should match with those that were passed during fit.\n"
|
849
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
850
|
+
f"Features in the input dataframe : {input_cols}\n"
|
851
|
+
)
|
852
|
+
input_df = dataset[columns_to_select]
|
853
|
+
input_df.columns = features_required_by_estimator
|
843
854
|
|
844
855
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
845
856
|
input_df
|
snowflake/ml/modeling/xgboost/xgb_regressor.py
CHANGED
@@ -819,26 +819,37 @@ class XGBRegressor(BaseTransformer):
|
|
819
819
|
# input cols need to match unquoted / quoted
|
820
820
|
input_cols = self.input_cols
|
821
821
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
822
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
822
823
|
|
823
824
|
estimator = self._sklearn_object
|
824
825
|
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
826
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
827
|
+
missing_features = []
|
828
|
+
features_in_dataset = set(dataset.columns)
|
829
|
+
columns_to_select = []
|
830
|
+
for i, f in enumerate(features_required_by_estimator):
|
831
|
+
if (
|
832
|
+
i >= len(input_cols)
|
833
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
834
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
835
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
836
|
+
):
|
837
|
+
missing_features.append(f)
|
838
|
+
elif input_cols[i] in features_in_dataset:
|
839
|
+
columns_to_select.append(input_cols[i])
|
840
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
841
|
+
columns_to_select.append(unquoted_input_cols[i])
|
842
|
+
else:
|
843
|
+
columns_to_select.append(quoted_input_cols[i])
|
844
|
+
|
845
|
+
if len(missing_features) > 0:
|
846
|
+
raise ValueError(
|
847
|
+
"The feature names should match with those that were passed during fit.\n"
|
848
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
849
|
+
f"Features in the input dataframe : {input_cols}\n"
|
850
|
+
)
|
851
|
+
input_df = dataset[columns_to_select]
|
852
|
+
input_df.columns = features_required_by_estimator
|
842
853
|
|
843
854
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
844
855
|
input_df
|
snowflake/ml/modeling/xgboost/xgbrf_classifier.py
CHANGED
@@ -824,26 +824,37 @@ class XGBRFClassifier(BaseTransformer):
|
|
824
824
|
# input cols need to match unquoted / quoted
|
825
825
|
input_cols = self.input_cols
|
826
826
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
827
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
827
828
|
|
828
829
|
estimator = self._sklearn_object
|
829
830
|
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
831
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
832
|
+
missing_features = []
|
833
|
+
features_in_dataset = set(dataset.columns)
|
834
|
+
columns_to_select = []
|
835
|
+
for i, f in enumerate(features_required_by_estimator):
|
836
|
+
if (
|
837
|
+
i >= len(input_cols)
|
838
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
839
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
840
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
841
|
+
):
|
842
|
+
missing_features.append(f)
|
843
|
+
elif input_cols[i] in features_in_dataset:
|
844
|
+
columns_to_select.append(input_cols[i])
|
845
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
846
|
+
columns_to_select.append(unquoted_input_cols[i])
|
847
|
+
else:
|
848
|
+
columns_to_select.append(quoted_input_cols[i])
|
849
|
+
|
850
|
+
if len(missing_features) > 0:
|
851
|
+
raise ValueError(
|
852
|
+
"The feature names should match with those that were passed during fit.\n"
|
853
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
854
|
+
f"Features in the input dataframe : {input_cols}\n"
|
855
|
+
)
|
856
|
+
input_df = dataset[columns_to_select]
|
857
|
+
input_df.columns = features_required_by_estimator
|
847
858
|
|
848
859
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
849
860
|
input_df
|
snowflake/ml/modeling/xgboost/xgbrf_regressor.py
CHANGED
@@ -824,26 +824,37 @@ class XGBRFRegressor(BaseTransformer):
|
|
824
824
|
# input cols need to match unquoted / quoted
|
825
825
|
input_cols = self.input_cols
|
826
826
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
827
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
827
828
|
|
828
829
|
estimator = self._sklearn_object
|
829
830
|
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
831
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
832
|
+
missing_features = []
|
833
|
+
features_in_dataset = set(dataset.columns)
|
834
|
+
columns_to_select = []
|
835
|
+
for i, f in enumerate(features_required_by_estimator):
|
836
|
+
if (
|
837
|
+
i >= len(input_cols)
|
838
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
839
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
840
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
841
|
+
):
|
842
|
+
missing_features.append(f)
|
843
|
+
elif input_cols[i] in features_in_dataset:
|
844
|
+
columns_to_select.append(input_cols[i])
|
845
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
846
|
+
columns_to_select.append(unquoted_input_cols[i])
|
847
|
+
else:
|
848
|
+
columns_to_select.append(quoted_input_cols[i])
|
849
|
+
|
850
|
+
if len(missing_features) > 0:
|
851
|
+
raise ValueError(
|
852
|
+
"The feature names should match with those that were passed during fit.\n"
|
853
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
854
|
+
f"Features in the input dataframe : {input_cols}\n"
|
855
|
+
)
|
856
|
+
input_df = dataset[columns_to_select]
|
857
|
+
input_df.columns = features_required_by_estimator
|
847
858
|
|
848
859
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
849
860
|
input_df
|