snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -736,26 +736,37 @@ class SpectralClustering(BaseTransformer):
|
|
736
736
|
# input cols need to match unquoted / quoted
|
737
737
|
input_cols = self.input_cols
|
738
738
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
739
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
739
740
|
|
740
741
|
estimator = self._sklearn_object
|
741
742
|
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
743
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
744
|
+
missing_features = []
|
745
|
+
features_in_dataset = set(dataset.columns)
|
746
|
+
columns_to_select = []
|
747
|
+
for i, f in enumerate(features_required_by_estimator):
|
748
|
+
if (
|
749
|
+
i >= len(input_cols)
|
750
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
751
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
752
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
753
|
+
):
|
754
|
+
missing_features.append(f)
|
755
|
+
elif input_cols[i] in features_in_dataset:
|
756
|
+
columns_to_select.append(input_cols[i])
|
757
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
758
|
+
columns_to_select.append(unquoted_input_cols[i])
|
759
|
+
else:
|
760
|
+
columns_to_select.append(quoted_input_cols[i])
|
761
|
+
|
762
|
+
if len(missing_features) > 0:
|
763
|
+
raise ValueError(
|
764
|
+
"The feature names should match with those that were passed during fit.\n"
|
765
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
766
|
+
f"Features in the input dataframe : {input_cols}\n"
|
767
|
+
)
|
768
|
+
input_df = dataset[columns_to_select]
|
769
|
+
input_df.columns = features_required_by_estimator
|
759
770
|
|
760
771
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
761
772
|
input_df
|
@@ -657,26 +657,37 @@ class SpectralCoclustering(BaseTransformer):
|
|
657
657
|
# input cols need to match unquoted / quoted
|
658
658
|
input_cols = self.input_cols
|
659
659
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
660
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
660
661
|
|
661
662
|
estimator = self._sklearn_object
|
662
663
|
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
664
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
665
|
+
missing_features = []
|
666
|
+
features_in_dataset = set(dataset.columns)
|
667
|
+
columns_to_select = []
|
668
|
+
for i, f in enumerate(features_required_by_estimator):
|
669
|
+
if (
|
670
|
+
i >= len(input_cols)
|
671
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
672
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
673
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
674
|
+
):
|
675
|
+
missing_features.append(f)
|
676
|
+
elif input_cols[i] in features_in_dataset:
|
677
|
+
columns_to_select.append(input_cols[i])
|
678
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
679
|
+
columns_to_select.append(unquoted_input_cols[i])
|
680
|
+
else:
|
681
|
+
columns_to_select.append(quoted_input_cols[i])
|
682
|
+
|
683
|
+
if len(missing_features) > 0:
|
684
|
+
raise ValueError(
|
685
|
+
"The feature names should match with those that were passed during fit.\n"
|
686
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
687
|
+
f"Features in the input dataframe : {input_cols}\n"
|
688
|
+
)
|
689
|
+
input_df = dataset[columns_to_select]
|
690
|
+
input_df.columns = features_required_by_estimator
|
680
691
|
|
681
692
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
682
693
|
input_df
|
@@ -687,26 +687,37 @@ class ColumnTransformer(BaseTransformer):
|
|
687
687
|
# input cols need to match unquoted / quoted
|
688
688
|
input_cols = self.input_cols
|
689
689
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
690
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
690
691
|
|
691
692
|
estimator = self._sklearn_object
|
692
693
|
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
694
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
695
|
+
missing_features = []
|
696
|
+
features_in_dataset = set(dataset.columns)
|
697
|
+
columns_to_select = []
|
698
|
+
for i, f in enumerate(features_required_by_estimator):
|
699
|
+
if (
|
700
|
+
i >= len(input_cols)
|
701
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
702
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
703
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
704
|
+
):
|
705
|
+
missing_features.append(f)
|
706
|
+
elif input_cols[i] in features_in_dataset:
|
707
|
+
columns_to_select.append(input_cols[i])
|
708
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
709
|
+
columns_to_select.append(unquoted_input_cols[i])
|
710
|
+
else:
|
711
|
+
columns_to_select.append(quoted_input_cols[i])
|
712
|
+
|
713
|
+
if len(missing_features) > 0:
|
714
|
+
raise ValueError(
|
715
|
+
"The feature names should match with those that were passed during fit.\n"
|
716
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
717
|
+
f"Features in the input dataframe : {input_cols}\n"
|
718
|
+
)
|
719
|
+
input_df = dataset[columns_to_select]
|
720
|
+
input_df.columns = features_required_by_estimator
|
710
721
|
|
711
722
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
712
723
|
input_df
|
@@ -646,26 +646,37 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
646
646
|
# input cols need to match unquoted / quoted
|
647
647
|
input_cols = self.input_cols
|
648
648
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
649
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
649
650
|
|
650
651
|
estimator = self._sklearn_object
|
651
652
|
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
653
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
654
|
+
missing_features = []
|
655
|
+
features_in_dataset = set(dataset.columns)
|
656
|
+
columns_to_select = []
|
657
|
+
for i, f in enumerate(features_required_by_estimator):
|
658
|
+
if (
|
659
|
+
i >= len(input_cols)
|
660
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
661
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
662
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
663
|
+
):
|
664
|
+
missing_features.append(f)
|
665
|
+
elif input_cols[i] in features_in_dataset:
|
666
|
+
columns_to_select.append(input_cols[i])
|
667
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
668
|
+
columns_to_select.append(unquoted_input_cols[i])
|
669
|
+
else:
|
670
|
+
columns_to_select.append(quoted_input_cols[i])
|
671
|
+
|
672
|
+
if len(missing_features) > 0:
|
673
|
+
raise ValueError(
|
674
|
+
"The feature names should match with those that were passed during fit.\n"
|
675
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
676
|
+
f"Features in the input dataframe : {input_cols}\n"
|
677
|
+
)
|
678
|
+
input_df = dataset[columns_to_select]
|
679
|
+
input_df.columns = features_required_by_estimator
|
669
680
|
|
670
681
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
671
682
|
input_df
|
@@ -643,26 +643,37 @@ class EllipticEnvelope(BaseTransformer):
|
|
643
643
|
# input cols need to match unquoted / quoted
|
644
644
|
input_cols = self.input_cols
|
645
645
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
646
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
646
647
|
|
647
648
|
estimator = self._sklearn_object
|
648
649
|
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
650
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
651
|
+
missing_features = []
|
652
|
+
features_in_dataset = set(dataset.columns)
|
653
|
+
columns_to_select = []
|
654
|
+
for i, f in enumerate(features_required_by_estimator):
|
655
|
+
if (
|
656
|
+
i >= len(input_cols)
|
657
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
658
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
659
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
660
|
+
):
|
661
|
+
missing_features.append(f)
|
662
|
+
elif input_cols[i] in features_in_dataset:
|
663
|
+
columns_to_select.append(input_cols[i])
|
664
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
665
|
+
columns_to_select.append(unquoted_input_cols[i])
|
666
|
+
else:
|
667
|
+
columns_to_select.append(quoted_input_cols[i])
|
668
|
+
|
669
|
+
if len(missing_features) > 0:
|
670
|
+
raise ValueError(
|
671
|
+
"The feature names should match with those that were passed during fit.\n"
|
672
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
673
|
+
f"Features in the input dataframe : {input_cols}\n"
|
674
|
+
)
|
675
|
+
input_df = dataset[columns_to_select]
|
676
|
+
input_df.columns = features_required_by_estimator
|
666
677
|
|
667
678
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
668
679
|
input_df
|
@@ -619,26 +619,37 @@ class EmpiricalCovariance(BaseTransformer):
|
|
619
619
|
# input cols need to match unquoted / quoted
|
620
620
|
input_cols = self.input_cols
|
621
621
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
622
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
622
623
|
|
623
624
|
estimator = self._sklearn_object
|
624
625
|
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
626
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
627
|
+
missing_features = []
|
628
|
+
features_in_dataset = set(dataset.columns)
|
629
|
+
columns_to_select = []
|
630
|
+
for i, f in enumerate(features_required_by_estimator):
|
631
|
+
if (
|
632
|
+
i >= len(input_cols)
|
633
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
634
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
635
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
636
|
+
):
|
637
|
+
missing_features.append(f)
|
638
|
+
elif input_cols[i] in features_in_dataset:
|
639
|
+
columns_to_select.append(input_cols[i])
|
640
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
641
|
+
columns_to_select.append(unquoted_input_cols[i])
|
642
|
+
else:
|
643
|
+
columns_to_select.append(quoted_input_cols[i])
|
644
|
+
|
645
|
+
if len(missing_features) > 0:
|
646
|
+
raise ValueError(
|
647
|
+
"The feature names should match with those that were passed during fit.\n"
|
648
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
649
|
+
f"Features in the input dataframe : {input_cols}\n"
|
650
|
+
)
|
651
|
+
input_df = dataset[columns_to_select]
|
652
|
+
input_df.columns = features_required_by_estimator
|
642
653
|
|
643
654
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
644
655
|
input_df
|
@@ -653,26 +653,37 @@ class GraphicalLasso(BaseTransformer):
|
|
653
653
|
# input cols need to match unquoted / quoted
|
654
654
|
input_cols = self.input_cols
|
655
655
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
656
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
656
657
|
|
657
658
|
estimator = self._sklearn_object
|
658
659
|
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
660
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
661
|
+
missing_features = []
|
662
|
+
features_in_dataset = set(dataset.columns)
|
663
|
+
columns_to_select = []
|
664
|
+
for i, f in enumerate(features_required_by_estimator):
|
665
|
+
if (
|
666
|
+
i >= len(input_cols)
|
667
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
668
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
669
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
670
|
+
):
|
671
|
+
missing_features.append(f)
|
672
|
+
elif input_cols[i] in features_in_dataset:
|
673
|
+
columns_to_select.append(input_cols[i])
|
674
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
675
|
+
columns_to_select.append(unquoted_input_cols[i])
|
676
|
+
else:
|
677
|
+
columns_to_select.append(quoted_input_cols[i])
|
678
|
+
|
679
|
+
if len(missing_features) > 0:
|
680
|
+
raise ValueError(
|
681
|
+
"The feature names should match with those that were passed during fit.\n"
|
682
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
683
|
+
f"Features in the input dataframe : {input_cols}\n"
|
684
|
+
)
|
685
|
+
input_df = dataset[columns_to_select]
|
686
|
+
input_df.columns = features_required_by_estimator
|
676
687
|
|
677
688
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
678
689
|
input_df
|
@@ -686,26 +686,37 @@ class GraphicalLassoCV(BaseTransformer):
|
|
686
686
|
# input cols need to match unquoted / quoted
|
687
687
|
input_cols = self.input_cols
|
688
688
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
689
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
689
690
|
|
690
691
|
estimator = self._sklearn_object
|
691
692
|
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
693
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
694
|
+
missing_features = []
|
695
|
+
features_in_dataset = set(dataset.columns)
|
696
|
+
columns_to_select = []
|
697
|
+
for i, f in enumerate(features_required_by_estimator):
|
698
|
+
if (
|
699
|
+
i >= len(input_cols)
|
700
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
701
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
702
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
703
|
+
):
|
704
|
+
missing_features.append(f)
|
705
|
+
elif input_cols[i] in features_in_dataset:
|
706
|
+
columns_to_select.append(input_cols[i])
|
707
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
708
|
+
columns_to_select.append(unquoted_input_cols[i])
|
709
|
+
else:
|
710
|
+
columns_to_select.append(quoted_input_cols[i])
|
711
|
+
|
712
|
+
if len(missing_features) > 0:
|
713
|
+
raise ValueError(
|
714
|
+
"The feature names should match with those that were passed during fit.\n"
|
715
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
716
|
+
f"Features in the input dataframe : {input_cols}\n"
|
717
|
+
)
|
718
|
+
input_df = dataset[columns_to_select]
|
719
|
+
input_df.columns = features_required_by_estimator
|
709
720
|
|
710
721
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
711
722
|
input_df
|
@@ -626,26 +626,37 @@ class LedoitWolf(BaseTransformer):
|
|
626
626
|
# input cols need to match unquoted / quoted
|
627
627
|
input_cols = self.input_cols
|
628
628
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
629
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
629
630
|
|
630
631
|
estimator = self._sklearn_object
|
631
632
|
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
633
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
634
|
+
missing_features = []
|
635
|
+
features_in_dataset = set(dataset.columns)
|
636
|
+
columns_to_select = []
|
637
|
+
for i, f in enumerate(features_required_by_estimator):
|
638
|
+
if (
|
639
|
+
i >= len(input_cols)
|
640
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
641
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
642
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
643
|
+
):
|
644
|
+
missing_features.append(f)
|
645
|
+
elif input_cols[i] in features_in_dataset:
|
646
|
+
columns_to_select.append(input_cols[i])
|
647
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
648
|
+
columns_to_select.append(unquoted_input_cols[i])
|
649
|
+
else:
|
650
|
+
columns_to_select.append(quoted_input_cols[i])
|
651
|
+
|
652
|
+
if len(missing_features) > 0:
|
653
|
+
raise ValueError(
|
654
|
+
"The feature names should match with those that were passed during fit.\n"
|
655
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
656
|
+
f"Features in the input dataframe : {input_cols}\n"
|
657
|
+
)
|
658
|
+
input_df = dataset[columns_to_select]
|
659
|
+
input_df.columns = features_required_by_estimator
|
649
660
|
|
650
661
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
651
662
|
input_df
|
@@ -638,26 +638,37 @@ class MinCovDet(BaseTransformer):
|
|
638
638
|
# input cols need to match unquoted / quoted
|
639
639
|
input_cols = self.input_cols
|
640
640
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
641
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
641
642
|
|
642
643
|
estimator = self._sklearn_object
|
643
644
|
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
645
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
646
|
+
missing_features = []
|
647
|
+
features_in_dataset = set(dataset.columns)
|
648
|
+
columns_to_select = []
|
649
|
+
for i, f in enumerate(features_required_by_estimator):
|
650
|
+
if (
|
651
|
+
i >= len(input_cols)
|
652
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
653
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
654
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
655
|
+
):
|
656
|
+
missing_features.append(f)
|
657
|
+
elif input_cols[i] in features_in_dataset:
|
658
|
+
columns_to_select.append(input_cols[i])
|
659
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
660
|
+
columns_to_select.append(unquoted_input_cols[i])
|
661
|
+
else:
|
662
|
+
columns_to_select.append(quoted_input_cols[i])
|
663
|
+
|
664
|
+
if len(missing_features) > 0:
|
665
|
+
raise ValueError(
|
666
|
+
"The feature names should match with those that were passed during fit.\n"
|
667
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
668
|
+
f"Features in the input dataframe : {input_cols}\n"
|
669
|
+
)
|
670
|
+
input_df = dataset[columns_to_select]
|
671
|
+
input_df.columns = features_required_by_estimator
|
661
672
|
|
662
673
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
663
674
|
input_df
|
@@ -619,26 +619,37 @@ class OAS(BaseTransformer):
|
|
619
619
|
# input cols need to match unquoted / quoted
|
620
620
|
input_cols = self.input_cols
|
621
621
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
622
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
622
623
|
|
623
624
|
estimator = self._sklearn_object
|
624
625
|
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
626
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
627
|
+
missing_features = []
|
628
|
+
features_in_dataset = set(dataset.columns)
|
629
|
+
columns_to_select = []
|
630
|
+
for i, f in enumerate(features_required_by_estimator):
|
631
|
+
if (
|
632
|
+
i >= len(input_cols)
|
633
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
634
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
635
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
636
|
+
):
|
637
|
+
missing_features.append(f)
|
638
|
+
elif input_cols[i] in features_in_dataset:
|
639
|
+
columns_to_select.append(input_cols[i])
|
640
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
641
|
+
columns_to_select.append(unquoted_input_cols[i])
|
642
|
+
else:
|
643
|
+
columns_to_select.append(quoted_input_cols[i])
|
644
|
+
|
645
|
+
if len(missing_features) > 0:
|
646
|
+
raise ValueError(
|
647
|
+
"The feature names should match with those that were passed during fit.\n"
|
648
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
649
|
+
f"Features in the input dataframe : {input_cols}\n"
|
650
|
+
)
|
651
|
+
input_df = dataset[columns_to_select]
|
652
|
+
input_df.columns = features_required_by_estimator
|
642
653
|
|
643
654
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
644
655
|
input_df
|