snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -625,26 +625,37 @@ class ShrunkCovariance(BaseTransformer):
|
|
625
625
|
# input cols need to match unquoted / quoted
|
626
626
|
input_cols = self.input_cols
|
627
627
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
628
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
628
629
|
|
629
630
|
estimator = self._sklearn_object
|
630
631
|
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
632
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
633
|
+
missing_features = []
|
634
|
+
features_in_dataset = set(dataset.columns)
|
635
|
+
columns_to_select = []
|
636
|
+
for i, f in enumerate(features_required_by_estimator):
|
637
|
+
if (
|
638
|
+
i >= len(input_cols)
|
639
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
640
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
641
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
642
|
+
):
|
643
|
+
missing_features.append(f)
|
644
|
+
elif input_cols[i] in features_in_dataset:
|
645
|
+
columns_to_select.append(input_cols[i])
|
646
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
647
|
+
columns_to_select.append(unquoted_input_cols[i])
|
648
|
+
else:
|
649
|
+
columns_to_select.append(quoted_input_cols[i])
|
650
|
+
|
651
|
+
if len(missing_features) > 0:
|
652
|
+
raise ValueError(
|
653
|
+
"The feature names should match with those that were passed during fit.\n"
|
654
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
655
|
+
f"Features in the input dataframe : {input_cols}\n"
|
656
|
+
)
|
657
|
+
input_df = dataset[columns_to_select]
|
658
|
+
input_df.columns = features_required_by_estimator
|
648
659
|
|
649
660
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
650
661
|
input_df
|
@@ -726,26 +726,37 @@ class DictionaryLearning(BaseTransformer):
|
|
726
726
|
# input cols need to match unquoted / quoted
|
727
727
|
input_cols = self.input_cols
|
728
728
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
729
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
729
730
|
|
730
731
|
estimator = self._sklearn_object
|
731
732
|
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
733
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
734
|
+
missing_features = []
|
735
|
+
features_in_dataset = set(dataset.columns)
|
736
|
+
columns_to_select = []
|
737
|
+
for i, f in enumerate(features_required_by_estimator):
|
738
|
+
if (
|
739
|
+
i >= len(input_cols)
|
740
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
741
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
742
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
743
|
+
):
|
744
|
+
missing_features.append(f)
|
745
|
+
elif input_cols[i] in features_in_dataset:
|
746
|
+
columns_to_select.append(input_cols[i])
|
747
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
748
|
+
columns_to_select.append(unquoted_input_cols[i])
|
749
|
+
else:
|
750
|
+
columns_to_select.append(quoted_input_cols[i])
|
751
|
+
|
752
|
+
if len(missing_features) > 0:
|
753
|
+
raise ValueError(
|
754
|
+
"The feature names should match with those that were passed during fit.\n"
|
755
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
756
|
+
f"Features in the input dataframe : {input_cols}\n"
|
757
|
+
)
|
758
|
+
input_df = dataset[columns_to_select]
|
759
|
+
input_df.columns = features_required_by_estimator
|
749
760
|
|
750
761
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
751
762
|
input_df
|
@@ -668,26 +668,37 @@ class FactorAnalysis(BaseTransformer):
|
|
668
668
|
# input cols need to match unquoted / quoted
|
669
669
|
input_cols = self.input_cols
|
670
670
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
671
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
671
672
|
|
672
673
|
estimator = self._sklearn_object
|
673
674
|
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
675
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
676
|
+
missing_features = []
|
677
|
+
features_in_dataset = set(dataset.columns)
|
678
|
+
columns_to_select = []
|
679
|
+
for i, f in enumerate(features_required_by_estimator):
|
680
|
+
if (
|
681
|
+
i >= len(input_cols)
|
682
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
683
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
684
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
685
|
+
):
|
686
|
+
missing_features.append(f)
|
687
|
+
elif input_cols[i] in features_in_dataset:
|
688
|
+
columns_to_select.append(input_cols[i])
|
689
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
690
|
+
columns_to_select.append(unquoted_input_cols[i])
|
691
|
+
else:
|
692
|
+
columns_to_select.append(quoted_input_cols[i])
|
693
|
+
|
694
|
+
if len(missing_features) > 0:
|
695
|
+
raise ValueError(
|
696
|
+
"The feature names should match with those that were passed during fit.\n"
|
697
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
698
|
+
f"Features in the input dataframe : {input_cols}\n"
|
699
|
+
)
|
700
|
+
input_df = dataset[columns_to_select]
|
701
|
+
input_df.columns = features_required_by_estimator
|
691
702
|
|
692
703
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
693
704
|
input_df
|
@@ -686,26 +686,37 @@ class FastICA(BaseTransformer):
|
|
686
686
|
# input cols need to match unquoted / quoted
|
687
687
|
input_cols = self.input_cols
|
688
688
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
689
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
689
690
|
|
690
691
|
estimator = self._sklearn_object
|
691
692
|
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
693
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
694
|
+
missing_features = []
|
695
|
+
features_in_dataset = set(dataset.columns)
|
696
|
+
columns_to_select = []
|
697
|
+
for i, f in enumerate(features_required_by_estimator):
|
698
|
+
if (
|
699
|
+
i >= len(input_cols)
|
700
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
701
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
702
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
703
|
+
):
|
704
|
+
missing_features.append(f)
|
705
|
+
elif input_cols[i] in features_in_dataset:
|
706
|
+
columns_to_select.append(input_cols[i])
|
707
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
708
|
+
columns_to_select.append(unquoted_input_cols[i])
|
709
|
+
else:
|
710
|
+
columns_to_select.append(quoted_input_cols[i])
|
711
|
+
|
712
|
+
if len(missing_features) > 0:
|
713
|
+
raise ValueError(
|
714
|
+
"The feature names should match with those that were passed during fit.\n"
|
715
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
716
|
+
f"Features in the input dataframe : {input_cols}\n"
|
717
|
+
)
|
718
|
+
input_df = dataset[columns_to_select]
|
719
|
+
input_df.columns = features_required_by_estimator
|
709
720
|
|
710
721
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
711
722
|
input_df
|
@@ -638,26 +638,37 @@ class IncrementalPCA(BaseTransformer):
|
|
638
638
|
# input cols need to match unquoted / quoted
|
639
639
|
input_cols = self.input_cols
|
640
640
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
641
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
641
642
|
|
642
643
|
estimator = self._sklearn_object
|
643
644
|
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
645
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
646
|
+
missing_features = []
|
647
|
+
features_in_dataset = set(dataset.columns)
|
648
|
+
columns_to_select = []
|
649
|
+
for i, f in enumerate(features_required_by_estimator):
|
650
|
+
if (
|
651
|
+
i >= len(input_cols)
|
652
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
653
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
654
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
655
|
+
):
|
656
|
+
missing_features.append(f)
|
657
|
+
elif input_cols[i] in features_in_dataset:
|
658
|
+
columns_to_select.append(input_cols[i])
|
659
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
660
|
+
columns_to_select.append(unquoted_input_cols[i])
|
661
|
+
else:
|
662
|
+
columns_to_select.append(quoted_input_cols[i])
|
663
|
+
|
664
|
+
if len(missing_features) > 0:
|
665
|
+
raise ValueError(
|
666
|
+
"The feature names should match with those that were passed during fit.\n"
|
667
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
668
|
+
f"Features in the input dataframe : {input_cols}\n"
|
669
|
+
)
|
670
|
+
input_df = dataset[columns_to_select]
|
671
|
+
input_df.columns = features_required_by_estimator
|
661
672
|
|
662
673
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
663
674
|
input_df
|
@@ -734,26 +734,37 @@ class KernelPCA(BaseTransformer):
|
|
734
734
|
# input cols need to match unquoted / quoted
|
735
735
|
input_cols = self.input_cols
|
736
736
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
737
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
737
738
|
|
738
739
|
estimator = self._sklearn_object
|
739
740
|
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
741
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
742
|
+
missing_features = []
|
743
|
+
features_in_dataset = set(dataset.columns)
|
744
|
+
columns_to_select = []
|
745
|
+
for i, f in enumerate(features_required_by_estimator):
|
746
|
+
if (
|
747
|
+
i >= len(input_cols)
|
748
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
749
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
750
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
751
|
+
):
|
752
|
+
missing_features.append(f)
|
753
|
+
elif input_cols[i] in features_in_dataset:
|
754
|
+
columns_to_select.append(input_cols[i])
|
755
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
756
|
+
columns_to_select.append(unquoted_input_cols[i])
|
757
|
+
else:
|
758
|
+
columns_to_select.append(quoted_input_cols[i])
|
759
|
+
|
760
|
+
if len(missing_features) > 0:
|
761
|
+
raise ValueError(
|
762
|
+
"The feature names should match with those that were passed during fit.\n"
|
763
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
764
|
+
f"Features in the input dataframe : {input_cols}\n"
|
765
|
+
)
|
766
|
+
input_df = dataset[columns_to_select]
|
767
|
+
input_df.columns = features_required_by_estimator
|
757
768
|
|
758
769
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
759
770
|
input_df
|
@@ -756,26 +756,37 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
756
756
|
# input cols need to match unquoted / quoted
|
757
757
|
input_cols = self.input_cols
|
758
758
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
759
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
759
760
|
|
760
761
|
estimator = self._sklearn_object
|
761
762
|
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
763
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
764
|
+
missing_features = []
|
765
|
+
features_in_dataset = set(dataset.columns)
|
766
|
+
columns_to_select = []
|
767
|
+
for i, f in enumerate(features_required_by_estimator):
|
768
|
+
if (
|
769
|
+
i >= len(input_cols)
|
770
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
771
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
772
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
773
|
+
):
|
774
|
+
missing_features.append(f)
|
775
|
+
elif input_cols[i] in features_in_dataset:
|
776
|
+
columns_to_select.append(input_cols[i])
|
777
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
778
|
+
columns_to_select.append(unquoted_input_cols[i])
|
779
|
+
else:
|
780
|
+
columns_to_select.append(quoted_input_cols[i])
|
781
|
+
|
782
|
+
if len(missing_features) > 0:
|
783
|
+
raise ValueError(
|
784
|
+
"The feature names should match with those that were passed during fit.\n"
|
785
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
786
|
+
f"Features in the input dataframe : {input_cols}\n"
|
787
|
+
)
|
788
|
+
input_df = dataset[columns_to_select]
|
789
|
+
input_df.columns = features_required_by_estimator
|
779
790
|
|
780
791
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
781
792
|
input_df
|
@@ -701,26 +701,37 @@ class MiniBatchSparsePCA(BaseTransformer):
|
|
701
701
|
# input cols need to match unquoted / quoted
|
702
702
|
input_cols = self.input_cols
|
703
703
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
704
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
704
705
|
|
705
706
|
estimator = self._sklearn_object
|
706
707
|
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
708
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
709
|
+
missing_features = []
|
710
|
+
features_in_dataset = set(dataset.columns)
|
711
|
+
columns_to_select = []
|
712
|
+
for i, f in enumerate(features_required_by_estimator):
|
713
|
+
if (
|
714
|
+
i >= len(input_cols)
|
715
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
716
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
717
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
718
|
+
):
|
719
|
+
missing_features.append(f)
|
720
|
+
elif input_cols[i] in features_in_dataset:
|
721
|
+
columns_to_select.append(input_cols[i])
|
722
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
723
|
+
columns_to_select.append(unquoted_input_cols[i])
|
724
|
+
else:
|
725
|
+
columns_to_select.append(quoted_input_cols[i])
|
726
|
+
|
727
|
+
if len(missing_features) > 0:
|
728
|
+
raise ValueError(
|
729
|
+
"The feature names should match with those that were passed during fit.\n"
|
730
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
731
|
+
f"Features in the input dataframe : {input_cols}\n"
|
732
|
+
)
|
733
|
+
input_df = dataset[columns_to_select]
|
734
|
+
input_df.columns = features_required_by_estimator
|
724
735
|
|
725
736
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
726
737
|
input_df
|
@@ -703,26 +703,37 @@ class PCA(BaseTransformer):
|
|
703
703
|
# input cols need to match unquoted / quoted
|
704
704
|
input_cols = self.input_cols
|
705
705
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
706
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
706
707
|
|
707
708
|
estimator = self._sklearn_object
|
708
709
|
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
710
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
711
|
+
missing_features = []
|
712
|
+
features_in_dataset = set(dataset.columns)
|
713
|
+
columns_to_select = []
|
714
|
+
for i, f in enumerate(features_required_by_estimator):
|
715
|
+
if (
|
716
|
+
i >= len(input_cols)
|
717
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
718
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
719
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
720
|
+
):
|
721
|
+
missing_features.append(f)
|
722
|
+
elif input_cols[i] in features_in_dataset:
|
723
|
+
columns_to_select.append(input_cols[i])
|
724
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
725
|
+
columns_to_select.append(unquoted_input_cols[i])
|
726
|
+
else:
|
727
|
+
columns_to_select.append(quoted_input_cols[i])
|
728
|
+
|
729
|
+
if len(missing_features) > 0:
|
730
|
+
raise ValueError(
|
731
|
+
"The feature names should match with those that were passed during fit.\n"
|
732
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
733
|
+
f"Features in the input dataframe : {input_cols}\n"
|
734
|
+
)
|
735
|
+
input_df = dataset[columns_to_select]
|
736
|
+
input_df.columns = features_required_by_estimator
|
726
737
|
|
727
738
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
728
739
|
input_df
|
@@ -676,26 +676,37 @@ class SparsePCA(BaseTransformer):
|
|
676
676
|
# input cols need to match unquoted / quoted
|
677
677
|
input_cols = self.input_cols
|
678
678
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
679
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
679
680
|
|
680
681
|
estimator = self._sklearn_object
|
681
682
|
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
683
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
684
|
+
missing_features = []
|
685
|
+
features_in_dataset = set(dataset.columns)
|
686
|
+
columns_to_select = []
|
687
|
+
for i, f in enumerate(features_required_by_estimator):
|
688
|
+
if (
|
689
|
+
i >= len(input_cols)
|
690
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
691
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
692
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
693
|
+
):
|
694
|
+
missing_features.append(f)
|
695
|
+
elif input_cols[i] in features_in_dataset:
|
696
|
+
columns_to_select.append(input_cols[i])
|
697
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
698
|
+
columns_to_select.append(unquoted_input_cols[i])
|
699
|
+
else:
|
700
|
+
columns_to_select.append(quoted_input_cols[i])
|
701
|
+
|
702
|
+
if len(missing_features) > 0:
|
703
|
+
raise ValueError(
|
704
|
+
"The feature names should match with those that were passed during fit.\n"
|
705
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
706
|
+
f"Features in the input dataframe : {input_cols}\n"
|
707
|
+
)
|
708
|
+
input_df = dataset[columns_to_select]
|
709
|
+
input_df.columns = features_required_by_estimator
|
699
710
|
|
700
711
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
701
712
|
input_df
|
@@ -657,26 +657,37 @@ class TruncatedSVD(BaseTransformer):
|
|
657
657
|
# input cols need to match unquoted / quoted
|
658
658
|
input_cols = self.input_cols
|
659
659
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
660
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
660
661
|
|
661
662
|
estimator = self._sklearn_object
|
662
663
|
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
664
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
665
|
+
missing_features = []
|
666
|
+
features_in_dataset = set(dataset.columns)
|
667
|
+
columns_to_select = []
|
668
|
+
for i, f in enumerate(features_required_by_estimator):
|
669
|
+
if (
|
670
|
+
i >= len(input_cols)
|
671
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
672
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
673
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
674
|
+
):
|
675
|
+
missing_features.append(f)
|
676
|
+
elif input_cols[i] in features_in_dataset:
|
677
|
+
columns_to_select.append(input_cols[i])
|
678
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
679
|
+
columns_to_select.append(unquoted_input_cols[i])
|
680
|
+
else:
|
681
|
+
columns_to_select.append(quoted_input_cols[i])
|
682
|
+
|
683
|
+
if len(missing_features) > 0:
|
684
|
+
raise ValueError(
|
685
|
+
"The feature names should match with those that were passed during fit.\n"
|
686
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
687
|
+
f"Features in the input dataframe : {input_cols}\n"
|
688
|
+
)
|
689
|
+
input_df = dataset[columns_to_select]
|
690
|
+
input_df.columns = features_required_by_estimator
|
680
691
|
|
681
692
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
682
693
|
input_df
|