snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -659,26 +659,37 @@ class AffinityPropagation(BaseTransformer):
|
|
659
659
|
# input cols need to match unquoted / quoted
|
660
660
|
input_cols = self.input_cols
|
661
661
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
662
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
662
663
|
|
663
664
|
estimator = self._sklearn_object
|
664
665
|
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
666
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
667
|
+
missing_features = []
|
668
|
+
features_in_dataset = set(dataset.columns)
|
669
|
+
columns_to_select = []
|
670
|
+
for i, f in enumerate(features_required_by_estimator):
|
671
|
+
if (
|
672
|
+
i >= len(input_cols)
|
673
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
674
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
675
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
676
|
+
):
|
677
|
+
missing_features.append(f)
|
678
|
+
elif input_cols[i] in features_in_dataset:
|
679
|
+
columns_to_select.append(input_cols[i])
|
680
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
681
|
+
columns_to_select.append(unquoted_input_cols[i])
|
682
|
+
else:
|
683
|
+
columns_to_select.append(quoted_input_cols[i])
|
684
|
+
|
685
|
+
if len(missing_features) > 0:
|
686
|
+
raise ValueError(
|
687
|
+
"The feature names should match with those that were passed during fit.\n"
|
688
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
689
|
+
f"Features in the input dataframe : {input_cols}\n"
|
690
|
+
)
|
691
|
+
input_df = dataset[columns_to_select]
|
692
|
+
input_df.columns = features_required_by_estimator
|
682
693
|
|
683
694
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
684
695
|
input_df
|
@@ -692,26 +692,37 @@ class AgglomerativeClustering(BaseTransformer):
|
|
692
692
|
# input cols need to match unquoted / quoted
|
693
693
|
input_cols = self.input_cols
|
694
694
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
695
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
695
696
|
|
696
697
|
estimator = self._sklearn_object
|
697
698
|
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
699
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
700
|
+
missing_features = []
|
701
|
+
features_in_dataset = set(dataset.columns)
|
702
|
+
columns_to_select = []
|
703
|
+
for i, f in enumerate(features_required_by_estimator):
|
704
|
+
if (
|
705
|
+
i >= len(input_cols)
|
706
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
707
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
708
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
709
|
+
):
|
710
|
+
missing_features.append(f)
|
711
|
+
elif input_cols[i] in features_in_dataset:
|
712
|
+
columns_to_select.append(input_cols[i])
|
713
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
714
|
+
columns_to_select.append(unquoted_input_cols[i])
|
715
|
+
else:
|
716
|
+
columns_to_select.append(quoted_input_cols[i])
|
717
|
+
|
718
|
+
if len(missing_features) > 0:
|
719
|
+
raise ValueError(
|
720
|
+
"The feature names should match with those that were passed during fit.\n"
|
721
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
722
|
+
f"Features in the input dataframe : {input_cols}\n"
|
723
|
+
)
|
724
|
+
input_df = dataset[columns_to_select]
|
725
|
+
input_df.columns = features_required_by_estimator
|
715
726
|
|
716
727
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
717
728
|
input_df
|
@@ -650,26 +650,37 @@ class Birch(BaseTransformer):
|
|
650
650
|
# input cols need to match unquoted / quoted
|
651
651
|
input_cols = self.input_cols
|
652
652
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
653
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
653
654
|
|
654
655
|
estimator = self._sklearn_object
|
655
656
|
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
657
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
658
|
+
missing_features = []
|
659
|
+
features_in_dataset = set(dataset.columns)
|
660
|
+
columns_to_select = []
|
661
|
+
for i, f in enumerate(features_required_by_estimator):
|
662
|
+
if (
|
663
|
+
i >= len(input_cols)
|
664
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
665
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
666
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
667
|
+
):
|
668
|
+
missing_features.append(f)
|
669
|
+
elif input_cols[i] in features_in_dataset:
|
670
|
+
columns_to_select.append(input_cols[i])
|
671
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
672
|
+
columns_to_select.append(unquoted_input_cols[i])
|
673
|
+
else:
|
674
|
+
columns_to_select.append(quoted_input_cols[i])
|
675
|
+
|
676
|
+
if len(missing_features) > 0:
|
677
|
+
raise ValueError(
|
678
|
+
"The feature names should match with those that were passed during fit.\n"
|
679
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
680
|
+
f"Features in the input dataframe : {input_cols}\n"
|
681
|
+
)
|
682
|
+
input_df = dataset[columns_to_select]
|
683
|
+
input_df.columns = features_required_by_estimator
|
673
684
|
|
674
685
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
675
686
|
input_df
|
@@ -699,26 +699,37 @@ class BisectingKMeans(BaseTransformer):
|
|
699
699
|
# input cols need to match unquoted / quoted
|
700
700
|
input_cols = self.input_cols
|
701
701
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
702
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
702
703
|
|
703
704
|
estimator = self._sklearn_object
|
704
705
|
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
706
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
707
|
+
missing_features = []
|
708
|
+
features_in_dataset = set(dataset.columns)
|
709
|
+
columns_to_select = []
|
710
|
+
for i, f in enumerate(features_required_by_estimator):
|
711
|
+
if (
|
712
|
+
i >= len(input_cols)
|
713
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
714
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
715
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
716
|
+
):
|
717
|
+
missing_features.append(f)
|
718
|
+
elif input_cols[i] in features_in_dataset:
|
719
|
+
columns_to_select.append(input_cols[i])
|
720
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
721
|
+
columns_to_select.append(unquoted_input_cols[i])
|
722
|
+
else:
|
723
|
+
columns_to_select.append(quoted_input_cols[i])
|
724
|
+
|
725
|
+
if len(missing_features) > 0:
|
726
|
+
raise ValueError(
|
727
|
+
"The feature names should match with those that were passed during fit.\n"
|
728
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
729
|
+
f"Features in the input dataframe : {input_cols}\n"
|
730
|
+
)
|
731
|
+
input_df = dataset[columns_to_select]
|
732
|
+
input_df.columns = features_required_by_estimator
|
722
733
|
|
723
734
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
724
735
|
input_df
|
@@ -667,26 +667,37 @@ class DBSCAN(BaseTransformer):
|
|
667
667
|
# input cols need to match unquoted / quoted
|
668
668
|
input_cols = self.input_cols
|
669
669
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
670
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
670
671
|
|
671
672
|
estimator = self._sklearn_object
|
672
673
|
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
674
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
675
|
+
missing_features = []
|
676
|
+
features_in_dataset = set(dataset.columns)
|
677
|
+
columns_to_select = []
|
678
|
+
for i, f in enumerate(features_required_by_estimator):
|
679
|
+
if (
|
680
|
+
i >= len(input_cols)
|
681
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
682
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
683
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
684
|
+
):
|
685
|
+
missing_features.append(f)
|
686
|
+
elif input_cols[i] in features_in_dataset:
|
687
|
+
columns_to_select.append(input_cols[i])
|
688
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
689
|
+
columns_to_select.append(unquoted_input_cols[i])
|
690
|
+
else:
|
691
|
+
columns_to_select.append(quoted_input_cols[i])
|
692
|
+
|
693
|
+
if len(missing_features) > 0:
|
694
|
+
raise ValueError(
|
695
|
+
"The feature names should match with those that were passed during fit.\n"
|
696
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
697
|
+
f"Features in the input dataframe : {input_cols}\n"
|
698
|
+
)
|
699
|
+
input_df = dataset[columns_to_select]
|
700
|
+
input_df.columns = features_required_by_estimator
|
690
701
|
|
691
702
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
692
703
|
input_df
|
@@ -699,26 +699,37 @@ class FeatureAgglomeration(BaseTransformer):
|
|
699
699
|
# input cols need to match unquoted / quoted
|
700
700
|
input_cols = self.input_cols
|
701
701
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
702
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
702
703
|
|
703
704
|
estimator = self._sklearn_object
|
704
705
|
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
706
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
707
|
+
missing_features = []
|
708
|
+
features_in_dataset = set(dataset.columns)
|
709
|
+
columns_to_select = []
|
710
|
+
for i, f in enumerate(features_required_by_estimator):
|
711
|
+
if (
|
712
|
+
i >= len(input_cols)
|
713
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
714
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
715
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
716
|
+
):
|
717
|
+
missing_features.append(f)
|
718
|
+
elif input_cols[i] in features_in_dataset:
|
719
|
+
columns_to_select.append(input_cols[i])
|
720
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
721
|
+
columns_to_select.append(unquoted_input_cols[i])
|
722
|
+
else:
|
723
|
+
columns_to_select.append(quoted_input_cols[i])
|
724
|
+
|
725
|
+
if len(missing_features) > 0:
|
726
|
+
raise ValueError(
|
727
|
+
"The feature names should match with those that were passed during fit.\n"
|
728
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
729
|
+
f"Features in the input dataframe : {input_cols}\n"
|
730
|
+
)
|
731
|
+
input_df = dataset[columns_to_select]
|
732
|
+
input_df.columns = features_required_by_estimator
|
722
733
|
|
723
734
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
724
735
|
input_df
|
@@ -694,26 +694,37 @@ class KMeans(BaseTransformer):
|
|
694
694
|
# input cols need to match unquoted / quoted
|
695
695
|
input_cols = self.input_cols
|
696
696
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
697
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
697
698
|
|
698
699
|
estimator = self._sklearn_object
|
699
700
|
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
701
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
702
|
+
missing_features = []
|
703
|
+
features_in_dataset = set(dataset.columns)
|
704
|
+
columns_to_select = []
|
705
|
+
for i, f in enumerate(features_required_by_estimator):
|
706
|
+
if (
|
707
|
+
i >= len(input_cols)
|
708
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
709
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
710
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
711
|
+
):
|
712
|
+
missing_features.append(f)
|
713
|
+
elif input_cols[i] in features_in_dataset:
|
714
|
+
columns_to_select.append(input_cols[i])
|
715
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
716
|
+
columns_to_select.append(unquoted_input_cols[i])
|
717
|
+
else:
|
718
|
+
columns_to_select.append(quoted_input_cols[i])
|
719
|
+
|
720
|
+
if len(missing_features) > 0:
|
721
|
+
raise ValueError(
|
722
|
+
"The feature names should match with those that were passed during fit.\n"
|
723
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
724
|
+
f"Features in the input dataframe : {input_cols}\n"
|
725
|
+
)
|
726
|
+
input_df = dataset[columns_to_select]
|
727
|
+
input_df.columns = features_required_by_estimator
|
717
728
|
|
718
729
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
719
730
|
input_df
|
@@ -670,26 +670,37 @@ class MeanShift(BaseTransformer):
|
|
670
670
|
# input cols need to match unquoted / quoted
|
671
671
|
input_cols = self.input_cols
|
672
672
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
673
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
673
674
|
|
674
675
|
estimator = self._sklearn_object
|
675
676
|
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
677
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
678
|
+
missing_features = []
|
679
|
+
features_in_dataset = set(dataset.columns)
|
680
|
+
columns_to_select = []
|
681
|
+
for i, f in enumerate(features_required_by_estimator):
|
682
|
+
if (
|
683
|
+
i >= len(input_cols)
|
684
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
685
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
686
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
687
|
+
):
|
688
|
+
missing_features.append(f)
|
689
|
+
elif input_cols[i] in features_in_dataset:
|
690
|
+
columns_to_select.append(input_cols[i])
|
691
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
692
|
+
columns_to_select.append(unquoted_input_cols[i])
|
693
|
+
else:
|
694
|
+
columns_to_select.append(quoted_input_cols[i])
|
695
|
+
|
696
|
+
if len(missing_features) > 0:
|
697
|
+
raise ValueError(
|
698
|
+
"The feature names should match with those that were passed during fit.\n"
|
699
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
700
|
+
f"Features in the input dataframe : {input_cols}\n"
|
701
|
+
)
|
702
|
+
input_df = dataset[columns_to_select]
|
703
|
+
input_df.columns = features_required_by_estimator
|
693
704
|
|
694
705
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
695
706
|
input_df
|
@@ -720,26 +720,37 @@ class MiniBatchKMeans(BaseTransformer):
|
|
720
720
|
# input cols need to match unquoted / quoted
|
721
721
|
input_cols = self.input_cols
|
722
722
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
723
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
723
724
|
|
724
725
|
estimator = self._sklearn_object
|
725
726
|
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
727
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
728
|
+
missing_features = []
|
729
|
+
features_in_dataset = set(dataset.columns)
|
730
|
+
columns_to_select = []
|
731
|
+
for i, f in enumerate(features_required_by_estimator):
|
732
|
+
if (
|
733
|
+
i >= len(input_cols)
|
734
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
735
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
736
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
737
|
+
):
|
738
|
+
missing_features.append(f)
|
739
|
+
elif input_cols[i] in features_in_dataset:
|
740
|
+
columns_to_select.append(input_cols[i])
|
741
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
742
|
+
columns_to_select.append(unquoted_input_cols[i])
|
743
|
+
else:
|
744
|
+
columns_to_select.append(quoted_input_cols[i])
|
745
|
+
|
746
|
+
if len(missing_features) > 0:
|
747
|
+
raise ValueError(
|
748
|
+
"The feature names should match with those that were passed during fit.\n"
|
749
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
750
|
+
f"Features in the input dataframe : {input_cols}\n"
|
751
|
+
)
|
752
|
+
input_df = dataset[columns_to_select]
|
753
|
+
input_df.columns = features_required_by_estimator
|
743
754
|
|
744
755
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
745
756
|
input_df
|
@@ -740,26 +740,37 @@ class OPTICS(BaseTransformer):
|
|
740
740
|
# input cols need to match unquoted / quoted
|
741
741
|
input_cols = self.input_cols
|
742
742
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
743
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
743
744
|
|
744
745
|
estimator = self._sklearn_object
|
745
746
|
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
747
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
748
|
+
missing_features = []
|
749
|
+
features_in_dataset = set(dataset.columns)
|
750
|
+
columns_to_select = []
|
751
|
+
for i, f in enumerate(features_required_by_estimator):
|
752
|
+
if (
|
753
|
+
i >= len(input_cols)
|
754
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
755
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
756
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
757
|
+
):
|
758
|
+
missing_features.append(f)
|
759
|
+
elif input_cols[i] in features_in_dataset:
|
760
|
+
columns_to_select.append(input_cols[i])
|
761
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
762
|
+
columns_to_select.append(unquoted_input_cols[i])
|
763
|
+
else:
|
764
|
+
columns_to_select.append(quoted_input_cols[i])
|
765
|
+
|
766
|
+
if len(missing_features) > 0:
|
767
|
+
raise ValueError(
|
768
|
+
"The feature names should match with those that were passed during fit.\n"
|
769
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
770
|
+
f"Features in the input dataframe : {input_cols}\n"
|
771
|
+
)
|
772
|
+
input_df = dataset[columns_to_select]
|
773
|
+
input_df.columns = features_required_by_estimator
|
763
774
|
|
764
775
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
765
776
|
input_df
|
@@ -678,26 +678,37 @@ class SpectralBiclustering(BaseTransformer):
|
|
678
678
|
# input cols need to match unquoted / quoted
|
679
679
|
input_cols = self.input_cols
|
680
680
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
681
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
681
682
|
|
682
683
|
estimator = self._sklearn_object
|
683
684
|
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
685
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
686
|
+
missing_features = []
|
687
|
+
features_in_dataset = set(dataset.columns)
|
688
|
+
columns_to_select = []
|
689
|
+
for i, f in enumerate(features_required_by_estimator):
|
690
|
+
if (
|
691
|
+
i >= len(input_cols)
|
692
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
693
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
694
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
695
|
+
):
|
696
|
+
missing_features.append(f)
|
697
|
+
elif input_cols[i] in features_in_dataset:
|
698
|
+
columns_to_select.append(input_cols[i])
|
699
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
700
|
+
columns_to_select.append(unquoted_input_cols[i])
|
701
|
+
else:
|
702
|
+
columns_to_select.append(quoted_input_cols[i])
|
703
|
+
|
704
|
+
if len(missing_features) > 0:
|
705
|
+
raise ValueError(
|
706
|
+
"The feature names should match with those that were passed during fit.\n"
|
707
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
708
|
+
f"Features in the input dataframe : {input_cols}\n"
|
709
|
+
)
|
710
|
+
input_df = dataset[columns_to_select]
|
711
|
+
input_df.columns = features_required_by_estimator
|
701
712
|
|
702
713
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
703
714
|
input_df
|