snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -647,26 +647,37 @@ class HuberRegressor(BaseTransformer):
|
|
647
647
|
# input cols need to match unquoted / quoted
|
648
648
|
input_cols = self.input_cols
|
649
649
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
650
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
650
651
|
|
651
652
|
estimator = self._sklearn_object
|
652
653
|
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
654
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
655
|
+
missing_features = []
|
656
|
+
features_in_dataset = set(dataset.columns)
|
657
|
+
columns_to_select = []
|
658
|
+
for i, f in enumerate(features_required_by_estimator):
|
659
|
+
if (
|
660
|
+
i >= len(input_cols)
|
661
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
662
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
663
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
664
|
+
):
|
665
|
+
missing_features.append(f)
|
666
|
+
elif input_cols[i] in features_in_dataset:
|
667
|
+
columns_to_select.append(input_cols[i])
|
668
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
669
|
+
columns_to_select.append(unquoted_input_cols[i])
|
670
|
+
else:
|
671
|
+
columns_to_select.append(quoted_input_cols[i])
|
672
|
+
|
673
|
+
if len(missing_features) > 0:
|
674
|
+
raise ValueError(
|
675
|
+
"The feature names should match with those that were passed during fit.\n"
|
676
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
677
|
+
f"Features in the input dataframe : {input_cols}\n"
|
678
|
+
)
|
679
|
+
input_df = dataset[columns_to_select]
|
680
|
+
input_df.columns = features_required_by_estimator
|
670
681
|
|
671
682
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
672
683
|
input_df
|
@@ -676,26 +676,37 @@ class Lars(BaseTransformer):
|
|
676
676
|
# input cols need to match unquoted / quoted
|
677
677
|
input_cols = self.input_cols
|
678
678
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
679
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
679
680
|
|
680
681
|
estimator = self._sklearn_object
|
681
682
|
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
683
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
684
|
+
missing_features = []
|
685
|
+
features_in_dataset = set(dataset.columns)
|
686
|
+
columns_to_select = []
|
687
|
+
for i, f in enumerate(features_required_by_estimator):
|
688
|
+
if (
|
689
|
+
i >= len(input_cols)
|
690
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
691
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
692
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
693
|
+
):
|
694
|
+
missing_features.append(f)
|
695
|
+
elif input_cols[i] in features_in_dataset:
|
696
|
+
columns_to_select.append(input_cols[i])
|
697
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
698
|
+
columns_to_select.append(unquoted_input_cols[i])
|
699
|
+
else:
|
700
|
+
columns_to_select.append(quoted_input_cols[i])
|
701
|
+
|
702
|
+
if len(missing_features) > 0:
|
703
|
+
raise ValueError(
|
704
|
+
"The feature names should match with those that were passed during fit.\n"
|
705
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
706
|
+
f"Features in the input dataframe : {input_cols}\n"
|
707
|
+
)
|
708
|
+
input_df = dataset[columns_to_select]
|
709
|
+
input_df.columns = features_required_by_estimator
|
699
710
|
|
700
711
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
701
712
|
input_df
|
@@ -684,26 +684,37 @@ class LarsCV(BaseTransformer):
|
|
684
684
|
# input cols need to match unquoted / quoted
|
685
685
|
input_cols = self.input_cols
|
686
686
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
687
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
687
688
|
|
688
689
|
estimator = self._sklearn_object
|
689
690
|
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
691
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
692
|
+
missing_features = []
|
693
|
+
features_in_dataset = set(dataset.columns)
|
694
|
+
columns_to_select = []
|
695
|
+
for i, f in enumerate(features_required_by_estimator):
|
696
|
+
if (
|
697
|
+
i >= len(input_cols)
|
698
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
699
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
700
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
701
|
+
):
|
702
|
+
missing_features.append(f)
|
703
|
+
elif input_cols[i] in features_in_dataset:
|
704
|
+
columns_to_select.append(input_cols[i])
|
705
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
706
|
+
columns_to_select.append(unquoted_input_cols[i])
|
707
|
+
else:
|
708
|
+
columns_to_select.append(quoted_input_cols[i])
|
709
|
+
|
710
|
+
if len(missing_features) > 0:
|
711
|
+
raise ValueError(
|
712
|
+
"The feature names should match with those that were passed during fit.\n"
|
713
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
714
|
+
f"Features in the input dataframe : {input_cols}\n"
|
715
|
+
)
|
716
|
+
input_df = dataset[columns_to_select]
|
717
|
+
input_df.columns = features_required_by_estimator
|
707
718
|
|
708
719
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
709
720
|
input_df
|
@@ -677,26 +677,37 @@ class Lasso(BaseTransformer):
|
|
677
677
|
# input cols need to match unquoted / quoted
|
678
678
|
input_cols = self.input_cols
|
679
679
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
680
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
680
681
|
|
681
682
|
estimator = self._sklearn_object
|
682
683
|
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
684
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
685
|
+
missing_features = []
|
686
|
+
features_in_dataset = set(dataset.columns)
|
687
|
+
columns_to_select = []
|
688
|
+
for i, f in enumerate(features_required_by_estimator):
|
689
|
+
if (
|
690
|
+
i >= len(input_cols)
|
691
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
692
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
693
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
694
|
+
):
|
695
|
+
missing_features.append(f)
|
696
|
+
elif input_cols[i] in features_in_dataset:
|
697
|
+
columns_to_select.append(input_cols[i])
|
698
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
699
|
+
columns_to_select.append(unquoted_input_cols[i])
|
700
|
+
else:
|
701
|
+
columns_to_select.append(quoted_input_cols[i])
|
702
|
+
|
703
|
+
if len(missing_features) > 0:
|
704
|
+
raise ValueError(
|
705
|
+
"The feature names should match with those that were passed during fit.\n"
|
706
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
707
|
+
f"Features in the input dataframe : {input_cols}\n"
|
708
|
+
)
|
709
|
+
input_df = dataset[columns_to_select]
|
710
|
+
input_df.columns = features_required_by_estimator
|
700
711
|
|
701
712
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
702
713
|
input_df
|
@@ -705,26 +705,37 @@ class LassoCV(BaseTransformer):
|
|
705
705
|
# input cols need to match unquoted / quoted
|
706
706
|
input_cols = self.input_cols
|
707
707
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
708
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
708
709
|
|
709
710
|
estimator = self._sklearn_object
|
710
711
|
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
712
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
713
|
+
missing_features = []
|
714
|
+
features_in_dataset = set(dataset.columns)
|
715
|
+
columns_to_select = []
|
716
|
+
for i, f in enumerate(features_required_by_estimator):
|
717
|
+
if (
|
718
|
+
i >= len(input_cols)
|
719
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
720
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
721
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
722
|
+
):
|
723
|
+
missing_features.append(f)
|
724
|
+
elif input_cols[i] in features_in_dataset:
|
725
|
+
columns_to_select.append(input_cols[i])
|
726
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
727
|
+
columns_to_select.append(unquoted_input_cols[i])
|
728
|
+
else:
|
729
|
+
columns_to_select.append(quoted_input_cols[i])
|
730
|
+
|
731
|
+
if len(missing_features) > 0:
|
732
|
+
raise ValueError(
|
733
|
+
"The feature names should match with those that were passed during fit.\n"
|
734
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
735
|
+
f"Features in the input dataframe : {input_cols}\n"
|
736
|
+
)
|
737
|
+
input_df = dataset[columns_to_select]
|
738
|
+
input_df.columns = features_required_by_estimator
|
728
739
|
|
729
740
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
730
741
|
input_df
|
@@ -697,26 +697,37 @@ class LassoLars(BaseTransformer):
|
|
697
697
|
# input cols need to match unquoted / quoted
|
698
698
|
input_cols = self.input_cols
|
699
699
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
700
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
700
701
|
|
701
702
|
estimator = self._sklearn_object
|
702
703
|
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
704
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
705
|
+
missing_features = []
|
706
|
+
features_in_dataset = set(dataset.columns)
|
707
|
+
columns_to_select = []
|
708
|
+
for i, f in enumerate(features_required_by_estimator):
|
709
|
+
if (
|
710
|
+
i >= len(input_cols)
|
711
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
712
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
713
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
714
|
+
):
|
715
|
+
missing_features.append(f)
|
716
|
+
elif input_cols[i] in features_in_dataset:
|
717
|
+
columns_to_select.append(input_cols[i])
|
718
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
719
|
+
columns_to_select.append(unquoted_input_cols[i])
|
720
|
+
else:
|
721
|
+
columns_to_select.append(quoted_input_cols[i])
|
722
|
+
|
723
|
+
if len(missing_features) > 0:
|
724
|
+
raise ValueError(
|
725
|
+
"The feature names should match with those that were passed during fit.\n"
|
726
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
727
|
+
f"Features in the input dataframe : {input_cols}\n"
|
728
|
+
)
|
729
|
+
input_df = dataset[columns_to_select]
|
730
|
+
input_df.columns = features_required_by_estimator
|
720
731
|
|
721
732
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
722
733
|
input_df
|
@@ -698,26 +698,37 @@ class LassoLarsCV(BaseTransformer):
|
|
698
698
|
# input cols need to match unquoted / quoted
|
699
699
|
input_cols = self.input_cols
|
700
700
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
701
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
701
702
|
|
702
703
|
estimator = self._sklearn_object
|
703
704
|
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
705
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
706
|
+
missing_features = []
|
707
|
+
features_in_dataset = set(dataset.columns)
|
708
|
+
columns_to_select = []
|
709
|
+
for i, f in enumerate(features_required_by_estimator):
|
710
|
+
if (
|
711
|
+
i >= len(input_cols)
|
712
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
713
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
714
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
715
|
+
):
|
716
|
+
missing_features.append(f)
|
717
|
+
elif input_cols[i] in features_in_dataset:
|
718
|
+
columns_to_select.append(input_cols[i])
|
719
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
720
|
+
columns_to_select.append(unquoted_input_cols[i])
|
721
|
+
else:
|
722
|
+
columns_to_select.append(quoted_input_cols[i])
|
723
|
+
|
724
|
+
if len(missing_features) > 0:
|
725
|
+
raise ValueError(
|
726
|
+
"The feature names should match with those that were passed during fit.\n"
|
727
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
728
|
+
f"Features in the input dataframe : {input_cols}\n"
|
729
|
+
)
|
730
|
+
input_df = dataset[columns_to_select]
|
731
|
+
input_df.columns = features_required_by_estimator
|
721
732
|
|
722
733
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
723
734
|
input_df
|
@@ -681,26 +681,37 @@ class LassoLarsIC(BaseTransformer):
|
|
681
681
|
# input cols need to match unquoted / quoted
|
682
682
|
input_cols = self.input_cols
|
683
683
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
684
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
684
685
|
|
685
686
|
estimator = self._sklearn_object
|
686
687
|
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
688
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
689
|
+
missing_features = []
|
690
|
+
features_in_dataset = set(dataset.columns)
|
691
|
+
columns_to_select = []
|
692
|
+
for i, f in enumerate(features_required_by_estimator):
|
693
|
+
if (
|
694
|
+
i >= len(input_cols)
|
695
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
696
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
697
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
698
|
+
):
|
699
|
+
missing_features.append(f)
|
700
|
+
elif input_cols[i] in features_in_dataset:
|
701
|
+
columns_to_select.append(input_cols[i])
|
702
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
703
|
+
columns_to_select.append(unquoted_input_cols[i])
|
704
|
+
else:
|
705
|
+
columns_to_select.append(quoted_input_cols[i])
|
706
|
+
|
707
|
+
if len(missing_features) > 0:
|
708
|
+
raise ValueError(
|
709
|
+
"The feature names should match with those that were passed during fit.\n"
|
710
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
711
|
+
f"Features in the input dataframe : {input_cols}\n"
|
712
|
+
)
|
713
|
+
input_df = dataset[columns_to_select]
|
714
|
+
input_df.columns = features_required_by_estimator
|
704
715
|
|
705
716
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
706
717
|
input_df
|
@@ -634,26 +634,37 @@ class LinearRegression(BaseTransformer):
|
|
634
634
|
# input cols need to match unquoted / quoted
|
635
635
|
input_cols = self.input_cols
|
636
636
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
637
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
637
638
|
|
638
639
|
estimator = self._sklearn_object
|
639
640
|
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
641
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
642
|
+
missing_features = []
|
643
|
+
features_in_dataset = set(dataset.columns)
|
644
|
+
columns_to_select = []
|
645
|
+
for i, f in enumerate(features_required_by_estimator):
|
646
|
+
if (
|
647
|
+
i >= len(input_cols)
|
648
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
649
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
650
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
651
|
+
):
|
652
|
+
missing_features.append(f)
|
653
|
+
elif input_cols[i] in features_in_dataset:
|
654
|
+
columns_to_select.append(input_cols[i])
|
655
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
656
|
+
columns_to_select.append(unquoted_input_cols[i])
|
657
|
+
else:
|
658
|
+
columns_to_select.append(quoted_input_cols[i])
|
659
|
+
|
660
|
+
if len(missing_features) > 0:
|
661
|
+
raise ValueError(
|
662
|
+
"The feature names should match with those that were passed during fit.\n"
|
663
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
664
|
+
f"Features in the input dataframe : {input_cols}\n"
|
665
|
+
)
|
666
|
+
input_df = dataset[columns_to_select]
|
667
|
+
input_df.columns = features_required_by_estimator
|
657
668
|
|
658
669
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
659
670
|
input_df
|
@@ -748,26 +748,37 @@ class LogisticRegression(BaseTransformer):
|
|
748
748
|
# input cols need to match unquoted / quoted
|
749
749
|
input_cols = self.input_cols
|
750
750
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
751
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
751
752
|
|
752
753
|
estimator = self._sklearn_object
|
753
754
|
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
755
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
756
|
+
missing_features = []
|
757
|
+
features_in_dataset = set(dataset.columns)
|
758
|
+
columns_to_select = []
|
759
|
+
for i, f in enumerate(features_required_by_estimator):
|
760
|
+
if (
|
761
|
+
i >= len(input_cols)
|
762
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
763
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
764
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
765
|
+
):
|
766
|
+
missing_features.append(f)
|
767
|
+
elif input_cols[i] in features_in_dataset:
|
768
|
+
columns_to_select.append(input_cols[i])
|
769
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
770
|
+
columns_to_select.append(unquoted_input_cols[i])
|
771
|
+
else:
|
772
|
+
columns_to_select.append(quoted_input_cols[i])
|
773
|
+
|
774
|
+
if len(missing_features) > 0:
|
775
|
+
raise ValueError(
|
776
|
+
"The feature names should match with those that were passed during fit.\n"
|
777
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
778
|
+
f"Features in the input dataframe : {input_cols}\n"
|
779
|
+
)
|
780
|
+
input_df = dataset[columns_to_select]
|
781
|
+
input_df.columns = features_required_by_estimator
|
771
782
|
|
772
783
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
773
784
|
input_df
|
@@ -769,26 +769,37 @@ class LogisticRegressionCV(BaseTransformer):
|
|
769
769
|
# input cols need to match unquoted / quoted
|
770
770
|
input_cols = self.input_cols
|
771
771
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
772
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
772
773
|
|
773
774
|
estimator = self._sklearn_object
|
774
775
|
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
776
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
777
|
+
missing_features = []
|
778
|
+
features_in_dataset = set(dataset.columns)
|
779
|
+
columns_to_select = []
|
780
|
+
for i, f in enumerate(features_required_by_estimator):
|
781
|
+
if (
|
782
|
+
i >= len(input_cols)
|
783
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
784
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
785
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
786
|
+
):
|
787
|
+
missing_features.append(f)
|
788
|
+
elif input_cols[i] in features_in_dataset:
|
789
|
+
columns_to_select.append(input_cols[i])
|
790
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
791
|
+
columns_to_select.append(unquoted_input_cols[i])
|
792
|
+
else:
|
793
|
+
columns_to_select.append(quoted_input_cols[i])
|
794
|
+
|
795
|
+
if len(missing_features) > 0:
|
796
|
+
raise ValueError(
|
797
|
+
"The feature names should match with those that were passed during fit.\n"
|
798
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
799
|
+
f"Features in the input dataframe : {input_cols}\n"
|
800
|
+
)
|
801
|
+
input_df = dataset[columns_to_select]
|
802
|
+
input_df.columns = features_required_by_estimator
|
792
803
|
|
793
804
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
794
805
|
input_df
|