snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
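Most of the churn in this release is a single, mechanically applied patch to the generated modeling wrappers (the long run of +28 -17 files above); representative hunks follow. The patch builds a third spelling of each input column via identifier.get_escaped_names, so inference can match columns whether the estimator saw quoted or unquoted Snowflake identifiers during fit. As a rough, hedged illustration of the quoted/unquoted identifier convention involved (general Snowflake naming rules only, not the library's actual get_unescaped_names / get_escaped_names implementation):

# Illustration only -- general Snowflake identifier rules, not the library's helpers.
def unescape_name(name: str) -> str:
    # Quoted identifiers keep their case; embedded quotes are doubled inside the quotes.
    if name.startswith('"') and name.endswith('"'):
        return name[1:-1].replace('""', '"')
    # Unquoted identifiers are case-insensitive and fold to upper case.
    return name.upper()

def escape_name(name: str) -> str:
    # Wrap in double quotes, doubling embedded quotes, so the exact case is preserved.
    return '"' + name.replace('"', '""') + '"'

assert unescape_name('"Feature One"') == "Feature One"
assert escape_name("Feature One") == '"Feature One"'
assert unescape_name("feature_one") == "FEATURE_ONE"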
snowflake/ml/modeling/feature_selection/select_k_best.py
@@ -621,26 +621,37 @@ class SelectKBest(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
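The same change recurs in every hunk that follows: instead of selecting dataset[input_cols] directly and renaming afterwards, each generated wrapper now resolves every feature the fitted estimator expects against the original, unquoted, and quoted spellings of its input columns, and only then selects. A minimal standalone sketch of that resolution step, using plain pandas and a hypothetical helper name (in the package the logic is inlined into each generated class, as above):

import pandas as pd

def select_for_inference(dataset, features_required, input_cols, unquoted_cols, quoted_cols):
    # Pick, for each feature the estimator expects, whichever spelling of the
    # configured input column is actually present in the dataframe.
    present = set(dataset.columns)
    columns_to_select, missing = [], []
    for i, f in enumerate(features_required):
        if (
            i >= len(input_cols)
            or (input_cols[i] != f and unquoted_cols[i] != f and quoted_cols[i] != f)
            or (input_cols[i] not in present and unquoted_cols[i] not in present
                and quoted_cols[i] not in present)
        ):
            missing.append(f)
        elif input_cols[i] in present:
            columns_to_select.append(input_cols[i])
        elif unquoted_cols[i] in present:
            columns_to_select.append(unquoted_cols[i])
        else:
            columns_to_select.append(quoted_cols[i])
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    input_df = dataset[columns_to_select]
    input_df.columns = features_required  # rename to the names the estimator was fitted with
    return input_df

# Example: one unquoted and one quoted column name in the incoming dataframe.
df = pd.DataFrame({"AGE": [42], '"Income"': [1000]})
out = select_for_inference(df, ["AGE", "Income"], ["AGE", '"Income"'], ["AGE", "Income"], ['"AGE"', '"Income"'])
# out.columns -> ["AGE", "Income"]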
snowflake/ml/modeling/feature_selection/select_percentile.py
@@ -620,26 +620,37 @@ class SelectPercentile(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
@@ -680,26 +680,37 @@ class SequentialFeatureSelector(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/feature_selection/variance_threshold.py
@@ -613,26 +613,37 @@ class VarianceThreshold(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py
@@ -706,26 +706,37 @@ class GaussianProcessClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py
@@ -689,26 +689,37 @@ class GaussianProcessRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/impute/iterative_imputer.py
@@ -732,26 +732,37 @@ class IterativeImputer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/impute/knn_imputer.py
@@ -667,26 +667,37 @@ class KNNImputer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/impute/missing_indicator.py
@@ -641,26 +641,37 @@ class MissingIndicator(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py
@@ -616,26 +616,37 @@ class AdditiveChi2Sampler(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/kernel_approximation/nystroem.py
@@ -664,26 +664,37 @@ class Nystroem(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df