snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -775,26 +775,37 @@ class HistGradientBoostingRegressor(BaseTransformer):
775
775
  # input cols need to match unquoted / quoted
776
776
  input_cols = self.input_cols
777
777
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
778
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
778
779
 
779
780
  estimator = self._sklearn_object
780
781
 
781
- input_df = dataset[input_cols] # Select input columns with quoted column names.
782
- if hasattr(estimator, "feature_names_in_"):
783
- missing_features = []
784
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
785
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
786
- missing_features.append(f)
787
-
788
- if len(missing_features) > 0:
789
- raise ValueError(
790
- "The feature names should match with those that were passed during fit.\n"
791
- f"Features seen during fit call but not present in the input: {missing_features}\n"
792
- f"Features in the input dataframe : {input_cols}\n"
793
- )
794
- input_df.columns = getattr(estimator, "feature_names_in_")
795
- else:
796
- # Just rename the column names to unquoted identifiers.
797
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
782
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
783
+ missing_features = []
784
+ features_in_dataset = set(dataset.columns)
785
+ columns_to_select = []
786
+ for i, f in enumerate(features_required_by_estimator):
787
+ if (
788
+ i >= len(input_cols)
789
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
790
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
791
+ and quoted_input_cols[i] not in features_in_dataset)
792
+ ):
793
+ missing_features.append(f)
794
+ elif input_cols[i] in features_in_dataset:
795
+ columns_to_select.append(input_cols[i])
796
+ elif unquoted_input_cols[i] in features_in_dataset:
797
+ columns_to_select.append(unquoted_input_cols[i])
798
+ else:
799
+ columns_to_select.append(quoted_input_cols[i])
800
+
801
+ if len(missing_features) > 0:
802
+ raise ValueError(
803
+ "The feature names should match with those that were passed during fit.\n"
804
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
805
+ f"Features in the input dataframe : {input_cols}\n"
806
+ )
807
+ input_df = dataset[columns_to_select]
808
+ input_df.columns = features_required_by_estimator
798
809
 
799
810
  transformed_numpy_array = getattr(estimator, inference_method)(
800
811
  input_df
@@ -680,26 +680,37 @@ class IsolationForest(BaseTransformer):
680
680
  # input cols need to match unquoted / quoted
681
681
  input_cols = self.input_cols
682
682
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
683
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
683
684
 
684
685
  estimator = self._sklearn_object
685
686
 
686
- input_df = dataset[input_cols] # Select input columns with quoted column names.
687
- if hasattr(estimator, "feature_names_in_"):
688
- missing_features = []
689
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
690
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
691
- missing_features.append(f)
692
-
693
- if len(missing_features) > 0:
694
- raise ValueError(
695
- "The feature names should match with those that were passed during fit.\n"
696
- f"Features seen during fit call but not present in the input: {missing_features}\n"
697
- f"Features in the input dataframe : {input_cols}\n"
698
- )
699
- input_df.columns = getattr(estimator, "feature_names_in_")
700
- else:
701
- # Just rename the column names to unquoted identifiers.
702
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
687
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
688
+ missing_features = []
689
+ features_in_dataset = set(dataset.columns)
690
+ columns_to_select = []
691
+ for i, f in enumerate(features_required_by_estimator):
692
+ if (
693
+ i >= len(input_cols)
694
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
695
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
696
+ and quoted_input_cols[i] not in features_in_dataset)
697
+ ):
698
+ missing_features.append(f)
699
+ elif input_cols[i] in features_in_dataset:
700
+ columns_to_select.append(input_cols[i])
701
+ elif unquoted_input_cols[i] in features_in_dataset:
702
+ columns_to_select.append(unquoted_input_cols[i])
703
+ else:
704
+ columns_to_select.append(quoted_input_cols[i])
705
+
706
+ if len(missing_features) > 0:
707
+ raise ValueError(
708
+ "The feature names should match with those that were passed during fit.\n"
709
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
710
+ f"Features in the input dataframe : {input_cols}\n"
711
+ )
712
+ input_df = dataset[columns_to_select]
713
+ input_df.columns = features_required_by_estimator
703
714
 
704
715
  transformed_numpy_array = getattr(estimator, inference_method)(
705
716
  input_df
@@ -789,26 +789,37 @@ class RandomForestClassifier(BaseTransformer):
789
789
  # input cols need to match unquoted / quoted
790
790
  input_cols = self.input_cols
791
791
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
792
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
792
793
 
793
794
  estimator = self._sklearn_object
794
795
 
795
- input_df = dataset[input_cols] # Select input columns with quoted column names.
796
- if hasattr(estimator, "feature_names_in_"):
797
- missing_features = []
798
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
799
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
800
- missing_features.append(f)
801
-
802
- if len(missing_features) > 0:
803
- raise ValueError(
804
- "The feature names should match with those that were passed during fit.\n"
805
- f"Features seen during fit call but not present in the input: {missing_features}\n"
806
- f"Features in the input dataframe : {input_cols}\n"
807
- )
808
- input_df.columns = getattr(estimator, "feature_names_in_")
809
- else:
810
- # Just rename the column names to unquoted identifiers.
811
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
796
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
797
+ missing_features = []
798
+ features_in_dataset = set(dataset.columns)
799
+ columns_to_select = []
800
+ for i, f in enumerate(features_required_by_estimator):
801
+ if (
802
+ i >= len(input_cols)
803
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
804
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
805
+ and quoted_input_cols[i] not in features_in_dataset)
806
+ ):
807
+ missing_features.append(f)
808
+ elif input_cols[i] in features_in_dataset:
809
+ columns_to_select.append(input_cols[i])
810
+ elif unquoted_input_cols[i] in features_in_dataset:
811
+ columns_to_select.append(unquoted_input_cols[i])
812
+ else:
813
+ columns_to_select.append(quoted_input_cols[i])
814
+
815
+ if len(missing_features) > 0:
816
+ raise ValueError(
817
+ "The feature names should match with those that were passed during fit.\n"
818
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
819
+ f"Features in the input dataframe : {input_cols}\n"
820
+ )
821
+ input_df = dataset[columns_to_select]
822
+ input_df.columns = features_required_by_estimator
812
823
 
813
824
  transformed_numpy_array = getattr(estimator, inference_method)(
814
825
  input_df
@@ -768,26 +768,37 @@ class RandomForestRegressor(BaseTransformer):
768
768
  # input cols need to match unquoted / quoted
769
769
  input_cols = self.input_cols
770
770
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
771
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
771
772
 
772
773
  estimator = self._sklearn_object
773
774
 
774
- input_df = dataset[input_cols] # Select input columns with quoted column names.
775
- if hasattr(estimator, "feature_names_in_"):
776
- missing_features = []
777
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
778
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
779
- missing_features.append(f)
780
-
781
- if len(missing_features) > 0:
782
- raise ValueError(
783
- "The feature names should match with those that were passed during fit.\n"
784
- f"Features seen during fit call but not present in the input: {missing_features}\n"
785
- f"Features in the input dataframe : {input_cols}\n"
786
- )
787
- input_df.columns = getattr(estimator, "feature_names_in_")
788
- else:
789
- # Just rename the column names to unquoted identifiers.
790
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
775
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
776
+ missing_features = []
777
+ features_in_dataset = set(dataset.columns)
778
+ columns_to_select = []
779
+ for i, f in enumerate(features_required_by_estimator):
780
+ if (
781
+ i >= len(input_cols)
782
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
783
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
784
+ and quoted_input_cols[i] not in features_in_dataset)
785
+ ):
786
+ missing_features.append(f)
787
+ elif input_cols[i] in features_in_dataset:
788
+ columns_to_select.append(input_cols[i])
789
+ elif unquoted_input_cols[i] in features_in_dataset:
790
+ columns_to_select.append(unquoted_input_cols[i])
791
+ else:
792
+ columns_to_select.append(quoted_input_cols[i])
793
+
794
+ if len(missing_features) > 0:
795
+ raise ValueError(
796
+ "The feature names should match with those that were passed during fit.\n"
797
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
798
+ f"Features in the input dataframe : {input_cols}\n"
799
+ )
800
+ input_df = dataset[columns_to_select]
801
+ input_df.columns = features_required_by_estimator
791
802
 
792
803
  transformed_numpy_array = getattr(estimator, inference_method)(
793
804
  input_df
@@ -670,26 +670,37 @@ class StackingRegressor(BaseTransformer):
670
670
  # input cols need to match unquoted / quoted
671
671
  input_cols = self.input_cols
672
672
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
673
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
673
674
 
674
675
  estimator = self._sklearn_object
675
676
 
676
- input_df = dataset[input_cols] # Select input columns with quoted column names.
677
- if hasattr(estimator, "feature_names_in_"):
678
- missing_features = []
679
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
680
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
681
- missing_features.append(f)
682
-
683
- if len(missing_features) > 0:
684
- raise ValueError(
685
- "The feature names should match with those that were passed during fit.\n"
686
- f"Features seen during fit call but not present in the input: {missing_features}\n"
687
- f"Features in the input dataframe : {input_cols}\n"
688
- )
689
- input_df.columns = getattr(estimator, "feature_names_in_")
690
- else:
691
- # Just rename the column names to unquoted identifiers.
692
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
677
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
678
+ missing_features = []
679
+ features_in_dataset = set(dataset.columns)
680
+ columns_to_select = []
681
+ for i, f in enumerate(features_required_by_estimator):
682
+ if (
683
+ i >= len(input_cols)
684
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
685
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
686
+ and quoted_input_cols[i] not in features_in_dataset)
687
+ ):
688
+ missing_features.append(f)
689
+ elif input_cols[i] in features_in_dataset:
690
+ columns_to_select.append(input_cols[i])
691
+ elif unquoted_input_cols[i] in features_in_dataset:
692
+ columns_to_select.append(unquoted_input_cols[i])
693
+ else:
694
+ columns_to_select.append(quoted_input_cols[i])
695
+
696
+ if len(missing_features) > 0:
697
+ raise ValueError(
698
+ "The feature names should match with those that were passed during fit.\n"
699
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
700
+ f"Features in the input dataframe : {input_cols}\n"
701
+ )
702
+ input_df = dataset[columns_to_select]
703
+ input_df.columns = features_required_by_estimator
693
704
 
694
705
  transformed_numpy_array = getattr(estimator, inference_method)(
695
706
  input_df
@@ -652,26 +652,37 @@ class VotingClassifier(BaseTransformer):
652
652
  # input cols need to match unquoted / quoted
653
653
  input_cols = self.input_cols
654
654
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
655
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
655
656
 
656
657
  estimator = self._sklearn_object
657
658
 
658
- input_df = dataset[input_cols] # Select input columns with quoted column names.
659
- if hasattr(estimator, "feature_names_in_"):
660
- missing_features = []
661
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
662
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
663
- missing_features.append(f)
664
-
665
- if len(missing_features) > 0:
666
- raise ValueError(
667
- "The feature names should match with those that were passed during fit.\n"
668
- f"Features seen during fit call but not present in the input: {missing_features}\n"
669
- f"Features in the input dataframe : {input_cols}\n"
670
- )
671
- input_df.columns = getattr(estimator, "feature_names_in_")
672
- else:
673
- # Just rename the column names to unquoted identifiers.
674
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
659
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
660
+ missing_features = []
661
+ features_in_dataset = set(dataset.columns)
662
+ columns_to_select = []
663
+ for i, f in enumerate(features_required_by_estimator):
664
+ if (
665
+ i >= len(input_cols)
666
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
667
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
668
+ and quoted_input_cols[i] not in features_in_dataset)
669
+ ):
670
+ missing_features.append(f)
671
+ elif input_cols[i] in features_in_dataset:
672
+ columns_to_select.append(input_cols[i])
673
+ elif unquoted_input_cols[i] in features_in_dataset:
674
+ columns_to_select.append(unquoted_input_cols[i])
675
+ else:
676
+ columns_to_select.append(quoted_input_cols[i])
677
+
678
+ if len(missing_features) > 0:
679
+ raise ValueError(
680
+ "The feature names should match with those that were passed during fit.\n"
681
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
682
+ f"Features in the input dataframe : {input_cols}\n"
683
+ )
684
+ input_df = dataset[columns_to_select]
685
+ input_df.columns = features_required_by_estimator
675
686
 
676
687
  transformed_numpy_array = getattr(estimator, inference_method)(
677
688
  input_df
@@ -634,26 +634,37 @@ class VotingRegressor(BaseTransformer):
634
634
  # input cols need to match unquoted / quoted
635
635
  input_cols = self.input_cols
636
636
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
637
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
637
638
 
638
639
  estimator = self._sklearn_object
639
640
 
640
- input_df = dataset[input_cols] # Select input columns with quoted column names.
641
- if hasattr(estimator, "feature_names_in_"):
642
- missing_features = []
643
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
644
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
645
- missing_features.append(f)
646
-
647
- if len(missing_features) > 0:
648
- raise ValueError(
649
- "The feature names should match with those that were passed during fit.\n"
650
- f"Features seen during fit call but not present in the input: {missing_features}\n"
651
- f"Features in the input dataframe : {input_cols}\n"
652
- )
653
- input_df.columns = getattr(estimator, "feature_names_in_")
654
- else:
655
- # Just rename the column names to unquoted identifiers.
656
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
641
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
642
+ missing_features = []
643
+ features_in_dataset = set(dataset.columns)
644
+ columns_to_select = []
645
+ for i, f in enumerate(features_required_by_estimator):
646
+ if (
647
+ i >= len(input_cols)
648
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
649
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
650
+ and quoted_input_cols[i] not in features_in_dataset)
651
+ ):
652
+ missing_features.append(f)
653
+ elif input_cols[i] in features_in_dataset:
654
+ columns_to_select.append(input_cols[i])
655
+ elif unquoted_input_cols[i] in features_in_dataset:
656
+ columns_to_select.append(unquoted_input_cols[i])
657
+ else:
658
+ columns_to_select.append(quoted_input_cols[i])
659
+
660
+ if len(missing_features) > 0:
661
+ raise ValueError(
662
+ "The feature names should match with those that were passed during fit.\n"
663
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
664
+ f"Features in the input dataframe : {input_cols}\n"
665
+ )
666
+ input_df = dataset[columns_to_select]
667
+ input_df.columns = features_required_by_estimator
657
668
 
658
669
  transformed_numpy_array = getattr(estimator, inference_method)(
659
670
  input_df
@@ -624,26 +624,37 @@ class GenericUnivariateSelect(BaseTransformer):
624
624
  # input cols need to match unquoted / quoted
625
625
  input_cols = self.input_cols
626
626
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
627
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
627
628
 
628
629
  estimator = self._sklearn_object
629
630
 
630
- input_df = dataset[input_cols] # Select input columns with quoted column names.
631
- if hasattr(estimator, "feature_names_in_"):
632
- missing_features = []
633
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
634
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
635
- missing_features.append(f)
636
-
637
- if len(missing_features) > 0:
638
- raise ValueError(
639
- "The feature names should match with those that were passed during fit.\n"
640
- f"Features seen during fit call but not present in the input: {missing_features}\n"
641
- f"Features in the input dataframe : {input_cols}\n"
642
- )
643
- input_df.columns = getattr(estimator, "feature_names_in_")
644
- else:
645
- # Just rename the column names to unquoted identifiers.
646
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
631
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
632
+ missing_features = []
633
+ features_in_dataset = set(dataset.columns)
634
+ columns_to_select = []
635
+ for i, f in enumerate(features_required_by_estimator):
636
+ if (
637
+ i >= len(input_cols)
638
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
639
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
640
+ and quoted_input_cols[i] not in features_in_dataset)
641
+ ):
642
+ missing_features.append(f)
643
+ elif input_cols[i] in features_in_dataset:
644
+ columns_to_select.append(input_cols[i])
645
+ elif unquoted_input_cols[i] in features_in_dataset:
646
+ columns_to_select.append(unquoted_input_cols[i])
647
+ else:
648
+ columns_to_select.append(quoted_input_cols[i])
649
+
650
+ if len(missing_features) > 0:
651
+ raise ValueError(
652
+ "The feature names should match with those that were passed during fit.\n"
653
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
654
+ f"Features in the input dataframe : {input_cols}\n"
655
+ )
656
+ input_df = dataset[columns_to_select]
657
+ input_df.columns = features_required_by_estimator
647
658
 
648
659
  transformed_numpy_array = getattr(estimator, inference_method)(
649
660
  input_df
@@ -620,26 +620,37 @@ class SelectFdr(BaseTransformer):
620
620
  # input cols need to match unquoted / quoted
621
621
  input_cols = self.input_cols
622
622
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
623
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
623
624
 
624
625
  estimator = self._sklearn_object
625
626
 
626
- input_df = dataset[input_cols] # Select input columns with quoted column names.
627
- if hasattr(estimator, "feature_names_in_"):
628
- missing_features = []
629
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
630
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
631
- missing_features.append(f)
632
-
633
- if len(missing_features) > 0:
634
- raise ValueError(
635
- "The feature names should match with those that were passed during fit.\n"
636
- f"Features seen during fit call but not present in the input: {missing_features}\n"
637
- f"Features in the input dataframe : {input_cols}\n"
638
- )
639
- input_df.columns = getattr(estimator, "feature_names_in_")
640
- else:
641
- # Just rename the column names to unquoted identifiers.
642
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
627
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
628
+ missing_features = []
629
+ features_in_dataset = set(dataset.columns)
630
+ columns_to_select = []
631
+ for i, f in enumerate(features_required_by_estimator):
632
+ if (
633
+ i >= len(input_cols)
634
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
635
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
636
+ and quoted_input_cols[i] not in features_in_dataset)
637
+ ):
638
+ missing_features.append(f)
639
+ elif input_cols[i] in features_in_dataset:
640
+ columns_to_select.append(input_cols[i])
641
+ elif unquoted_input_cols[i] in features_in_dataset:
642
+ columns_to_select.append(unquoted_input_cols[i])
643
+ else:
644
+ columns_to_select.append(quoted_input_cols[i])
645
+
646
+ if len(missing_features) > 0:
647
+ raise ValueError(
648
+ "The feature names should match with those that were passed during fit.\n"
649
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
650
+ f"Features in the input dataframe : {input_cols}\n"
651
+ )
652
+ input_df = dataset[columns_to_select]
653
+ input_df.columns = features_required_by_estimator
643
654
 
644
655
  transformed_numpy_array = getattr(estimator, inference_method)(
645
656
  input_df
@@ -620,26 +620,37 @@ class SelectFpr(BaseTransformer):
620
620
  # input cols need to match unquoted / quoted
621
621
  input_cols = self.input_cols
622
622
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
623
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
623
624
 
624
625
  estimator = self._sklearn_object
625
626
 
626
- input_df = dataset[input_cols] # Select input columns with quoted column names.
627
- if hasattr(estimator, "feature_names_in_"):
628
- missing_features = []
629
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
630
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
631
- missing_features.append(f)
632
-
633
- if len(missing_features) > 0:
634
- raise ValueError(
635
- "The feature names should match with those that were passed during fit.\n"
636
- f"Features seen during fit call but not present in the input: {missing_features}\n"
637
- f"Features in the input dataframe : {input_cols}\n"
638
- )
639
- input_df.columns = getattr(estimator, "feature_names_in_")
640
- else:
641
- # Just rename the column names to unquoted identifiers.
642
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
627
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
628
+ missing_features = []
629
+ features_in_dataset = set(dataset.columns)
630
+ columns_to_select = []
631
+ for i, f in enumerate(features_required_by_estimator):
632
+ if (
633
+ i >= len(input_cols)
634
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
635
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
636
+ and quoted_input_cols[i] not in features_in_dataset)
637
+ ):
638
+ missing_features.append(f)
639
+ elif input_cols[i] in features_in_dataset:
640
+ columns_to_select.append(input_cols[i])
641
+ elif unquoted_input_cols[i] in features_in_dataset:
642
+ columns_to_select.append(unquoted_input_cols[i])
643
+ else:
644
+ columns_to_select.append(quoted_input_cols[i])
645
+
646
+ if len(missing_features) > 0:
647
+ raise ValueError(
648
+ "The feature names should match with those that were passed during fit.\n"
649
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
650
+ f"Features in the input dataframe : {input_cols}\n"
651
+ )
652
+ input_df = dataset[columns_to_select]
653
+ input_df.columns = features_required_by_estimator
643
654
 
644
655
  transformed_numpy_array = getattr(estimator, inference_method)(
645
656
  input_df
@@ -620,26 +620,37 @@ class SelectFwe(BaseTransformer):
620
620
  # input cols need to match unquoted / quoted
621
621
  input_cols = self.input_cols
622
622
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
623
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
623
624
 
624
625
  estimator = self._sklearn_object
625
626
 
626
- input_df = dataset[input_cols] # Select input columns with quoted column names.
627
- if hasattr(estimator, "feature_names_in_"):
628
- missing_features = []
629
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
630
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
631
- missing_features.append(f)
632
-
633
- if len(missing_features) > 0:
634
- raise ValueError(
635
- "The feature names should match with those that were passed during fit.\n"
636
- f"Features seen during fit call but not present in the input: {missing_features}\n"
637
- f"Features in the input dataframe : {input_cols}\n"
638
- )
639
- input_df.columns = getattr(estimator, "feature_names_in_")
640
- else:
641
- # Just rename the column names to unquoted identifiers.
642
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
627
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
628
+ missing_features = []
629
+ features_in_dataset = set(dataset.columns)
630
+ columns_to_select = []
631
+ for i, f in enumerate(features_required_by_estimator):
632
+ if (
633
+ i >= len(input_cols)
634
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
635
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
636
+ and quoted_input_cols[i] not in features_in_dataset)
637
+ ):
638
+ missing_features.append(f)
639
+ elif input_cols[i] in features_in_dataset:
640
+ columns_to_select.append(input_cols[i])
641
+ elif unquoted_input_cols[i] in features_in_dataset:
642
+ columns_to_select.append(unquoted_input_cols[i])
643
+ else:
644
+ columns_to_select.append(quoted_input_cols[i])
645
+
646
+ if len(missing_features) > 0:
647
+ raise ValueError(
648
+ "The feature names should match with those that were passed during fit.\n"
649
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
650
+ f"Features in the input dataframe : {input_cols}\n"
651
+ )
652
+ input_df = dataset[columns_to_select]
653
+ input_df.columns = features_required_by_estimator
643
654
 
644
655
  transformed_numpy_array = getattr(estimator, inference_method)(
645
656
  input_df