snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -717,26 +717,37 @@ class SVC(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
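Note: every modeling hunk in this diff applies the same change. Batch inference now resolves each input column against its original, unquoted, and quoted identifier spellings before selecting it from the dataset, instead of assuming the quoted form. The standalone sketch below (not part of the package) illustrates that resolution logic with pandas; select_input_frame and the two identifier helpers are simplified, hypothetical stand-ins for snowflake/ml/_internal/utils/identifier.py, which handles more edge cases.

# Standalone sketch of the column-resolution logic introduced above.
# The identifier helpers here are simplified stand-ins, not the package's implementation.
import pandas as pd

def get_unescaped_names(cols):
    # Assumption: a quoted Snowflake identifier is wrapped in double quotes.
    return [c[1:-1] if c.startswith('"') and c.endswith('"') else c for c in cols]

def get_escaped_names(cols):
    # Assumption: escaping simply re-wraps the name in double quotes.
    return [f'"{c}"' for c in cols]

def select_input_frame(dataset: pd.DataFrame, input_cols, feature_names_in=None):
    unquoted = get_unescaped_names(input_cols)
    quoted = get_escaped_names(unquoted)
    # Prefer the estimator's recorded feature names; otherwise fall back to unquoted names.
    required = list(feature_names_in) if feature_names_in is not None else unquoted

    features_in_dataset = set(dataset.columns)
    missing, to_select = [], []
    for i, feature in enumerate(required):
        candidates = (input_cols[i], unquoted[i], quoted[i]) if i < len(input_cols) else ()
        if feature not in candidates or not any(c in features_in_dataset for c in candidates):
            missing.append(feature)
        else:
            # Pick whichever spelling actually exists in the dataframe.
            to_select.append(next(c for c in candidates if c in features_in_dataset))

    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")

    input_df = dataset[to_select]
    input_df.columns = required  # Rename to what the estimator expects.
    return input_df

# A dataframe that mixes quoted and unquoted column spellings still resolves cleanly.
df = pd.DataFrame({'"SEPAL_LENGTH"': [5.1], "SEPAL_WIDTH": [3.5]})
frame = select_input_frame(df, ['"SEPAL_LENGTH"', '"SEPAL_WIDTH"'], ["SEPAL_LENGTH", "SEPAL_WIDTH"])
print(frame.columns.tolist())  # ['SEPAL_LENGTH', 'SEPAL_WIDTH']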
@@ -678,26 +678,37 @@ class SVR(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -746,26 +746,37 @@ class DecisionTreeClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -728,26 +728,37 @@ class DecisionTreeRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -738,26 +738,37 @@ class ExtraTreeClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -720,26 +720,37 @@ class ExtraTreeRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -820,26 +820,37 @@ class XGBClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -819,26 +819,37 @@ class XGBRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -824,26 +824,37 @@ class XGBRFClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df
@@ -824,26 +824,37 @@ class XGBRFRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
            input_df