snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -715,26 +715,37 @@ class GaussianMixture(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
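Every wrapper hunk in this release applies the same refactor shown above for GaussianMixture: inference now also computes the escaped (quoted) spelling of each input column via identifier.get_escaped_names and, per feature the estimator expects, selects whichever of the as-given / unquoted / quoted spellings is actually present in the inference DataFrame, instead of assuming the quoted spelling exists. Below is a minimal, self-contained sketch of that selection logic for illustration only; unescape_name and escape_name are simplified hypothetical stand-ins for the identifier module's helpers, and select_input_columns is not an API of the package.

# Hedged sketch of the column-selection refactor repeated in the hunks below.
# unescape_name / escape_name are simplified stand-ins (assumptions), not the
# snowflake.ml identifier helpers; real Snowflake identifier rules are richer.
from typing import List, Sequence

import pandas as pd


def unescape_name(name: str) -> str:
    # Drop surrounding double quotes, if any (assumes no embedded quotes).
    return name[1:-1] if name.startswith('"') and name.endswith('"') else name


def escape_name(name: str) -> str:
    # Wrap in double quotes so the identifier keeps its exact case.
    return name if name.startswith('"') else f'"{name}"'


def select_input_columns(
    dataset: pd.DataFrame,
    input_cols: Sequence[str],
    features_required: Sequence[str],
) -> pd.DataFrame:
    """For each required feature, pick whichever spelling of the column exists."""
    unquoted = [unescape_name(c) for c in input_cols]
    quoted = [escape_name(c) for c in unquoted]
    in_dataset = set(dataset.columns)

    missing: List[str] = []
    to_select: List[str] = []
    for i, feature in enumerate(features_required):
        candidates = (input_cols[i], unquoted[i], quoted[i]) if i < len(input_cols) else ()
        # Missing if no spelling matches the feature name, or none is present in the data.
        if feature not in candidates or not any(c in in_dataset for c in candidates):
            missing.append(feature)
            continue
        # Prefer the as-given name, then the unquoted form, then the quoted form.
        to_select.append(next(c for c in candidates if c in in_dataset))

    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")

    input_df = dataset[to_select]
    input_df.columns = list(features_required)  # Rename to what the estimator expects.
    return input_df


# Example: the estimator was fit on unquoted names, but the DataFrame carries quoted ones.
df = pd.DataFrame({'"SEPAL_LENGTH"': [5.1], '"SEPAL_WIDTH"': [3.5]})
print(select_input_columns(df, ['"SEPAL_LENGTH"', '"SEPAL_WIDTH"'], ["SEPAL_LENGTH", "SEPAL_WIDTH"]))

With the previous logic, a DataFrame whose columns carried a different quoting style than input_cols raised the feature-name mismatch error; the new logic resolves whichever spelling is present and only renames the columns once they are found. The same change appears in each of the remaining hunks.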
@@ -751,26 +751,37 @@ class GridSearchCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -766,26 +766,37 @@ class RandomizedSearchCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -625,26 +625,37 @@ class OneVsOneClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -634,26 +634,37 @@ class OneVsRestClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -637,26 +637,37 @@ class OutputCodeClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -637,26 +637,37 @@ class BernoulliNB(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -643,26 +643,37 @@ class CategoricalNB(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -637,26 +637,37 @@ class ComplementNB(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -618,26 +618,37 @@ class GaussianNB(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -631,26 +631,37 @@ class MultinomialNB(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df