snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -672,26 +672,37 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
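
The same inference-path change is applied to every generated modeling wrapper in this release; the hunks below are identical apart from class names and line offsets. Rather than assuming the dataset exposes quoted column names, the wrapper now tries each configured input column as given, in its unquoted form, and in its re-escaped form, and selects whichever variant actually exists in the dataframe, raising the "missing features" error only when none of the three is present. Below is a minimal, self-contained sketch of that resolution logic, not code from the package: resolve_input_columns is a hypothetical helper, and _unquote/_quote are simplified stand-ins for identifier.get_unescaped_names / identifier.get_escaped_names that only handle plain double-quoted identifiers.

from typing import List, Sequence

import pandas as pd


def _unquote(name: str) -> str:
    # Simplified: strip surrounding double quotes, otherwise fold to upper case
    # the way Snowflake resolves unquoted identifiers.
    return name[1:-1] if name.startswith('"') and name.endswith('"') else name.upper()


def _quote(name: str) -> str:
    # Simplified: wrap in double quotes if not already quoted.
    return name if name.startswith('"') else f'"{name}"'


def resolve_input_columns(
    dataset: pd.DataFrame, input_cols: Sequence[str], required_features: Sequence[str]
) -> List[str]:
    """For each feature the estimator saw at fit time, pick whichever form of the
    configured input column (as given, unquoted, or quoted) exists in the dataset."""
    unquoted = [_unquote(c) for c in input_cols]
    quoted = [_quote(c) for c in unquoted]
    present = set(dataset.columns)
    selected: List[str] = []
    missing: List[str] = []
    for i, feature in enumerate(required_features):
        candidates = (input_cols[i], unquoted[i], quoted[i]) if i < len(input_cols) else ()
        if feature not in candidates or not any(c in present for c in candidates):
            missing.append(feature)
        else:
            selected.append(next(c for c in candidates if c in present))
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    return selected


# Mixed quoted / unquoted dataframe columns resolve without renaming the whole frame.
df = pd.DataFrame({"SEPAL_LENGTH": [5.1], '"sepal_width"': [3.5]})
print(resolve_input_columns(df, ['"SEPAL_LENGTH"', '"sepal_width"'], ["SEPAL_LENGTH", '"sepal_width"']))
# -> ['SEPAL_LENGTH', '"sepal_width"']

The design choice visible in the diff is to resolve column names one at a time against dataset.columns instead of renaming the whole frame, so quoted and unquoted identifiers can coexist in the same input at predict time.
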
@@ -634,26 +634,37 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -659,26 +659,37 @@ class AdaBoostClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -656,26 +656,37 @@ class AdaBoostRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -691,26 +691,37 @@ class BaggingClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -691,26 +691,37 @@ class BaggingRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -793,26 +793,37 @@ class ExtraTreesClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -772,26 +772,37 @@ class ExtraTreesRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -807,26 +807,37 @@ class GradientBoostingClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -816,26 +816,37 @@ class GradientBoostingRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -785,26 +785,37 @@ class HistGradientBoostingClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df