snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -640,26 +640,37 @@ class PolynomialCountSketch(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
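Every modeling wrapper below repeats this same change: instead of assuming the dataset columns carry quoted identifiers, the inference path now tries the configured column name, its unquoted form, and its escaped (quoted) form when lining the dataset up with the estimator's feature_names_in_. The following standalone sketch illustrates that resolution order on a plain pandas DataFrame. The unescape/escape helpers and resolve_input_columns are simplified, hypothetical stand-ins written only for illustration (not the snowflake-ml-python API), assuming that get_unescaped_names strips and get_escaped_names adds surrounding double quotes.

# Minimal sketch of the new column-resolution logic, under the assumptions above.
import pandas as pd


def unescape(name: str) -> str:
    # Assumed: strip surrounding double quotes from a quoted Snowflake identifier.
    return name[1:-1] if name.startswith('"') and name.endswith('"') else name


def escape(name: str) -> str:
    # Assumed: wrap an identifier in double quotes if it is not already quoted.
    return name if name.startswith('"') else f'"{name}"'


def resolve_input_columns(dataset: pd.DataFrame, input_cols, required_features):
    unquoted = [unescape(c) for c in input_cols]
    quoted = [escape(c) for c in unquoted]
    present = set(dataset.columns)
    missing, selected = [], []
    for i, feature in enumerate(required_features):
        # Try the configured name, its unquoted form, and its quoted form, in that order.
        candidates = (input_cols[i], unquoted[i], quoted[i]) if i < len(input_cols) else ()
        if feature not in candidates or not any(c in present for c in candidates):
            missing.append(feature)
        else:
            selected.append(next(c for c in candidates if c in present))
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    frame = dataset[selected]
    frame.columns = list(required_features)  # rename to the names the estimator expects
    return frame


# One column arrives unquoted and one quoted, as a Snowflake result set often does.
df = pd.DataFrame({"FEAT_A": [1, 2], '"feat_b"': [3, 4]})
out = resolve_input_columns(df, ['"FEAT_A"', '"feat_b"'], ["FEAT_A", "feat_b"])
print(list(out.columns))  # ['FEAT_A', 'feat_b']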
@@ -627,26 +627,37 @@ class RBFSampler(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -625,26 +625,37 @@ class SkewedChi2Sampler(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -659,26 +659,37 @@ class KernelRidge(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -648,26 +648,37 @@ class LGBMClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -648,26 +648,37 @@ class LGBMRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -668,26 +668,37 @@ class ARDRegression(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -677,26 +677,37 @@ class BayesianRidge(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -683,26 +683,37 @@ class ElasticNet(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -719,26 +719,37 @@ class ElasticNetCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -664,26 +664,37 @@ class GammaRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
        unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df