snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -736,26 +736,37 @@ class SpectralClustering(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
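
The hunk above, and those that follow, apply the same change to each modeling wrapper listed in the file table: instead of assuming the converted dataframe carries quoted column names, inference now resolves each feature the fitted estimator expects against the quoted, unquoted, and re-quoted spellings of the configured input columns and selects whichever spelling actually exists in the dataframe, reporting only genuinely missing features. The following is a minimal standalone sketch of that resolution logic, assuming simplified stand-ins for identifier.get_unescaped_names and identifier.get_escaped_names (the real helpers implement full Snowflake identifier rules); resolve_input_columns is a hypothetical name used only for this illustration.

# Minimal sketch (assumptions labeled) of the new quoted/unquoted column resolution.
# get_unescaped_names / get_escaped_names below are simplified stand-ins for the
# helpers in snowflake.ml._internal.utils.identifier, not the real implementations.
import pandas as pd


def get_unescaped_names(cols):
    # Assumption: strip one layer of surrounding double quotes if present.
    return [c[1:-1] if c.startswith('"') and c.endswith('"') else c for c in cols]


def get_escaped_names(cols):
    # Assumption: wrap each name in double quotes unless it is already quoted.
    return [c if c.startswith('"') else f'"{c}"' for c in cols]


def resolve_input_columns(dataset, input_cols, estimator_feature_names=None):
    """For each feature the estimator expects, pick whichever of the configured,
    unquoted, or re-quoted column names actually exists in the dataset."""
    unquoted_input_cols = get_unescaped_names(input_cols)
    quoted_input_cols = get_escaped_names(unquoted_input_cols)
    features_required = estimator_feature_names or unquoted_input_cols

    features_in_dataset = set(dataset.columns)
    columns_to_select, missing_features = [], []
    for i, f in enumerate(features_required):
        candidates = (
            (input_cols[i], unquoted_input_cols[i], quoted_input_cols[i])
            if i < len(input_cols) else ()
        )
        if not candidates or f not in candidates or not any(c in features_in_dataset for c in candidates):
            missing_features.append(f)
        else:
            # Prefer the configured spelling, then the unquoted, then the quoted one.
            columns_to_select.append(next(c for c in candidates if c in features_in_dataset))

    if missing_features:
        raise ValueError(
            "The feature names should match with those that were passed during fit.\n"
            f"Features seen during fit call but not present in the input: {missing_features}\n"
        )
    input_df = dataset[columns_to_select]
    input_df.columns = list(features_required)
    return input_df


# Example: the estimator was fit on unquoted names, while the dataframe converted
# from Snowpark still carries quoted column names; resolution lines the two up.
df = pd.DataFrame({'"COL1"': [1.0, 2.0], '"COL2"': [3.0, 4.0]})
print(resolve_input_columns(df, ['"COL1"', '"COL2"'], ["COL1", "COL2"]))

Compared with the 1.0.1 behavior, which selected dataset[input_cols] up front and only then validated names, this ordering resolves names before selection, so a dataframe whose columns arrive unquoted (or re-quoted) can still be matched instead of failing on the column lookup.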
@@ -657,26 +657,37 @@ class SpectralCoclustering(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -687,26 +687,37 @@ class ColumnTransformer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -646,26 +646,37 @@ class TransformedTargetRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -643,26 +643,37 @@ class EllipticEnvelope(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -619,26 +619,37 @@ class EmpiricalCovariance(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -653,26 +653,37 @@ class GraphicalLasso(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -686,26 +686,37 @@ class GraphicalLassoCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -626,26 +626,37 @@ class LedoitWolf(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -638,26 +638,37 @@ class MinCovDet(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -619,26 +619,37 @@ class OAS(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df