snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
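Most of the per-estimator churn below comes from a single change to how input columns are matched against the pandas dataset at inference time. Every hunk calls two helpers from snowflake/ml/_internal/utils/identifier.py, get_unescaped_names and get_escaped_names. The sketch below is a simplified, hypothetical rendering of what they presumably do, based on standard Snowflake identifier resolution (unquoted names fold to uppercase, double-quoted names keep their case and escape embedded quotes by doubling); it is not the package's actual implementation, which also handles other input types and edge cases.

from typing import List


def get_unescaped_names(names: List[str]) -> List[str]:
    # Resolve identifiers the way Snowflake stores them: a double-quoted name
    # keeps its exact case (outer quotes stripped, doubled quotes collapsed),
    # an unquoted name folds to uppercase.
    resolved = []
    for name in names:
        if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
            resolved.append(name[1:-1].replace('""', '"'))
        else:
            resolved.append(name.upper())
    return resolved


def get_escaped_names(names: List[str]) -> List[str]:
    # Re-quote resolved names so they can be compared against DataFrame
    # columns that kept their quoted form.
    return ['"' + name.replace('"', '""') + '"' for name in names]


cols = ["FEATURE_A", '"feature_b"']
print(get_unescaped_names(cols))                     # ['FEATURE_A', 'feature_b']
print(get_escaped_names(get_unescaped_names(cols)))  # ['"FEATURE_A"', '"feature_b"']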
@@ -625,26 +625,37 @@ class ShrunkCovariance(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
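This replacement block is repeated in each of the modeling estimators below; only the class name and line offsets differ. The old code only tried the columns exactly as configured (or their unquoted forms), while the new code accepts each required feature under the name as given, its unquoted form, or its re-quoted form, whichever actually exists in the dataset. The following is a minimal standalone sketch of that selection logic, assuming plain pandas and the simplified quoting helpers sketched above; resolve_input_columns and its parameters are illustrative names, not part of the package API.

from typing import List, Sequence

import pandas as pd


def resolve_input_columns(
    dataset: pd.DataFrame,
    input_cols: List[str],
    unquoted_input_cols: List[str],
    quoted_input_cols: List[str],
    features_required: Sequence[str],
) -> pd.DataFrame:
    # Accept each required feature under the name as given, its unquoted
    # (resolved) form, or its re-quoted form, preferring them in that order.
    features_in_dataset = set(dataset.columns)
    columns_to_select = []
    missing_features = []
    for i, feature in enumerate(features_required):
        candidates = (
            (input_cols[i], unquoted_input_cols[i], quoted_input_cols[i])
            if i < len(input_cols)
            else ()
        )
        if feature not in candidates or not any(c in features_in_dataset for c in candidates):
            missing_features.append(feature)
            continue
        for candidate in candidates:
            if candidate in features_in_dataset:
                columns_to_select.append(candidate)
                break
    if missing_features:
        raise ValueError(
            "The feature names should match with those that were passed during fit.\n"
            f"Features seen during fit call but not present in the input: {missing_features}\n"
        )
    input_df = dataset[columns_to_select]
    input_df.columns = list(features_required)  # rename to what the estimator expects
    return input_df


# Example: the estimator was fitted on lowercase feature names, while the
# Snowpark-to-pandas conversion kept the quoted column labels.
df = pd.DataFrame({'"sepal_len"': [5.1], '"sepal_wid"': [3.5]})
resolved = resolve_input_columns(
    dataset=df,
    input_cols=['"sepal_len"', '"sepal_wid"'],
    unquoted_input_cols=["sepal_len", "sepal_wid"],
    quoted_input_cols=['"sepal_len"', '"sepal_wid"'],
    features_required=["sepal_len", "sepal_wid"],
)
print(list(resolved.columns))  # ['sepal_len', 'sepal_wid']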
@@ -726,26 +726,37 @@ class DictionaryLearning(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -668,26 +668,37 @@ class FactorAnalysis(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -686,26 +686,37 @@ class FastICA(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -638,26 +638,37 @@ class IncrementalPCA(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -734,26 +734,37 @@ class KernelPCA(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -756,26 +756,37 @@ class MiniBatchDictionaryLearning(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -701,26 +701,37 @@ class MiniBatchSparsePCA(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -703,26 +703,37 @@ class PCA(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -676,26 +676,37 @@ class SparsePCA(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -657,26 +657,37 @@ class TruncatedSVD(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df