snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -688,26 +688,37 @@ class KNeighborsClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
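The change above is repeated verbatim in the remaining hunks for the other neighbors and neural_network estimators: instead of selecting only the quoted column names, the wrapper now resolves each required feature against the original, unquoted, or quoted spelling of the input column, whichever is actually present in the dataset. Below is a minimal standalone sketch of that resolution logic, assuming plain pandas DataFrames; the helper names (select_input_frame, unquote, quote) are illustrative stand-ins, not the library's identifier.get_unescaped_names / identifier.get_escaped_names API.

# Hypothetical sketch of the new column-resolution step, outside the generated wrappers.
from typing import List, Sequence

import pandas as pd


def unquote(names: Sequence[str]) -> List[str]:
    # Crude stand-in for identifier.get_unescaped_names: strip surrounding double quotes.
    return [n[1:-1] if len(n) >= 2 and n[0] == n[-1] == '"' else n for n in names]


def quote(names: Sequence[str]) -> List[str]:
    # Crude stand-in for identifier.get_escaped_names: wrap names in double quotes.
    return [n if n.startswith('"') else f'"{n}"' for n in names]


def select_input_frame(dataset: pd.DataFrame, input_cols: List[str], required: Sequence[str]) -> pd.DataFrame:
    # For each feature the estimator requires, accept the input column as given,
    # unquoted, or quoted, as long as one of those spellings exists in the dataset.
    unquoted = unquote(input_cols)
    quoted = quote(unquoted)
    available = set(dataset.columns)
    missing, selected = [], []
    for i, feature in enumerate(required):
        candidates = [input_cols[i], unquoted[i], quoted[i]] if i < len(input_cols) else []
        if feature not in candidates or not any(c in available for c in candidates):
            missing.append(feature)
            continue
        # Prefer the spelling order used in the diff: original, then unquoted, then quoted.
        selected.append(next(c for c in candidates if c in available))
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    frame = dataset[selected]
    frame.columns = list(required)
    return frame

Calling select_input_frame(df, input_cols, features_required) mirrors the dataset[columns_to_select] selection and the input_df.columns assignment in the hunks.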
@@ -690,26 +690,37 @@ class KNeighborsRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -669,26 +669,37 @@ class KernelDensity(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -697,26 +697,37 @@ class LocalOutlierFactor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -628,26 +628,37 @@ class NearestCentroid(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -680,26 +680,37 @@ class NearestNeighbors(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -699,26 +699,37 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -700,26 +700,37 @@ class RadiusNeighborsClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -690,26 +690,37 @@ class RadiusNeighborsRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -649,26 +649,37 @@ class BernoulliRBM(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -802,26 +802,37 @@ class MLPClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
        unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df