snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -697,26 +697,37 @@ class RidgeClassifier(BaseTransformer):
697
697
  # input cols need to match unquoted / quoted
698
698
  input_cols = self.input_cols
699
699
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
700
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
700
701
 
701
702
  estimator = self._sklearn_object
702
703
 
703
- input_df = dataset[input_cols] # Select input columns with quoted column names.
704
- if hasattr(estimator, "feature_names_in_"):
705
- missing_features = []
706
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
707
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
708
- missing_features.append(f)
709
-
710
- if len(missing_features) > 0:
711
- raise ValueError(
712
- "The feature names should match with those that were passed during fit.\n"
713
- f"Features seen during fit call but not present in the input: {missing_features}\n"
714
- f"Features in the input dataframe : {input_cols}\n"
715
- )
716
- input_df.columns = getattr(estimator, "feature_names_in_")
717
- else:
718
- # Just rename the column names to unquoted identifiers.
719
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
704
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
705
+ missing_features = []
706
+ features_in_dataset = set(dataset.columns)
707
+ columns_to_select = []
708
+ for i, f in enumerate(features_required_by_estimator):
709
+ if (
710
+ i >= len(input_cols)
711
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
712
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
713
+ and quoted_input_cols[i] not in features_in_dataset)
714
+ ):
715
+ missing_features.append(f)
716
+ elif input_cols[i] in features_in_dataset:
717
+ columns_to_select.append(input_cols[i])
718
+ elif unquoted_input_cols[i] in features_in_dataset:
719
+ columns_to_select.append(unquoted_input_cols[i])
720
+ else:
721
+ columns_to_select.append(quoted_input_cols[i])
722
+
723
+ if len(missing_features) > 0:
724
+ raise ValueError(
725
+ "The feature names should match with those that were passed during fit.\n"
726
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
727
+ f"Features in the input dataframe : {input_cols}\n"
728
+ )
729
+ input_df = dataset[columns_to_select]
730
+ input_df.columns = features_required_by_estimator
720
731
 
721
732
  transformed_numpy_array = getattr(estimator, inference_method)(
722
733
  input_df
@@ -663,26 +663,37 @@ class RidgeClassifierCV(BaseTransformer):
663
663
  # input cols need to match unquoted / quoted
664
664
  input_cols = self.input_cols
665
665
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
666
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
666
667
 
667
668
  estimator = self._sklearn_object
668
669
 
669
- input_df = dataset[input_cols] # Select input columns with quoted column names.
670
- if hasattr(estimator, "feature_names_in_"):
671
- missing_features = []
672
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
673
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
674
- missing_features.append(f)
675
-
676
- if len(missing_features) > 0:
677
- raise ValueError(
678
- "The feature names should match with those that were passed during fit.\n"
679
- f"Features seen during fit call but not present in the input: {missing_features}\n"
680
- f"Features in the input dataframe : {input_cols}\n"
681
- )
682
- input_df.columns = getattr(estimator, "feature_names_in_")
683
- else:
684
- # Just rename the column names to unquoted identifiers.
685
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
670
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
671
+ missing_features = []
672
+ features_in_dataset = set(dataset.columns)
673
+ columns_to_select = []
674
+ for i, f in enumerate(features_required_by_estimator):
675
+ if (
676
+ i >= len(input_cols)
677
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
678
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
679
+ and quoted_input_cols[i] not in features_in_dataset)
680
+ ):
681
+ missing_features.append(f)
682
+ elif input_cols[i] in features_in_dataset:
683
+ columns_to_select.append(input_cols[i])
684
+ elif unquoted_input_cols[i] in features_in_dataset:
685
+ columns_to_select.append(unquoted_input_cols[i])
686
+ else:
687
+ columns_to_select.append(quoted_input_cols[i])
688
+
689
+ if len(missing_features) > 0:
690
+ raise ValueError(
691
+ "The feature names should match with those that were passed during fit.\n"
692
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
693
+ f"Features in the input dataframe : {input_cols}\n"
694
+ )
695
+ input_df = dataset[columns_to_select]
696
+ input_df.columns = features_required_by_estimator
686
697
 
687
698
  transformed_numpy_array = getattr(estimator, inference_method)(
688
699
  input_df
@@ -684,26 +684,37 @@ class RidgeCV(BaseTransformer):
684
684
  # input cols need to match unquoted / quoted
685
685
  input_cols = self.input_cols
686
686
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
687
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
687
688
 
688
689
  estimator = self._sklearn_object
689
690
 
690
- input_df = dataset[input_cols] # Select input columns with quoted column names.
691
- if hasattr(estimator, "feature_names_in_"):
692
- missing_features = []
693
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
694
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
695
- missing_features.append(f)
696
-
697
- if len(missing_features) > 0:
698
- raise ValueError(
699
- "The feature names should match with those that were passed during fit.\n"
700
- f"Features seen during fit call but not present in the input: {missing_features}\n"
701
- f"Features in the input dataframe : {input_cols}\n"
702
- )
703
- input_df.columns = getattr(estimator, "feature_names_in_")
704
- else:
705
- # Just rename the column names to unquoted identifiers.
706
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
691
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
692
+ missing_features = []
693
+ features_in_dataset = set(dataset.columns)
694
+ columns_to_select = []
695
+ for i, f in enumerate(features_required_by_estimator):
696
+ if (
697
+ i >= len(input_cols)
698
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
699
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
700
+ and quoted_input_cols[i] not in features_in_dataset)
701
+ ):
702
+ missing_features.append(f)
703
+ elif input_cols[i] in features_in_dataset:
704
+ columns_to_select.append(input_cols[i])
705
+ elif unquoted_input_cols[i] in features_in_dataset:
706
+ columns_to_select.append(unquoted_input_cols[i])
707
+ else:
708
+ columns_to_select.append(quoted_input_cols[i])
709
+
710
+ if len(missing_features) > 0:
711
+ raise ValueError(
712
+ "The feature names should match with those that were passed during fit.\n"
713
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
714
+ f"Features in the input dataframe : {input_cols}\n"
715
+ )
716
+ input_df = dataset[columns_to_select]
717
+ input_df.columns = features_required_by_estimator
707
718
 
708
719
  transformed_numpy_array = getattr(estimator, inference_method)(
709
720
  input_df
@@ -803,26 +803,37 @@ class SGDClassifier(BaseTransformer):
803
803
  # input cols need to match unquoted / quoted
804
804
  input_cols = self.input_cols
805
805
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
806
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
806
807
 
807
808
  estimator = self._sklearn_object
808
809
 
809
- input_df = dataset[input_cols] # Select input columns with quoted column names.
810
- if hasattr(estimator, "feature_names_in_"):
811
- missing_features = []
812
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
813
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
814
- missing_features.append(f)
815
-
816
- if len(missing_features) > 0:
817
- raise ValueError(
818
- "The feature names should match with those that were passed during fit.\n"
819
- f"Features seen during fit call but not present in the input: {missing_features}\n"
820
- f"Features in the input dataframe : {input_cols}\n"
821
- )
822
- input_df.columns = getattr(estimator, "feature_names_in_")
823
- else:
824
- # Just rename the column names to unquoted identifiers.
825
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
810
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
811
+ missing_features = []
812
+ features_in_dataset = set(dataset.columns)
813
+ columns_to_select = []
814
+ for i, f in enumerate(features_required_by_estimator):
815
+ if (
816
+ i >= len(input_cols)
817
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
818
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
819
+ and quoted_input_cols[i] not in features_in_dataset)
820
+ ):
821
+ missing_features.append(f)
822
+ elif input_cols[i] in features_in_dataset:
823
+ columns_to_select.append(input_cols[i])
824
+ elif unquoted_input_cols[i] in features_in_dataset:
825
+ columns_to_select.append(unquoted_input_cols[i])
826
+ else:
827
+ columns_to_select.append(quoted_input_cols[i])
828
+
829
+ if len(missing_features) > 0:
830
+ raise ValueError(
831
+ "The feature names should match with those that were passed during fit.\n"
832
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
833
+ f"Features in the input dataframe : {input_cols}\n"
834
+ )
835
+ input_df = dataset[columns_to_select]
836
+ input_df.columns = features_required_by_estimator
826
837
 
827
838
  transformed_numpy_array = getattr(estimator, inference_method)(
828
839
  input_df
@@ -703,26 +703,37 @@ class SGDOneClassSVM(BaseTransformer):
703
703
  # input cols need to match unquoted / quoted
704
704
  input_cols = self.input_cols
705
705
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
706
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
706
707
 
707
708
  estimator = self._sklearn_object
708
709
 
709
- input_df = dataset[input_cols] # Select input columns with quoted column names.
710
- if hasattr(estimator, "feature_names_in_"):
711
- missing_features = []
712
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
713
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
714
- missing_features.append(f)
715
-
716
- if len(missing_features) > 0:
717
- raise ValueError(
718
- "The feature names should match with those that were passed during fit.\n"
719
- f"Features seen during fit call but not present in the input: {missing_features}\n"
720
- f"Features in the input dataframe : {input_cols}\n"
721
- )
722
- input_df.columns = getattr(estimator, "feature_names_in_")
723
- else:
724
- # Just rename the column names to unquoted identifiers.
725
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
710
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
711
+ missing_features = []
712
+ features_in_dataset = set(dataset.columns)
713
+ columns_to_select = []
714
+ for i, f in enumerate(features_required_by_estimator):
715
+ if (
716
+ i >= len(input_cols)
717
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
718
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
719
+ and quoted_input_cols[i] not in features_in_dataset)
720
+ ):
721
+ missing_features.append(f)
722
+ elif input_cols[i] in features_in_dataset:
723
+ columns_to_select.append(input_cols[i])
724
+ elif unquoted_input_cols[i] in features_in_dataset:
725
+ columns_to_select.append(unquoted_input_cols[i])
726
+ else:
727
+ columns_to_select.append(quoted_input_cols[i])
728
+
729
+ if len(missing_features) > 0:
730
+ raise ValueError(
731
+ "The feature names should match with those that were passed during fit.\n"
732
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
733
+ f"Features in the input dataframe : {input_cols}\n"
734
+ )
735
+ input_df = dataset[columns_to_select]
736
+ input_df.columns = features_required_by_estimator
726
737
 
727
738
  transformed_numpy_array = getattr(estimator, inference_method)(
728
739
  input_df
@@ -769,26 +769,37 @@ class SGDRegressor(BaseTransformer):
769
769
  # input cols need to match unquoted / quoted
770
770
  input_cols = self.input_cols
771
771
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
772
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
772
773
 
773
774
  estimator = self._sklearn_object
774
775
 
775
- input_df = dataset[input_cols] # Select input columns with quoted column names.
776
- if hasattr(estimator, "feature_names_in_"):
777
- missing_features = []
778
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
779
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
780
- missing_features.append(f)
781
-
782
- if len(missing_features) > 0:
783
- raise ValueError(
784
- "The feature names should match with those that were passed during fit.\n"
785
- f"Features seen during fit call but not present in the input: {missing_features}\n"
786
- f"Features in the input dataframe : {input_cols}\n"
787
- )
788
- input_df.columns = getattr(estimator, "feature_names_in_")
789
- else:
790
- # Just rename the column names to unquoted identifiers.
791
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
776
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
777
+ missing_features = []
778
+ features_in_dataset = set(dataset.columns)
779
+ columns_to_select = []
780
+ for i, f in enumerate(features_required_by_estimator):
781
+ if (
782
+ i >= len(input_cols)
783
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
784
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
785
+ and quoted_input_cols[i] not in features_in_dataset)
786
+ ):
787
+ missing_features.append(f)
788
+ elif input_cols[i] in features_in_dataset:
789
+ columns_to_select.append(input_cols[i])
790
+ elif unquoted_input_cols[i] in features_in_dataset:
791
+ columns_to_select.append(unquoted_input_cols[i])
792
+ else:
793
+ columns_to_select.append(quoted_input_cols[i])
794
+
795
+ if len(missing_features) > 0:
796
+ raise ValueError(
797
+ "The feature names should match with those that were passed during fit.\n"
798
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
799
+ f"Features in the input dataframe : {input_cols}\n"
800
+ )
801
+ input_df = dataset[columns_to_select]
802
+ input_df.columns = features_required_by_estimator
792
803
 
793
804
  transformed_numpy_array = getattr(estimator, inference_method)(
794
805
  input_df
@@ -671,26 +671,37 @@ class TheilSenRegressor(BaseTransformer):
671
671
  # input cols need to match unquoted / quoted
672
672
  input_cols = self.input_cols
673
673
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
674
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
674
675
 
675
676
  estimator = self._sklearn_object
676
677
 
677
- input_df = dataset[input_cols] # Select input columns with quoted column names.
678
- if hasattr(estimator, "feature_names_in_"):
679
- missing_features = []
680
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
681
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
682
- missing_features.append(f)
683
-
684
- if len(missing_features) > 0:
685
- raise ValueError(
686
- "The feature names should match with those that were passed during fit.\n"
687
- f"Features seen during fit call but not present in the input: {missing_features}\n"
688
- f"Features in the input dataframe : {input_cols}\n"
689
- )
690
- input_df.columns = getattr(estimator, "feature_names_in_")
691
- else:
692
- # Just rename the column names to unquoted identifiers.
693
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
678
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
679
+ missing_features = []
680
+ features_in_dataset = set(dataset.columns)
681
+ columns_to_select = []
682
+ for i, f in enumerate(features_required_by_estimator):
683
+ if (
684
+ i >= len(input_cols)
685
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
686
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
687
+ and quoted_input_cols[i] not in features_in_dataset)
688
+ ):
689
+ missing_features.append(f)
690
+ elif input_cols[i] in features_in_dataset:
691
+ columns_to_select.append(input_cols[i])
692
+ elif unquoted_input_cols[i] in features_in_dataset:
693
+ columns_to_select.append(unquoted_input_cols[i])
694
+ else:
695
+ columns_to_select.append(quoted_input_cols[i])
696
+
697
+ if len(missing_features) > 0:
698
+ raise ValueError(
699
+ "The feature names should match with those that were passed during fit.\n"
700
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
701
+ f"Features in the input dataframe : {input_cols}\n"
702
+ )
703
+ input_df = dataset[columns_to_select]
704
+ input_df.columns = features_required_by_estimator
694
705
 
695
706
  transformed_numpy_array = getattr(estimator, inference_method)(
696
707
  input_df
@@ -697,26 +697,37 @@ class TweedieRegressor(BaseTransformer):
697
697
  # input cols need to match unquoted / quoted
698
698
  input_cols = self.input_cols
699
699
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
700
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
700
701
 
701
702
  estimator = self._sklearn_object
702
703
 
703
- input_df = dataset[input_cols] # Select input columns with quoted column names.
704
- if hasattr(estimator, "feature_names_in_"):
705
- missing_features = []
706
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
707
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
708
- missing_features.append(f)
709
-
710
- if len(missing_features) > 0:
711
- raise ValueError(
712
- "The feature names should match with those that were passed during fit.\n"
713
- f"Features seen during fit call but not present in the input: {missing_features}\n"
714
- f"Features in the input dataframe : {input_cols}\n"
715
- )
716
- input_df.columns = getattr(estimator, "feature_names_in_")
717
- else:
718
- # Just rename the column names to unquoted identifiers.
719
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
704
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
705
+ missing_features = []
706
+ features_in_dataset = set(dataset.columns)
707
+ columns_to_select = []
708
+ for i, f in enumerate(features_required_by_estimator):
709
+ if (
710
+ i >= len(input_cols)
711
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
712
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
713
+ and quoted_input_cols[i] not in features_in_dataset)
714
+ ):
715
+ missing_features.append(f)
716
+ elif input_cols[i] in features_in_dataset:
717
+ columns_to_select.append(input_cols[i])
718
+ elif unquoted_input_cols[i] in features_in_dataset:
719
+ columns_to_select.append(unquoted_input_cols[i])
720
+ else:
721
+ columns_to_select.append(quoted_input_cols[i])
722
+
723
+ if len(missing_features) > 0:
724
+ raise ValueError(
725
+ "The feature names should match with those that were passed during fit.\n"
726
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
727
+ f"Features in the input dataframe : {input_cols}\n"
728
+ )
729
+ input_df = dataset[columns_to_select]
730
+ input_df.columns = features_required_by_estimator
720
731
 
721
732
  transformed_numpy_array = getattr(estimator, inference_method)(
722
733
  input_df
@@ -695,26 +695,37 @@ class Isomap(BaseTransformer):
695
695
  # input cols need to match unquoted / quoted
696
696
  input_cols = self.input_cols
697
697
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
698
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
698
699
 
699
700
  estimator = self._sklearn_object
700
701
 
701
- input_df = dataset[input_cols] # Select input columns with quoted column names.
702
- if hasattr(estimator, "feature_names_in_"):
703
- missing_features = []
704
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
705
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
706
- missing_features.append(f)
707
-
708
- if len(missing_features) > 0:
709
- raise ValueError(
710
- "The feature names should match with those that were passed during fit.\n"
711
- f"Features seen during fit call but not present in the input: {missing_features}\n"
712
- f"Features in the input dataframe : {input_cols}\n"
713
- )
714
- input_df.columns = getattr(estimator, "feature_names_in_")
715
- else:
716
- # Just rename the column names to unquoted identifiers.
717
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
702
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
703
+ missing_features = []
704
+ features_in_dataset = set(dataset.columns)
705
+ columns_to_select = []
706
+ for i, f in enumerate(features_required_by_estimator):
707
+ if (
708
+ i >= len(input_cols)
709
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
710
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
711
+ and quoted_input_cols[i] not in features_in_dataset)
712
+ ):
713
+ missing_features.append(f)
714
+ elif input_cols[i] in features_in_dataset:
715
+ columns_to_select.append(input_cols[i])
716
+ elif unquoted_input_cols[i] in features_in_dataset:
717
+ columns_to_select.append(unquoted_input_cols[i])
718
+ else:
719
+ columns_to_select.append(quoted_input_cols[i])
720
+
721
+ if len(missing_features) > 0:
722
+ raise ValueError(
723
+ "The feature names should match with those that were passed during fit.\n"
724
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
725
+ f"Features in the input dataframe : {input_cols}\n"
726
+ )
727
+ input_df = dataset[columns_to_select]
728
+ input_df.columns = features_required_by_estimator
718
729
 
719
730
  transformed_numpy_array = getattr(estimator, inference_method)(
720
731
  input_df
@@ -678,26 +678,37 @@ class MDS(BaseTransformer):
678
678
  # input cols need to match unquoted / quoted
679
679
  input_cols = self.input_cols
680
680
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
681
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
681
682
 
682
683
  estimator = self._sklearn_object
683
684
 
684
- input_df = dataset[input_cols] # Select input columns with quoted column names.
685
- if hasattr(estimator, "feature_names_in_"):
686
- missing_features = []
687
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
688
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
689
- missing_features.append(f)
690
-
691
- if len(missing_features) > 0:
692
- raise ValueError(
693
- "The feature names should match with those that were passed during fit.\n"
694
- f"Features seen during fit call but not present in the input: {missing_features}\n"
695
- f"Features in the input dataframe : {input_cols}\n"
696
- )
697
- input_df.columns = getattr(estimator, "feature_names_in_")
698
- else:
699
- # Just rename the column names to unquoted identifiers.
700
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
685
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
686
+ missing_features = []
687
+ features_in_dataset = set(dataset.columns)
688
+ columns_to_select = []
689
+ for i, f in enumerate(features_required_by_estimator):
690
+ if (
691
+ i >= len(input_cols)
692
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
693
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
694
+ and quoted_input_cols[i] not in features_in_dataset)
695
+ ):
696
+ missing_features.append(f)
697
+ elif input_cols[i] in features_in_dataset:
698
+ columns_to_select.append(input_cols[i])
699
+ elif unquoted_input_cols[i] in features_in_dataset:
700
+ columns_to_select.append(unquoted_input_cols[i])
701
+ else:
702
+ columns_to_select.append(quoted_input_cols[i])
703
+
704
+ if len(missing_features) > 0:
705
+ raise ValueError(
706
+ "The feature names should match with those that were passed during fit.\n"
707
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
708
+ f"Features in the input dataframe : {input_cols}\n"
709
+ )
710
+ input_df = dataset[columns_to_select]
711
+ input_df.columns = features_required_by_estimator
701
712
 
702
713
  transformed_numpy_array = getattr(estimator, inference_method)(
703
714
  input_df
@@ -680,26 +680,37 @@ class SpectralEmbedding(BaseTransformer):
680
680
  # input cols need to match unquoted / quoted
681
681
  input_cols = self.input_cols
682
682
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
683
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
683
684
 
684
685
  estimator = self._sklearn_object
685
686
 
686
- input_df = dataset[input_cols] # Select input columns with quoted column names.
687
- if hasattr(estimator, "feature_names_in_"):
688
- missing_features = []
689
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
690
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
691
- missing_features.append(f)
692
-
693
- if len(missing_features) > 0:
694
- raise ValueError(
695
- "The feature names should match with those that were passed during fit.\n"
696
- f"Features seen during fit call but not present in the input: {missing_features}\n"
697
- f"Features in the input dataframe : {input_cols}\n"
698
- )
699
- input_df.columns = getattr(estimator, "feature_names_in_")
700
- else:
701
- # Just rename the column names to unquoted identifiers.
702
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
687
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
688
+ missing_features = []
689
+ features_in_dataset = set(dataset.columns)
690
+ columns_to_select = []
691
+ for i, f in enumerate(features_required_by_estimator):
692
+ if (
693
+ i >= len(input_cols)
694
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
695
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
696
+ and quoted_input_cols[i] not in features_in_dataset)
697
+ ):
698
+ missing_features.append(f)
699
+ elif input_cols[i] in features_in_dataset:
700
+ columns_to_select.append(input_cols[i])
701
+ elif unquoted_input_cols[i] in features_in_dataset:
702
+ columns_to_select.append(unquoted_input_cols[i])
703
+ else:
704
+ columns_to_select.append(quoted_input_cols[i])
705
+
706
+ if len(missing_features) > 0:
707
+ raise ValueError(
708
+ "The feature names should match with those that were passed during fit.\n"
709
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
710
+ f"Features in the input dataframe : {input_cols}\n"
711
+ )
712
+ input_df = dataset[columns_to_select]
713
+ input_df.columns = features_required_by_estimator
703
714
 
704
715
  transformed_numpy_array = getattr(estimator, inference_method)(
705
716
  input_df