snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the changes between publicly available versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -667,26 +667,37 @@ class MultiTaskElasticNet(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
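The same generated change appears in every estimator wrapper below: instead of selecting dataset[input_cols] directly, the wrapper now resolves, for each feature the fitted estimator expects, whichever spelling of the configured input column (exact, unquoted, or quoted) is actually present in the dataset, and raises only when a required feature cannot be matched at all. The following is a minimal, self-contained sketch of that selection logic against a pandas DataFrame; _unquote and _quote are hypothetical stand-ins for the library's identifier.get_unescaped_names and identifier.get_escaped_names helpers and do not reproduce full Snowflake identifier semantics.

from typing import List, Sequence

import pandas as pd


def _unquote(name: str) -> str:
    # Stand-in helper: strip one layer of surrounding double quotes, if present.
    return name[1:-1] if name.startswith('"') and name.endswith('"') else name


def _quote(name: str) -> str:
    # Stand-in helper: wrap the name in double quotes if it is not already quoted.
    return name if name.startswith('"') else f'"{name}"'


def select_input_columns(
    dataset: pd.DataFrame, input_cols: Sequence[str], required_features: Sequence[str]
) -> pd.DataFrame:
    """Pick one matching column (exact, unquoted, or quoted spelling) per required feature."""
    unquoted = [_unquote(c) for c in input_cols]
    quoted = [_quote(c) for c in unquoted]
    in_dataset = set(dataset.columns)

    missing: List[str] = []
    to_select: List[str] = []
    for i, feature in enumerate(required_features):
        if (
            i >= len(input_cols)
            # None of the three spellings matches the feature name seen at fit time.
            or (input_cols[i] != feature and unquoted[i] != feature and quoted[i] != feature)
            # None of the three spellings exists in the dataset being scored.
            or (input_cols[i] not in in_dataset and unquoted[i] not in in_dataset
                and quoted[i] not in in_dataset)
        ):
            missing.append(feature)
        elif input_cols[i] in in_dataset:
            to_select.append(input_cols[i])
        elif unquoted[i] in in_dataset:
            to_select.append(unquoted[i])
        else:
            to_select.append(quoted[i])

    if missing:
        raise ValueError(
            "The feature names should match with those that were passed during fit.\n"
            f"Features seen during fit call but not present in the input: {missing}\n"
            f"Features in the input dataframe : {list(input_cols)}\n"
        )

    # Select the resolved columns and rename them to the names the estimator expects.
    input_df = dataset[to_select]
    input_df.columns = list(required_features)
    return input_df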
@@ -708,26 +708,37 @@ class MultiTaskElasticNetCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -659,26 +659,37 @@ class MultiTaskLasso(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -694,26 +694,37 @@ class MultiTaskLassoCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -642,26 +642,37 @@ class OrthogonalMatchingPursuit(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -716,26 +716,37 @@ class PassiveAggressiveClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -703,26 +703,37 @@ class PassiveAggressiveRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -716,26 +716,37 @@ class Perceptron(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -664,26 +664,37 @@ class PoissonRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -727,26 +727,37 @@ class RANSACRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
        unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -697,26 +697,37 @@ class Ridge(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
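As a toy usage illustration of the mismatch this change tolerates, assuming the select_input_columns sketch shown after the first hunk (column names and data are made up): the estimator was fit on unquoted feature names while the scored DataFrame carries quoted Snowflake-style identifiers, and the helper resolves the quoted columns and renames the selection back to what the estimator expects.

import pandas as pd

# Hypothetical scoring frame whose columns carry quoted Snowflake-style identifiers.
scoring_df = pd.DataFrame({'"sepal_len"': [5.1, 4.9], '"sepal_wid"': [3.5, 3.0]})

# Input columns configured on the transformer (quoted form) and the feature names
# the fitted sklearn estimator recorded in feature_names_in_ (unquoted form).
input_cols = ['"sepal_len"', '"sepal_wid"']
required_features = ["sepal_len", "sepal_wid"]

# Selects the quoted columns present in the frame and renames them for the estimator.
input_df = select_input_columns(scoring_df, input_cols, required_features)
print(list(input_df.columns))  # ['sepal_len', 'sepal_wid']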