snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -647,26 +647,37 @@ class HuberRegressor(BaseTransformer):
647
647
  # input cols need to match unquoted / quoted
648
648
  input_cols = self.input_cols
649
649
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
650
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
650
651
 
651
652
  estimator = self._sklearn_object
652
653
 
653
- input_df = dataset[input_cols] # Select input columns with quoted column names.
654
- if hasattr(estimator, "feature_names_in_"):
655
- missing_features = []
656
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
657
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
658
- missing_features.append(f)
659
-
660
- if len(missing_features) > 0:
661
- raise ValueError(
662
- "The feature names should match with those that were passed during fit.\n"
663
- f"Features seen during fit call but not present in the input: {missing_features}\n"
664
- f"Features in the input dataframe : {input_cols}\n"
665
- )
666
- input_df.columns = getattr(estimator, "feature_names_in_")
667
- else:
668
- # Just rename the column names to unquoted identifiers.
669
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
654
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
655
+ missing_features = []
656
+ features_in_dataset = set(dataset.columns)
657
+ columns_to_select = []
658
+ for i, f in enumerate(features_required_by_estimator):
659
+ if (
660
+ i >= len(input_cols)
661
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
662
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
663
+ and quoted_input_cols[i] not in features_in_dataset)
664
+ ):
665
+ missing_features.append(f)
666
+ elif input_cols[i] in features_in_dataset:
667
+ columns_to_select.append(input_cols[i])
668
+ elif unquoted_input_cols[i] in features_in_dataset:
669
+ columns_to_select.append(unquoted_input_cols[i])
670
+ else:
671
+ columns_to_select.append(quoted_input_cols[i])
672
+
673
+ if len(missing_features) > 0:
674
+ raise ValueError(
675
+ "The feature names should match with those that were passed during fit.\n"
676
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
677
+ f"Features in the input dataframe : {input_cols}\n"
678
+ )
679
+ input_df = dataset[columns_to_select]
680
+ input_df.columns = features_required_by_estimator
670
681
 
671
682
  transformed_numpy_array = getattr(estimator, inference_method)(
672
683
  input_df
@@ -676,26 +676,37 @@ class Lars(BaseTransformer):
676
676
  # input cols need to match unquoted / quoted
677
677
  input_cols = self.input_cols
678
678
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
679
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
679
680
 
680
681
  estimator = self._sklearn_object
681
682
 
682
- input_df = dataset[input_cols] # Select input columns with quoted column names.
683
- if hasattr(estimator, "feature_names_in_"):
684
- missing_features = []
685
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
686
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
687
- missing_features.append(f)
688
-
689
- if len(missing_features) > 0:
690
- raise ValueError(
691
- "The feature names should match with those that were passed during fit.\n"
692
- f"Features seen during fit call but not present in the input: {missing_features}\n"
693
- f"Features in the input dataframe : {input_cols}\n"
694
- )
695
- input_df.columns = getattr(estimator, "feature_names_in_")
696
- else:
697
- # Just rename the column names to unquoted identifiers.
698
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
683
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
684
+ missing_features = []
685
+ features_in_dataset = set(dataset.columns)
686
+ columns_to_select = []
687
+ for i, f in enumerate(features_required_by_estimator):
688
+ if (
689
+ i >= len(input_cols)
690
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
691
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
692
+ and quoted_input_cols[i] not in features_in_dataset)
693
+ ):
694
+ missing_features.append(f)
695
+ elif input_cols[i] in features_in_dataset:
696
+ columns_to_select.append(input_cols[i])
697
+ elif unquoted_input_cols[i] in features_in_dataset:
698
+ columns_to_select.append(unquoted_input_cols[i])
699
+ else:
700
+ columns_to_select.append(quoted_input_cols[i])
701
+
702
+ if len(missing_features) > 0:
703
+ raise ValueError(
704
+ "The feature names should match with those that were passed during fit.\n"
705
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
706
+ f"Features in the input dataframe : {input_cols}\n"
707
+ )
708
+ input_df = dataset[columns_to_select]
709
+ input_df.columns = features_required_by_estimator
699
710
 
700
711
  transformed_numpy_array = getattr(estimator, inference_method)(
701
712
  input_df
@@ -684,26 +684,37 @@ class LarsCV(BaseTransformer):
684
684
  # input cols need to match unquoted / quoted
685
685
  input_cols = self.input_cols
686
686
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
687
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
687
688
 
688
689
  estimator = self._sklearn_object
689
690
 
690
- input_df = dataset[input_cols] # Select input columns with quoted column names.
691
- if hasattr(estimator, "feature_names_in_"):
692
- missing_features = []
693
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
694
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
695
- missing_features.append(f)
696
-
697
- if len(missing_features) > 0:
698
- raise ValueError(
699
- "The feature names should match with those that were passed during fit.\n"
700
- f"Features seen during fit call but not present in the input: {missing_features}\n"
701
- f"Features in the input dataframe : {input_cols}\n"
702
- )
703
- input_df.columns = getattr(estimator, "feature_names_in_")
704
- else:
705
- # Just rename the column names to unquoted identifiers.
706
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
691
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
692
+ missing_features = []
693
+ features_in_dataset = set(dataset.columns)
694
+ columns_to_select = []
695
+ for i, f in enumerate(features_required_by_estimator):
696
+ if (
697
+ i >= len(input_cols)
698
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
699
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
700
+ and quoted_input_cols[i] not in features_in_dataset)
701
+ ):
702
+ missing_features.append(f)
703
+ elif input_cols[i] in features_in_dataset:
704
+ columns_to_select.append(input_cols[i])
705
+ elif unquoted_input_cols[i] in features_in_dataset:
706
+ columns_to_select.append(unquoted_input_cols[i])
707
+ else:
708
+ columns_to_select.append(quoted_input_cols[i])
709
+
710
+ if len(missing_features) > 0:
711
+ raise ValueError(
712
+ "The feature names should match with those that were passed during fit.\n"
713
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
714
+ f"Features in the input dataframe : {input_cols}\n"
715
+ )
716
+ input_df = dataset[columns_to_select]
717
+ input_df.columns = features_required_by_estimator
707
718
 
708
719
  transformed_numpy_array = getattr(estimator, inference_method)(
709
720
  input_df
@@ -677,26 +677,37 @@ class Lasso(BaseTransformer):
677
677
  # input cols need to match unquoted / quoted
678
678
  input_cols = self.input_cols
679
679
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
680
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
680
681
 
681
682
  estimator = self._sklearn_object
682
683
 
683
- input_df = dataset[input_cols] # Select input columns with quoted column names.
684
- if hasattr(estimator, "feature_names_in_"):
685
- missing_features = []
686
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
687
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
688
- missing_features.append(f)
689
-
690
- if len(missing_features) > 0:
691
- raise ValueError(
692
- "The feature names should match with those that were passed during fit.\n"
693
- f"Features seen during fit call but not present in the input: {missing_features}\n"
694
- f"Features in the input dataframe : {input_cols}\n"
695
- )
696
- input_df.columns = getattr(estimator, "feature_names_in_")
697
- else:
698
- # Just rename the column names to unquoted identifiers.
699
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
684
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
685
+ missing_features = []
686
+ features_in_dataset = set(dataset.columns)
687
+ columns_to_select = []
688
+ for i, f in enumerate(features_required_by_estimator):
689
+ if (
690
+ i >= len(input_cols)
691
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
692
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
693
+ and quoted_input_cols[i] not in features_in_dataset)
694
+ ):
695
+ missing_features.append(f)
696
+ elif input_cols[i] in features_in_dataset:
697
+ columns_to_select.append(input_cols[i])
698
+ elif unquoted_input_cols[i] in features_in_dataset:
699
+ columns_to_select.append(unquoted_input_cols[i])
700
+ else:
701
+ columns_to_select.append(quoted_input_cols[i])
702
+
703
+ if len(missing_features) > 0:
704
+ raise ValueError(
705
+ "The feature names should match with those that were passed during fit.\n"
706
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
707
+ f"Features in the input dataframe : {input_cols}\n"
708
+ )
709
+ input_df = dataset[columns_to_select]
710
+ input_df.columns = features_required_by_estimator
700
711
 
701
712
  transformed_numpy_array = getattr(estimator, inference_method)(
702
713
  input_df
@@ -705,26 +705,37 @@ class LassoCV(BaseTransformer):
705
705
  # input cols need to match unquoted / quoted
706
706
  input_cols = self.input_cols
707
707
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
708
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
708
709
 
709
710
  estimator = self._sklearn_object
710
711
 
711
- input_df = dataset[input_cols] # Select input columns with quoted column names.
712
- if hasattr(estimator, "feature_names_in_"):
713
- missing_features = []
714
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
715
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
716
- missing_features.append(f)
717
-
718
- if len(missing_features) > 0:
719
- raise ValueError(
720
- "The feature names should match with those that were passed during fit.\n"
721
- f"Features seen during fit call but not present in the input: {missing_features}\n"
722
- f"Features in the input dataframe : {input_cols}\n"
723
- )
724
- input_df.columns = getattr(estimator, "feature_names_in_")
725
- else:
726
- # Just rename the column names to unquoted identifiers.
727
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
712
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
713
+ missing_features = []
714
+ features_in_dataset = set(dataset.columns)
715
+ columns_to_select = []
716
+ for i, f in enumerate(features_required_by_estimator):
717
+ if (
718
+ i >= len(input_cols)
719
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
720
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
721
+ and quoted_input_cols[i] not in features_in_dataset)
722
+ ):
723
+ missing_features.append(f)
724
+ elif input_cols[i] in features_in_dataset:
725
+ columns_to_select.append(input_cols[i])
726
+ elif unquoted_input_cols[i] in features_in_dataset:
727
+ columns_to_select.append(unquoted_input_cols[i])
728
+ else:
729
+ columns_to_select.append(quoted_input_cols[i])
730
+
731
+ if len(missing_features) > 0:
732
+ raise ValueError(
733
+ "The feature names should match with those that were passed during fit.\n"
734
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
735
+ f"Features in the input dataframe : {input_cols}\n"
736
+ )
737
+ input_df = dataset[columns_to_select]
738
+ input_df.columns = features_required_by_estimator
728
739
 
729
740
  transformed_numpy_array = getattr(estimator, inference_method)(
730
741
  input_df
@@ -697,26 +697,37 @@ class LassoLars(BaseTransformer):
697
697
  # input cols need to match unquoted / quoted
698
698
  input_cols = self.input_cols
699
699
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
700
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
700
701
 
701
702
  estimator = self._sklearn_object
702
703
 
703
- input_df = dataset[input_cols] # Select input columns with quoted column names.
704
- if hasattr(estimator, "feature_names_in_"):
705
- missing_features = []
706
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
707
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
708
- missing_features.append(f)
709
-
710
- if len(missing_features) > 0:
711
- raise ValueError(
712
- "The feature names should match with those that were passed during fit.\n"
713
- f"Features seen during fit call but not present in the input: {missing_features}\n"
714
- f"Features in the input dataframe : {input_cols}\n"
715
- )
716
- input_df.columns = getattr(estimator, "feature_names_in_")
717
- else:
718
- # Just rename the column names to unquoted identifiers.
719
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
704
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
705
+ missing_features = []
706
+ features_in_dataset = set(dataset.columns)
707
+ columns_to_select = []
708
+ for i, f in enumerate(features_required_by_estimator):
709
+ if (
710
+ i >= len(input_cols)
711
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
712
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
713
+ and quoted_input_cols[i] not in features_in_dataset)
714
+ ):
715
+ missing_features.append(f)
716
+ elif input_cols[i] in features_in_dataset:
717
+ columns_to_select.append(input_cols[i])
718
+ elif unquoted_input_cols[i] in features_in_dataset:
719
+ columns_to_select.append(unquoted_input_cols[i])
720
+ else:
721
+ columns_to_select.append(quoted_input_cols[i])
722
+
723
+ if len(missing_features) > 0:
724
+ raise ValueError(
725
+ "The feature names should match with those that were passed during fit.\n"
726
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
727
+ f"Features in the input dataframe : {input_cols}\n"
728
+ )
729
+ input_df = dataset[columns_to_select]
730
+ input_df.columns = features_required_by_estimator
720
731
 
721
732
  transformed_numpy_array = getattr(estimator, inference_method)(
722
733
  input_df
@@ -698,26 +698,37 @@ class LassoLarsCV(BaseTransformer):
698
698
  # input cols need to match unquoted / quoted
699
699
  input_cols = self.input_cols
700
700
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
701
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
701
702
 
702
703
  estimator = self._sklearn_object
703
704
 
704
- input_df = dataset[input_cols] # Select input columns with quoted column names.
705
- if hasattr(estimator, "feature_names_in_"):
706
- missing_features = []
707
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
708
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
709
- missing_features.append(f)
710
-
711
- if len(missing_features) > 0:
712
- raise ValueError(
713
- "The feature names should match with those that were passed during fit.\n"
714
- f"Features seen during fit call but not present in the input: {missing_features}\n"
715
- f"Features in the input dataframe : {input_cols}\n"
716
- )
717
- input_df.columns = getattr(estimator, "feature_names_in_")
718
- else:
719
- # Just rename the column names to unquoted identifiers.
720
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
705
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
706
+ missing_features = []
707
+ features_in_dataset = set(dataset.columns)
708
+ columns_to_select = []
709
+ for i, f in enumerate(features_required_by_estimator):
710
+ if (
711
+ i >= len(input_cols)
712
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
713
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
714
+ and quoted_input_cols[i] not in features_in_dataset)
715
+ ):
716
+ missing_features.append(f)
717
+ elif input_cols[i] in features_in_dataset:
718
+ columns_to_select.append(input_cols[i])
719
+ elif unquoted_input_cols[i] in features_in_dataset:
720
+ columns_to_select.append(unquoted_input_cols[i])
721
+ else:
722
+ columns_to_select.append(quoted_input_cols[i])
723
+
724
+ if len(missing_features) > 0:
725
+ raise ValueError(
726
+ "The feature names should match with those that were passed during fit.\n"
727
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
728
+ f"Features in the input dataframe : {input_cols}\n"
729
+ )
730
+ input_df = dataset[columns_to_select]
731
+ input_df.columns = features_required_by_estimator
721
732
 
722
733
  transformed_numpy_array = getattr(estimator, inference_method)(
723
734
  input_df
@@ -681,26 +681,37 @@ class LassoLarsIC(BaseTransformer):
681
681
  # input cols need to match unquoted / quoted
682
682
  input_cols = self.input_cols
683
683
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
684
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
684
685
 
685
686
  estimator = self._sklearn_object
686
687
 
687
- input_df = dataset[input_cols] # Select input columns with quoted column names.
688
- if hasattr(estimator, "feature_names_in_"):
689
- missing_features = []
690
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
691
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
692
- missing_features.append(f)
693
-
694
- if len(missing_features) > 0:
695
- raise ValueError(
696
- "The feature names should match with those that were passed during fit.\n"
697
- f"Features seen during fit call but not present in the input: {missing_features}\n"
698
- f"Features in the input dataframe : {input_cols}\n"
699
- )
700
- input_df.columns = getattr(estimator, "feature_names_in_")
701
- else:
702
- # Just rename the column names to unquoted identifiers.
703
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
688
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
689
+ missing_features = []
690
+ features_in_dataset = set(dataset.columns)
691
+ columns_to_select = []
692
+ for i, f in enumerate(features_required_by_estimator):
693
+ if (
694
+ i >= len(input_cols)
695
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
696
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
697
+ and quoted_input_cols[i] not in features_in_dataset)
698
+ ):
699
+ missing_features.append(f)
700
+ elif input_cols[i] in features_in_dataset:
701
+ columns_to_select.append(input_cols[i])
702
+ elif unquoted_input_cols[i] in features_in_dataset:
703
+ columns_to_select.append(unquoted_input_cols[i])
704
+ else:
705
+ columns_to_select.append(quoted_input_cols[i])
706
+
707
+ if len(missing_features) > 0:
708
+ raise ValueError(
709
+ "The feature names should match with those that were passed during fit.\n"
710
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
711
+ f"Features in the input dataframe : {input_cols}\n"
712
+ )
713
+ input_df = dataset[columns_to_select]
714
+ input_df.columns = features_required_by_estimator
704
715
 
705
716
  transformed_numpy_array = getattr(estimator, inference_method)(
706
717
  input_df
@@ -634,26 +634,37 @@ class LinearRegression(BaseTransformer):
634
634
  # input cols need to match unquoted / quoted
635
635
  input_cols = self.input_cols
636
636
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
637
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
637
638
 
638
639
  estimator = self._sklearn_object
639
640
 
640
- input_df = dataset[input_cols] # Select input columns with quoted column names.
641
- if hasattr(estimator, "feature_names_in_"):
642
- missing_features = []
643
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
644
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
645
- missing_features.append(f)
646
-
647
- if len(missing_features) > 0:
648
- raise ValueError(
649
- "The feature names should match with those that were passed during fit.\n"
650
- f"Features seen during fit call but not present in the input: {missing_features}\n"
651
- f"Features in the input dataframe : {input_cols}\n"
652
- )
653
- input_df.columns = getattr(estimator, "feature_names_in_")
654
- else:
655
- # Just rename the column names to unquoted identifiers.
656
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
641
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
642
+ missing_features = []
643
+ features_in_dataset = set(dataset.columns)
644
+ columns_to_select = []
645
+ for i, f in enumerate(features_required_by_estimator):
646
+ if (
647
+ i >= len(input_cols)
648
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
649
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
650
+ and quoted_input_cols[i] not in features_in_dataset)
651
+ ):
652
+ missing_features.append(f)
653
+ elif input_cols[i] in features_in_dataset:
654
+ columns_to_select.append(input_cols[i])
655
+ elif unquoted_input_cols[i] in features_in_dataset:
656
+ columns_to_select.append(unquoted_input_cols[i])
657
+ else:
658
+ columns_to_select.append(quoted_input_cols[i])
659
+
660
+ if len(missing_features) > 0:
661
+ raise ValueError(
662
+ "The feature names should match with those that were passed during fit.\n"
663
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
664
+ f"Features in the input dataframe : {input_cols}\n"
665
+ )
666
+ input_df = dataset[columns_to_select]
667
+ input_df.columns = features_required_by_estimator
657
668
 
658
669
  transformed_numpy_array = getattr(estimator, inference_method)(
659
670
  input_df
@@ -748,26 +748,37 @@ class LogisticRegression(BaseTransformer):
748
748
  # input cols need to match unquoted / quoted
749
749
  input_cols = self.input_cols
750
750
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
751
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
751
752
 
752
753
  estimator = self._sklearn_object
753
754
 
754
- input_df = dataset[input_cols] # Select input columns with quoted column names.
755
- if hasattr(estimator, "feature_names_in_"):
756
- missing_features = []
757
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
758
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
759
- missing_features.append(f)
760
-
761
- if len(missing_features) > 0:
762
- raise ValueError(
763
- "The feature names should match with those that were passed during fit.\n"
764
- f"Features seen during fit call but not present in the input: {missing_features}\n"
765
- f"Features in the input dataframe : {input_cols}\n"
766
- )
767
- input_df.columns = getattr(estimator, "feature_names_in_")
768
- else:
769
- # Just rename the column names to unquoted identifiers.
770
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
755
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
756
+ missing_features = []
757
+ features_in_dataset = set(dataset.columns)
758
+ columns_to_select = []
759
+ for i, f in enumerate(features_required_by_estimator):
760
+ if (
761
+ i >= len(input_cols)
762
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
763
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
764
+ and quoted_input_cols[i] not in features_in_dataset)
765
+ ):
766
+ missing_features.append(f)
767
+ elif input_cols[i] in features_in_dataset:
768
+ columns_to_select.append(input_cols[i])
769
+ elif unquoted_input_cols[i] in features_in_dataset:
770
+ columns_to_select.append(unquoted_input_cols[i])
771
+ else:
772
+ columns_to_select.append(quoted_input_cols[i])
773
+
774
+ if len(missing_features) > 0:
775
+ raise ValueError(
776
+ "The feature names should match with those that were passed during fit.\n"
777
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
778
+ f"Features in the input dataframe : {input_cols}\n"
779
+ )
780
+ input_df = dataset[columns_to_select]
781
+ input_df.columns = features_required_by_estimator
771
782
 
772
783
  transformed_numpy_array = getattr(estimator, inference_method)(
773
784
  input_df
@@ -769,26 +769,37 @@ class LogisticRegressionCV(BaseTransformer):
769
769
  # input cols need to match unquoted / quoted
770
770
  input_cols = self.input_cols
771
771
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
772
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
772
773
 
773
774
  estimator = self._sklearn_object
774
775
 
775
- input_df = dataset[input_cols] # Select input columns with quoted column names.
776
- if hasattr(estimator, "feature_names_in_"):
777
- missing_features = []
778
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
779
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
780
- missing_features.append(f)
781
-
782
- if len(missing_features) > 0:
783
- raise ValueError(
784
- "The feature names should match with those that were passed during fit.\n"
785
- f"Features seen during fit call but not present in the input: {missing_features}\n"
786
- f"Features in the input dataframe : {input_cols}\n"
787
- )
788
- input_df.columns = getattr(estimator, "feature_names_in_")
789
- else:
790
- # Just rename the column names to unquoted identifiers.
791
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
776
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
777
+ missing_features = []
778
+ features_in_dataset = set(dataset.columns)
779
+ columns_to_select = []
780
+ for i, f in enumerate(features_required_by_estimator):
781
+ if (
782
+ i >= len(input_cols)
783
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
784
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
785
+ and quoted_input_cols[i] not in features_in_dataset)
786
+ ):
787
+ missing_features.append(f)
788
+ elif input_cols[i] in features_in_dataset:
789
+ columns_to_select.append(input_cols[i])
790
+ elif unquoted_input_cols[i] in features_in_dataset:
791
+ columns_to_select.append(unquoted_input_cols[i])
792
+ else:
793
+ columns_to_select.append(quoted_input_cols[i])
794
+
795
+ if len(missing_features) > 0:
796
+ raise ValueError(
797
+ "The feature names should match with those that were passed during fit.\n"
798
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
799
+ f"Features in the input dataframe : {input_cols}\n"
800
+ )
801
+ input_df = dataset[columns_to_select]
802
+ input_df.columns = features_required_by_estimator
792
803
 
793
804
  transformed_numpy_array = getattr(estimator, inference_method)(
794
805
  input_df