snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
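Most of the churn in this release is a single, mechanically applied patch to the generated modeling wrappers (the long run of +28 -17 files above); representative hunks follow. The patch builds a third spelling of each input column via identifier.get_escaped_names, so inference can match columns whether the estimator saw quoted or unquoted Snowflake identifiers during fit. As a rough, hedged illustration of the quoted/unquoted identifier convention involved (general Snowflake naming rules only, not the library's actual get_unescaped_names / get_escaped_names implementation):

# Illustration only -- general Snowflake identifier rules, not the library's helpers.
def unescape_name(name: str) -> str:
    # Quoted identifiers keep their case; embedded quotes are doubled inside the quotes.
    if name.startswith('"') and name.endswith('"'):
        return name[1:-1].replace('""', '"')
    # Unquoted identifiers are case-insensitive and fold to upper case.
    return name.upper()

def escape_name(name: str) -> str:
    # Wrap in double quotes, doubling embedded quotes, so the exact case is preserved.
    return '"' + name.replace('"', '""') + '"'

assert unescape_name('"Feature One"') == "Feature One"
assert escape_name("Feature One") == '"Feature One"'
assert unescape_name("feature_one") == "FEATURE_ONE"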
snowflake/ml/modeling/feature_selection/select_k_best.py
@@ -621,26 +621,37 @@ class SelectKBest(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
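The same change recurs in every hunk that follows: instead of selecting dataset[input_cols] directly and renaming afterwards, each generated wrapper now resolves every feature the fitted estimator expects against the original, unquoted, and quoted spellings of its input columns, and only then selects. A minimal standalone sketch of that resolution step, using plain pandas and a hypothetical helper name (in the package the logic is inlined into each generated class, as above):

import pandas as pd

def select_for_inference(dataset, features_required, input_cols, unquoted_cols, quoted_cols):
    # Pick, for each feature the estimator expects, whichever spelling of the
    # configured input column is actually present in the dataframe.
    present = set(dataset.columns)
    columns_to_select, missing = [], []
    for i, f in enumerate(features_required):
        if (
            i >= len(input_cols)
            or (input_cols[i] != f and unquoted_cols[i] != f and quoted_cols[i] != f)
            or (input_cols[i] not in present and unquoted_cols[i] not in present
                and quoted_cols[i] not in present)
        ):
            missing.append(f)
        elif input_cols[i] in present:
            columns_to_select.append(input_cols[i])
        elif unquoted_cols[i] in present:
            columns_to_select.append(unquoted_cols[i])
        else:
            columns_to_select.append(quoted_cols[i])
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    input_df = dataset[columns_to_select]
    input_df.columns = features_required  # rename to the names the estimator was fitted with
    return input_df

# Example: one unquoted and one quoted column name in the incoming dataframe.
df = pd.DataFrame({"AGE": [42], '"Income"': [1000]})
out = select_for_inference(df, ["AGE", "Income"], ["AGE", '"Income"'], ["AGE", "Income"], ['"AGE"', '"Income"'])
# out.columns -> ["AGE", "Income"]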
snowflake/ml/modeling/feature_selection/select_percentile.py
@@ -620,26 +620,37 @@ class SelectPercentile(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
@@ -680,26 +680,37 @@ class SequentialFeatureSelector(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/feature_selection/variance_threshold.py
@@ -613,26 +613,37 @@ class VarianceThreshold(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py
@@ -706,26 +706,37 @@ class GaussianProcessClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py
@@ -689,26 +689,37 @@ class GaussianProcessRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/impute/iterative_imputer.py
@@ -732,26 +732,37 @@ class IterativeImputer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/impute/knn_imputer.py
@@ -667,26 +667,37 @@ class KNNImputer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/impute/missing_indicator.py
@@ -641,26 +641,37 @@ class MissingIndicator(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py
@@ -616,26 +616,37 @@ class AdditiveChi2Sampler(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
snowflake/ml/modeling/kernel_approximation/nystroem.py
@@ -664,26 +664,37 @@ class Nystroem(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df