snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -688,26 +688,37 @@ class KNeighborsClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
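The change above is repeated verbatim in the remaining hunks for the other neighbors and neural_network estimators: instead of selecting only the quoted column names, the wrapper now resolves each required feature against the original, unquoted, or quoted spelling of the input column, whichever is actually present in the dataset. Below is a minimal standalone sketch of that resolution logic, assuming plain pandas DataFrames; the helper names (select_input_frame, unquote, quote) are illustrative stand-ins, not the library's identifier.get_unescaped_names / identifier.get_escaped_names API.

# Hypothetical sketch of the new column-resolution step, outside the generated wrappers.
from typing import List, Sequence

import pandas as pd


def unquote(names: Sequence[str]) -> List[str]:
    # Crude stand-in for identifier.get_unescaped_names: strip surrounding double quotes.
    return [n[1:-1] if len(n) >= 2 and n[0] == n[-1] == '"' else n for n in names]


def quote(names: Sequence[str]) -> List[str]:
    # Crude stand-in for identifier.get_escaped_names: wrap names in double quotes.
    return [n if n.startswith('"') else f'"{n}"' for n in names]


def select_input_frame(dataset: pd.DataFrame, input_cols: List[str], required: Sequence[str]) -> pd.DataFrame:
    # For each feature the estimator requires, accept the input column as given,
    # unquoted, or quoted, as long as one of those spellings exists in the dataset.
    unquoted = unquote(input_cols)
    quoted = quote(unquoted)
    available = set(dataset.columns)
    missing, selected = [], []
    for i, feature in enumerate(required):
        candidates = [input_cols[i], unquoted[i], quoted[i]] if i < len(input_cols) else []
        if feature not in candidates or not any(c in available for c in candidates):
            missing.append(feature)
            continue
        # Prefer the spelling order used in the diff: original, then unquoted, then quoted.
        selected.append(next(c for c in candidates if c in available))
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    frame = dataset[selected]
    frame.columns = list(required)
    return frame

Calling select_input_frame(df, input_cols, features_required) mirrors the dataset[columns_to_select] selection and the input_df.columns assignment in the hunks.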
@@ -690,26 +690,37 @@ class KNeighborsRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -669,26 +669,37 @@ class KernelDensity(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -697,26 +697,37 @@ class LocalOutlierFactor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -628,26 +628,37 @@ class NearestCentroid(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -680,26 +680,37 @@ class NearestNeighbors(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -699,26 +699,37 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -700,26 +700,37 @@ class RadiusNeighborsClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -690,26 +690,37 @@ class RadiusNeighborsRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -649,26 +649,37 @@ class BernoulliRBM(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -802,26 +802,37 @@ class MLPClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
        unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df