snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (174)
  1. snowflake/ml/_internal/file_utils.py +8 -35
  2. snowflake/ml/_internal/utils/identifier.py +74 -7
  3. snowflake/ml/model/_core_requirements.py +1 -1
  4. snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
  5. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
  6. snowflake/ml/model/_handlers/_base.py +3 -1
  7. snowflake/ml/model/_handlers/sklearn.py +1 -0
  8. snowflake/ml/model/_handlers/xgboost.py +1 -1
  9. snowflake/ml/model/_model.py +24 -19
  10. snowflake/ml/model/_model_meta.py +24 -15
  11. snowflake/ml/model/type_hints.py +5 -11
  12. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
  13. snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
  14. snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
  15. snowflake/ml/modeling/cluster/birch.py +28 -17
  16. snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
  17. snowflake/ml/modeling/cluster/dbscan.py +28 -17
  18. snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
  19. snowflake/ml/modeling/cluster/k_means.py +28 -17
  20. snowflake/ml/modeling/cluster/mean_shift.py +28 -17
  21. snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
  22. snowflake/ml/modeling/cluster/optics.py +28 -17
  23. snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
  24. snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
  25. snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
  26. snowflake/ml/modeling/compose/column_transformer.py +28 -17
  27. snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
  28. snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
  29. snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
  30. snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
  31. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
  32. snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
  33. snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
  34. snowflake/ml/modeling/covariance/oas.py +28 -17
  35. snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
  36. snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
  37. snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
  38. snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
  39. snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
  40. snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
  41. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
  42. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
  43. snowflake/ml/modeling/decomposition/pca.py +28 -17
  44. snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
  45. snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
  46. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
  47. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
  48. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
  49. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
  50. snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
  51. snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
  52. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
  53. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
  54. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
  55. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
  56. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
  57. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
  58. snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
  59. snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
  60. snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
  61. snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
  62. snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
  63. snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
  64. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
  65. snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
  66. snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
  67. snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
  68. snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
  69. snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
  70. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
  71. snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
  72. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
  73. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
  74. snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
  75. snowflake/ml/modeling/impute/knn_imputer.py +28 -17
  76. snowflake/ml/modeling/impute/missing_indicator.py +28 -17
  77. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
  78. snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
  79. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
  80. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
  81. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
  83. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
  84. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
  85. snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
  86. snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
  87. snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
  88. snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
  89. snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
  90. snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
  91. snowflake/ml/modeling/linear_model/lars.py +28 -17
  92. snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
  93. snowflake/ml/modeling/linear_model/lasso.py +28 -17
  94. snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
  95. snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
  96. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
  97. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
  98. snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
  99. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
  100. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
  101. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
  102. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
  103. snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
  104. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
  105. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
  106. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
  107. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
  108. snowflake/ml/modeling/linear_model/perceptron.py +28 -17
  109. snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
  110. snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
  111. snowflake/ml/modeling/linear_model/ridge.py +28 -17
  112. snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
  113. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
  114. snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
  115. snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
  116. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
  117. snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
  118. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
  119. snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
  120. snowflake/ml/modeling/manifold/isomap.py +28 -17
  121. snowflake/ml/modeling/manifold/mds.py +28 -17
  122. snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
  123. snowflake/ml/modeling/manifold/tsne.py +28 -17
  124. snowflake/ml/modeling/metrics/classification.py +6 -1
  125. snowflake/ml/modeling/metrics/regression.py +517 -9
  126. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
  127. snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
  128. snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
  129. snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
  130. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
  131. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
  132. snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
  133. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
  134. snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
  135. snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
  136. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
  140. snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
  141. snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
  142. snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
  143. snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
  144. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
  145. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
  146. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
  147. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
  148. snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
  149. snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
  150. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  151. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  152. snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
  153. snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
  154. snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
  155. snowflake/ml/modeling/svm/linear_svc.py +28 -17
  156. snowflake/ml/modeling/svm/linear_svr.py +28 -17
  157. snowflake/ml/modeling/svm/nu_svc.py +28 -17
  158. snowflake/ml/modeling/svm/nu_svr.py +28 -17
  159. snowflake/ml/modeling/svm/svc.py +28 -17
  160. snowflake/ml/modeling/svm/svr.py +28 -17
  161. snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
  162. snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
  163. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
  164. snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
  165. snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
  166. snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
  167. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
  168. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
  169. snowflake/ml/registry/model_registry.py +49 -65
  170. snowflake/ml/version.py +1 -1
  171. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
  172. snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
  173. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  174. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -640,26 +640,37 @@ class PolynomialCountSketch(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
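Every modeling wrapper below repeats this same change: instead of assuming the dataset columns carry quoted identifiers, the inference path now tries the configured column name, its unquoted form, and its escaped (quoted) form when lining the dataset up with the estimator's feature_names_in_. The following standalone sketch illustrates that resolution order on a plain pandas DataFrame. The unescape/escape helpers and resolve_input_columns are simplified, hypothetical stand-ins written only for illustration (not the snowflake-ml-python API), assuming that get_unescaped_names strips and get_escaped_names adds surrounding double quotes.

# Minimal sketch of the new column-resolution logic, under the assumptions above.
import pandas as pd


def unescape(name: str) -> str:
    # Assumed: strip surrounding double quotes from a quoted Snowflake identifier.
    return name[1:-1] if name.startswith('"') and name.endswith('"') else name


def escape(name: str) -> str:
    # Assumed: wrap an identifier in double quotes if it is not already quoted.
    return name if name.startswith('"') else f'"{name}"'


def resolve_input_columns(dataset: pd.DataFrame, input_cols, required_features):
    unquoted = [unescape(c) for c in input_cols]
    quoted = [escape(c) for c in unquoted]
    present = set(dataset.columns)
    missing, selected = [], []
    for i, feature in enumerate(required_features):
        # Try the configured name, its unquoted form, and its quoted form, in that order.
        candidates = (input_cols[i], unquoted[i], quoted[i]) if i < len(input_cols) else ()
        if feature not in candidates or not any(c in present for c in candidates):
            missing.append(feature)
        else:
            selected.append(next(c for c in candidates if c in present))
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    frame = dataset[selected]
    frame.columns = list(required_features)  # rename to the names the estimator expects
    return frame


# One column arrives unquoted and one quoted, as a Snowflake result set often does.
df = pd.DataFrame({"FEAT_A": [1, 2], '"feat_b"': [3, 4]})
out = resolve_input_columns(df, ['"FEAT_A"', '"feat_b"'], ["FEAT_A", "feat_b"])
print(list(out.columns))  # ['FEAT_A', 'feat_b']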
@@ -627,26 +627,37 @@ class RBFSampler(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -625,26 +625,37 @@ class SkewedChi2Sampler(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -659,26 +659,37 @@ class KernelRidge(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -648,26 +648,37 @@ class LGBMClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -648,26 +648,37 @@ class LGBMRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -668,26 +668,37 @@ class ARDRegression(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -677,26 +677,37 @@ class BayesianRidge(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -683,26 +683,37 @@ class ElasticNet(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -719,26 +719,37 @@ class ElasticNetCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -664,26 +664,37 @@ class GammaRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
        unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df