PyPI - snowflake-ml-python - Versions diffs - 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

snowflake-ml-python 1.0.1 (py3-none-any.whl) → 1.0.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (174)

snowflake/ml/_internal/file_utils.py +8 -35
snowflake/ml/_internal/utils/identifier.py +74 -7
snowflake/ml/model/_core_requirements.py +1 -1
snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
snowflake/ml/model/_handlers/_base.py +3 -1
snowflake/ml/model/_handlers/sklearn.py +1 -0
snowflake/ml/model/_handlers/xgboost.py +1 -1
snowflake/ml/model/_model.py +24 -19
snowflake/ml/model/_model_meta.py +24 -15
snowflake/ml/model/type_hints.py +5 -11
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
snowflake/ml/modeling/cluster/birch.py +28 -17
snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
snowflake/ml/modeling/cluster/dbscan.py +28 -17
snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
snowflake/ml/modeling/cluster/k_means.py +28 -17
snowflake/ml/modeling/cluster/mean_shift.py +28 -17
snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
snowflake/ml/modeling/cluster/optics.py +28 -17
snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
snowflake/ml/modeling/compose/column_transformer.py +28 -17
snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
snowflake/ml/modeling/covariance/oas.py +28 -17
snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
snowflake/ml/modeling/decomposition/pca.py +28 -17
snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
snowflake/ml/modeling/impute/knn_imputer.py +28 -17
snowflake/ml/modeling/impute/missing_indicator.py +28 -17
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
snowflake/ml/modeling/linear_model/lars.py +28 -17
snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
snowflake/ml/modeling/linear_model/lasso.py +28 -17
snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
snowflake/ml/modeling/linear_model/perceptron.py +28 -17
snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
snowflake/ml/modeling/linear_model/ridge.py +28 -17
snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
snowflake/ml/modeling/manifold/isomap.py +28 -17
snowflake/ml/modeling/manifold/mds.py +28 -17
snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
snowflake/ml/modeling/manifold/tsne.py +28 -17
snowflake/ml/modeling/metrics/classification.py +6 -1
snowflake/ml/modeling/metrics/regression.py +517 -9
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
snowflake/ml/modeling/pipeline/pipeline.py +24 -0
snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
snowflake/ml/modeling/svm/linear_svc.py +28 -17
snowflake/ml/modeling/svm/linear_svr.py +28 -17
snowflake/ml/modeling/svm/nu_svc.py +28 -17
snowflake/ml/modeling/svm/nu_svr.py +28 -17
snowflake/ml/modeling/svm/svc.py +28 -17
snowflake/ml/modeling/svm/svr.py +28 -17
snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
snowflake/ml/registry/model_registry.py +49 -65
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
{snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0

snowflake/ml/modeling/neural_network/mlp_regressor.py CHANGED Viewed

@@ -798,26 +798,37 @@ class MLPRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake/ml/modeling/pipeline/pipeline.py CHANGED Viewed

@@ -14,6 +14,7 @@ from sklearn.utils import metaestimators
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
+from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
 from snowflake.ml.modeling.framework import _utils, base
 _PROJECT = "ModelDevelopment"
@@ -103,6 +104,8 @@ class Pipeline(base.BaseTransformer):
         self._transformers_to_input_indices: Dict[str, List[int]] = {}
         self._is_convertable_to_sklearn = True
+        self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
         deps: Set[str] = {f"pandas=={pd.__version__}", f"scikit-learn=={skversion}"}
         for _, obj in steps:
             if isinstance(obj, base.BaseTransformer):
@@ -241,6 +244,7 @@ class Pipeline(base.BaseTransformer):
                 step_name=estimator[0], all_cols=all_cols, input_cols=estimator[1].get_input_cols()
             )
+        self._get_model_signatures(dataset=dataset)
         self._is_fitted = True
         return self
@@ -309,6 +313,7 @@ class Pipeline(base.BaseTransformer):
                 res = estimator[1].fit(transformed_dataset).transform(transformed_dataset)
             return res
+        self._get_model_signatures(dataset=dataset)
         self._is_fitted = True
         return transformed_dataset
@@ -346,6 +351,7 @@ class Pipeline(base.BaseTransformer):
             else:
                 transformed_dataset = estimator[1].fit(transformed_dataset).predict(transformed_dataset)
+        self._get_model_signatures(dataset=dataset)
         self._is_fitted = True
         return transformed_dataset
@@ -559,3 +565,21 @@ class Pipeline(base.BaseTransformer):
     def _get_dependencies(self) -> List[str]:
         return self._deps
+    def _get_model_signatures(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> None:
+        self._model_signature_dict = dict()
+        input_columns = self._get_sanitized_list_of_columns(dataset.columns)
+        inputs_signature = _infer_signature(dataset[input_columns], "input")
+        estimator_step = self._get_estimator()
+        if estimator_step:
+            estimator_signatures = estimator_step[1].model_signatures
+            for method, signature in estimator_signatures.items():
+                self._model_signature_dict[method] = ModelSignature(inputs=inputs_signature, outputs=signature.outputs)
+    @property
+    def model_signatures(self) -> Dict[str, ModelSignature]:
+        if self._model_signature_dict is None:
+            raise RuntimeError("Estimator not fitted before accessing property model_signatures! ")
+        return self._model_signature_dict

snowflake/ml/modeling/preprocessing/one_hot_encoder.py CHANGED Viewed

@@ -800,7 +800,7 @@ class OneHotEncoder(base.BaseTransformer):
         state_df = dataset._session.create_dataframe(state_pandas)
         transformed_dataset = dataset
-        origional_dataset_columns = transformed_dataset.columns[:]
+        original_dataset_columns = transformed_dataset.columns[:]
         all_output_cols = []
         for input_col in self.input_cols:
             output_cols = [
@@ -818,7 +818,7 @@ class OneHotEncoder(base.BaseTransformer):
         transformed_dataset = self._handle_unknown_in_transform(transformed_dataset)
         # Reorder columns. Passthrough columns are added at the right to the output of the transformers.
-        transformed_dataset = transformed_dataset[all_output_cols + origional_dataset_columns]
+        transformed_dataset = transformed_dataset[all_output_cols + original_dataset_columns]
         return transformed_dataset
     def _transform_snowpark_sparse_udf(self, dataset: snowpark.DataFrame) -> snowpark.DataFrame:
@@ -895,15 +895,14 @@ class OneHotEncoder(base.BaseTransformer):
             Output dataset.
         """
         encoder_sklearn = self.to_sklearn()
         transformed_dataset = encoder_sklearn.transform(dataset[self.input_cols])
-        if not self.sparse:
-            dataset = dataset.copy()
-            dataset[self.get_output_cols()] = transformed_dataset
-            return dataset
+        if self.sparse:
+            return transformed_dataset
-        return transformed_dataset
+        dataset = dataset.copy()
+        dataset[self.get_output_cols()] = transformed_dataset
+        return dataset
     def _create_unfitted_sklearn_object(self) -> preprocessing.OneHotEncoder:
         sklearn_args = self.get_sklearn_args(
@@ -1331,17 +1330,17 @@ class OneHotEncoder(base.BaseTransformer):
             Output columns.
         """
         if self.sparse:
-            output_cols = self.output_cols
-        else:
-            output_cols = (
-                [
-                    identifier.quote_name_without_upper_casing(col)
-                    for input_col in self.input_cols
-                    for col in self._dense_output_cols_mappings[input_col]
-                ]
-                if self._dense_output_cols_mappings
-                else []
-            )
+            return self.output_cols
+        output_cols = (
+            [
+                identifier.get_inferred_name(col)
+                for input_col in self.input_cols
+                for col in self._dense_output_cols_mappings[input_col]
+            ]
+            if self._dense_output_cols_mappings
+            else []
+        )
         return output_cols
     def _get_dense_output_cols_mappings(self) -> None:

snowflake/ml/modeling/preprocessing/polynomial_features.py CHANGED Viewed

@@ -639,26 +639,37 @@ class PolynomialFeatures(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake/ml/modeling/semi_supervised/label_propagation.py CHANGED Viewed

@@ -643,26 +643,37 @@ class LabelPropagation(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake/ml/modeling/semi_supervised/label_spreading.py CHANGED Viewed

@@ -652,26 +652,37 @@ class LabelSpreading(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake/ml/modeling/svm/linear_svc.py CHANGED Viewed

@@ -703,26 +703,37 @@ class LinearSVC(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake/ml/modeling/svm/linear_svr.py CHANGED Viewed

@@ -676,26 +676,37 @@ class LinearSVR(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake/ml/modeling/svm/nu_svc.py CHANGED Viewed

@@ -714,26 +714,37 @@ class NuSVC(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake/ml/modeling/svm/nu_svr.py CHANGED Viewed

@@ -675,26 +675,37 @@ class NuSVR(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
         estimator = self._sklearn_object
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator =  getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                    i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+                ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df

snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl