snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -365,18 +365,24 @@ class ExtraTreeRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
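The validator now does double duty: the fitted-estimator and session checks are unchanged, but the method additionally returns the estimator's dependencies as resolved against the Snowflake Anaconda channel, which the batch methods cache on `self._deps`. A minimal sketch of that contract; `resolve_available_packages` is a hypothetical stand-in, not the library's actual internal API:

```python
from typing import List

def batch_inference_validate_snowpark(estimator, dataset, inference_method: str) -> List[str]:
    # Pre-1.4.0 behavior: validate only. Both failures raise
    # SnowflakeMLException in the real implementation.
    if not estimator._is_fitted:
        raise RuntimeError(f"Estimator must be fitted before calling {inference_method}().")
    if dataset._session is None:
        raise RuntimeError("Dataset must be associated with an active Snowpark session.")
    # New in 1.4.0: also report which declared dependencies exist in the
    # Snowflake Anaconda channel, so callers can pass them to the sproc/UDF.
    return resolve_available_packages(estimator._deps, session=dataset._session)  # hypothetical helper
```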
@@ -450,7 +456,7 @@ class ExtraTreeRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
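This kwargs change repeats across all of the generated estimator files listed above (the recurring +33/−61 pattern): call sites stop precomputing a pass-through column list and instead forward the `drop_input_cols` flag, moving the column bookkeeping into the inference handlers. The deleted helper is small enough to restate standalone; this sketch reproduces its logic outside the class:

```python
from typing import List, Sequence

def get_pass_through_columns(
    dataset_columns: Sequence[str], output_cols: Sequence[str], drop_input_cols: bool
) -> List[str]:
    # Logic of the removed _get_pass_through_columns: keep every column that
    # is not an output column, unless input columns are dropped entirely.
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))

# The 1.4.0 handlers derive the same list internally from drop_input_cols.
print(get_pass_through_columns(["SEPAL_LENGTH", "SEPAL_WIDTH", "PRED"], ["PRED"], False))
# ['SEPAL_LENGTH', 'SEPAL_WIDTH'] (set order may vary)
```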
@@ -510,16 +516,16 @@ class ExtraTreeRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
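The only change in this hunk is the casing of the sentinel ("ARRAY" becomes "array"), presumably to match what the new handler layer compares against; the inference branches themselves are untouched. Condensed, the logic above amounts to:

```python
def infer_expected_dtype(sklearn_obj, n_output_cols: int) -> str:
    # A clustering or decomposition transformer whose output column count does
    # not match n_clusters / n_components emits one list-valued ("array") column.
    if hasattr(sklearn_obj, "n_clusters") and sklearn_obj.n_clusters != n_output_cols:
        return "array"
    if hasattr(sklearn_obj, "n_components") and sklearn_obj.n_components != n_output_cols:
        return "array"
    return ""  # fall through to signature-based inference from the input types
```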
@@ -537,7 +543,7 @@ class ExtraTreeRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -588,7 +594,7 @@ class ExtraTreeRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -606,44 +612,6 @@ class ExtraTreeRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
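Every generated estimator loses its private copy of `_get_output_column_names`, yet the method is still called from `train_fit_predict` above, so the implementation has presumably been hoisted into the shared base class (consistent with `snowflake/ml/modeling/framework/base.py` gaining 55 lines in this release). Restated standalone, with the Snowflake identifier normalization stripped out for brevity, the removed naming scheme was:

```python
import numpy

def output_column_names(prefix: str, classes=None) -> list:
    if classes is None:
        return [prefix]  # not a classifier: a single output column
    if isinstance(classes, numpy.ndarray):
        return [f"{prefix}{c}" for c in classes.tolist()]
    # Multioutput estimators expose classes_ as a list of ndarrays, one per output.
    cols = []
    for i, cl in enumerate(classes):
        if len(cl) == 2:
            cols.append(f"{prefix}{i}_{cl[0]}")  # binary: the two classes are complementary
        else:
            cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
    return cols

print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1, 2])))
# ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']
```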
@@ -683,7 +651,7 @@ class ExtraTreeRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -748,7 +716,7 @@ class ExtraTreeRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -809,7 +777,7 @@ class ExtraTreeRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -874,7 +842,7 @@ class ExtraTreeRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -930,13 +898,17 @@ class ExtraTreeRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
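`score()` on a Snowpark DataFrame now runs through the same validator as the other batch methods, so the scoring stored procedure pins the channel-resolved package list instead of the raw `self._get_dependencies()` output. The flow, roughly:

```python
def resolve_score_dependencies(estimator, dataset) -> list:
    # New in 1.4.0: validate and resolve dependencies against the Snowflake
    # Anaconda channel, caching the result on the estimator.
    estimator._deps = estimator._batch_inference_validate_snowpark(
        dataset=dataset, inference_method="score"
    )
    # The score sproc then receives snowpark itself plus the resolved packages.
    return ["snowflake-snowpark-python"] + estimator._deps
```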
@@ -1010,9 +982,9 @@ class ExtraTreeRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -483,18 +483,24 @@ class XGBClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -568,7 +574,7 @@ class XGBClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -628,16 +634,16 @@ class XGBClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -655,7 +661,7 @@ class XGBClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -706,7 +712,7 @@ class XGBClassifier(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -724,44 +730,6 @@ class XGBClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -803,7 +771,7 @@ class XGBClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -870,7 +838,7 @@ class XGBClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -931,7 +899,7 @@ class XGBClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -996,7 +964,7 @@ class XGBClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1052,13 +1020,17 @@ class XGBClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['xgboost'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1132,9 +1104,9 @@ class XGBClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -482,18 +482,24 @@ class XGBRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -567,7 +573,7 @@ class XGBRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -627,16 +633,16 @@ class XGBRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -654,7 +660,7 @@ class XGBRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -705,7 +711,7 @@ class XGBRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
        )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -723,44 +729,6 @@ class XGBRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -800,7 +768,7 @@ class XGBRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -865,7 +833,7 @@ class XGBRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -926,7 +894,7 @@ class XGBRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -991,7 +959,7 @@ class XGBRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1047,13 +1015,17 @@ class XGBRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['xgboost'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1127,9 +1099,9 @@ class XGBRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):