PyPI - snowflake-ml-python - Versions diffs - 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl - Mend

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

snowflake/ml/modeling/xgboost/xgbrf_regressor.py CHANGED Viewed

@@ -125,111 +125,171 @@ class XGBRFRegressor(BaseTransformer):
         can seriously hurt performance in gradient boosting. Set the batch_size as large as possible
         based on the available memory.
-    n_estimators: int
+    n_estimators: Optional[int]
             Number of trees in random forest to fit.
-        max_depth:  Optional[int]
+        max_depth:  typing.Optional[int]
             Maximum tree depth for base learners.
-        max_leaves :
+        max_leaves: typing.Optional[int]
             Maximum number of leaves; 0 indicates no limit.
-        max_bin :
+        max_bin: typing.Optional[int]
             If using histogram-based algorithm, maximum number of bins per feature
-        grow_policy :
-            Tree growing policy. 0: favor splitting at nodes closest to the node, i.e. grow
-            depth-wise. 1: favor splitting at nodes with highest loss change.
-        learning_rate: Optional[float]
+        grow_policy: typing.Optional[str]
+            Tree growing policy.
+            - depthwise: Favors splitting at nodes closest to the node,
+            - lossguide: Favors splitting at nodes with highest loss change.
+        learning_rate: typing.Optional[float]
             Boosting learning rate (xgb's "eta")
-        verbosity: Optional[int]
+        verbosity: typing.Optional[int]
             The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
-        objective: typing.Union[str, typing.Callable[[numpy.ndarray, numpy.ndarray], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
-            Specify the learning task and the corresponding learning objective or
-            a custom objective function to be used (see note below).
-        booster: Optional[str]
-            Specify which booster to use: gbtree, gblinear or dart.
-        tree_method: Optional[str]
+        objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
+            Specify the learning task and the corresponding learning objective or a custom
+            objective function to be used.
+            For custom objective, see :doc:`/tutorials/custom_metric_obj` and
+            :ref:`custom-obj-metric` for more information, along with the end note for
+            function signatures.
+        booster: typing.Optional[str]
+            Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.
+        tree_method: typing.Optional[str]
             Specify which tree method to use.  Default to auto.  If this parameter is set to
             default, XGBoost will choose the most conservative option available.  It's
             recommended to study this option from the parameters document :doc:`tree method
             </treemethod>`
-        n_jobs: Optional[int]
+        n_jobs: typing.Optional[int]
             Number of parallel threads used to run xgboost.  When used with other
             Scikit-Learn algorithms like grid search, you may choose which algorithm to
             parallelize and balance the threads.  Creating thread contention will
             significantly slow down both algorithms.
-        gamma: Optional[float]
-            (min_split_loss) Minimum loss reduction required to make a further partition on a
-            leaf node of the tree.
-        min_child_weight: Optional[float]
+        gamma: typing.Optional[float]
+            (min_split_loss) Minimum loss reduction required to make a further partition on
+            a leaf node of the tree.
+        min_child_weight: typing.Optional[float]
             Minimum sum of instance weight(hessian) needed in a child.
-        max_delta_step: Optional[float]
+        max_delta_step: typing.Optional[float]
             Maximum delta step we allow each tree's weight estimation to be.
-        subsample: Optional[float]
+        subsample: typing.Optional[float]
             Subsample ratio of the training instance.
-        sampling_method :
-            Sampling method. Used only by `gpu_hist` tree method.
-              - `uniform`: select random training instances uniformly.
-              - `gradient_based` select random training instances with higher probability when
-                the gradient and hessian are larger. (cf. CatBoost)
-        colsample_bytree: Optional[float]
+        sampling_method: typing.Optional[str]
+            Sampling method. Used only by the GPU version of ``hist`` tree method.
+            - ``uniform``: Select random training instances uniformly.
+            - ``gradient_based``: Select random training instances with higher probability
+                when the gradient and hessian are larger. (cf. CatBoost)
+        colsample_bytree: typing.Optional[float]
             Subsample ratio of columns when constructing each tree.
-        colsample_bylevel: Optional[float]
+        colsample_bylevel: typing.Optional[float]
             Subsample ratio of columns for each level.
-        colsample_bynode: Optional[float]
+        colsample_bynode: typing.Optional[float]
             Subsample ratio of columns for each split.
-        reg_alpha: Optional[float]
+        reg_alpha: typing.Optional[float]
             L1 regularization term on weights (xgb's alpha).
-        reg_lambda: Optional[float]
+        reg_lambda: typing.Optional[float]
             L2 regularization term on weights (xgb's lambda).
-        scale_pos_weight: Optional[float]
+        scale_pos_weight: typing.Optional[float]
             Balancing of positive and negative weights.
-        base_score: Optional[float]
+        base_score: typing.Optional[float]
             The initial prediction score of all instances, global bias.
-        random_state: Optional[Union[numpy.random.RandomState, int]]
+        random_state: typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
             Random number seed.
                Using gblinear booster with shotgun updater is nondeterministic as
                it uses Hogwild algorithm.
-        missing: float, default np.nan
-            Value in the data which needs to be present as a missing value.
-        num_parallel_tree: Optional[int]
+        missing: float
+            Value in the data which needs to be present as a missing value. Default to
+            :py:data:`numpy.nan`.
+        num_parallel_tree: typing.Optional[int]
             Used for boosting random forest.
-        monotone_constraints: Optional[Union[Dict[str, int], str]]
+        monotone_constraints: typing.Union[typing.Dict[str, int], str, NoneType]
             Constraint of variable monotonicity.  See :doc:`tutorial </tutorials/monotonic>`
             for more information.
-        interaction_constraints: Optional[Union[str, List[Tuple[str]]]]
+        interaction_constraints: typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
             Constraints for interaction representing permitted interactions.  The
             constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
             3, 4]]``, where each inner list is a group of indices of features that are
             allowed to interact with each other.  See :doc:`tutorial
             </tutorials/feature_interaction_constraint>` for more information
-        importance_type: Optional[str]
+        importance_type: typing.Optional[str]
             The feature importance type for the feature_importances\_ property:
             * For tree model, it's either "gain", "weight", "cover", "total_gain" or
               "total_cover".
-            * For linear model, only "weight" is defined and it's the normalized coefficients
-              without bias.
+            * For linear model, only "weight" is defined and it's the normalized
+              coefficients without bias.
+        device: typing.Optional[str]
+            Device ordinal, available options are `cpu`, `cuda`, and `gpu`.
+        validate_parameters: typing.Optional[bool]
-        gpu_id: Optional[int]
-            Device ordinal.
-        validate_parameters: Optional[bool]
             Give warnings for unknown parameter.
-        predictor: Optional[str]
-            Force XGBoost to use specific predictor, available choices are [cpu_predictor,
-            gpu_predictor].
         enable_categorical: bool
-            Experimental support for categorical data.  When enabled, cudf/pandas.DataFrame
-            should be used to specify categorical data type.  Also, JSON/UBJSON
-            serialization format is required.
+            See the same parameter of :py:class:`DMatrix` for details.
-        feature_types: FeatureTypes
+        feature_types: typing.Optional[typing.Sequence[str]]
             Used for specifying feature types without constructing a dataframe. See
             :py:class:`DMatrix` for details.
-        max_cat_to_onehot: Optional[int]
+        max_cat_to_onehot: typing.Optional[int]
             A threshold for deciding whether XGBoost should use one-hot encoding based split
             for categorical data.  When number of categories is lesser than the threshold
@@ -238,36 +298,41 @@ class XGBRFRegressor(BaseTransformer):
             categorical feature support. See :doc:`Categorical Data
             </tutorials/categorical>` and :ref:`cat-param` for details.
-        max_cat_threshold: Optional[int]
+        max_cat_threshold: typing.Optional[int]
             Maximum number of categories considered for each split. Used only by
             partition-based splits for preventing over-fitting. Also, `enable_categorical`
             needs to be set to have categorical feature support. See :doc:`Categorical Data
             </tutorials/categorical>` and :ref:`cat-param` for details.
-        eval_metric: Optional[Union[str, List[str], Callable]]
+        multi_strategy: typing.Optional[str]
+            The strategy used for training multi-target models, including multi-target
+            regression and multi-class classification. See :doc:`/tutorials/multioutput` for
+            more information.
+            - ``one_output_per_tree``: One model for each target.
+            - ``multi_output_tree``:  Use multi-target trees.
+        eval_metric: typing.Union[str, typing.List[str], typing.Callable, NoneType]
             Metric used for monitoring the training result and early stopping.  It can be a
             string or list of strings as names of predefined metric in XGBoost (See
-            doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any other
-            user defined metric that looks like `sklearn.metrics`.
+            doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
+            other user defined metric that looks like `sklearn.metrics`.
             If custom objective is also provided, then custom metric should implement the
             corresponding reverse link function.
             Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
-            object is provided, it's assumed to be a cost function and by default XGBoost will
-            minimize the result during early stopping.
-            For advanced usage on Early stopping like directly choosing to maximize instead of
-            minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
+            object is provided, it's assumed to be a cost function and by default XGBoost
+            will minimize the result during early stopping.
-            See :doc:`Custom Objective and Evaluation Metric </tutorials/custom_metric_obj>`
-            for more.
+            For advanced usage on Early stopping like directly choosing to maximize instead
+            of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
-                 This parameter replaces `eval_metric` in :py:meth:`fit` method.  The old one
-                 receives un-transformed prediction regardless of whether custom objective is
-                 being used.
+            See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
+            information.
                 from sklearn.datasets import load_diabetes
                 from sklearn.metrics import mean_absolute_error
@@ -278,24 +343,29 @@ class XGBRFRegressor(BaseTransformer):
                 )
                 reg.fit(X, y, eval_set=[(X, y)])
-        early_stopping_rounds: Optional[int]
+        early_stopping_rounds: typing.Optional[int]
-            Activates early stopping. Validation metric needs to improve at least once in
-            every **early_stopping_rounds** round(s) to continue training.  Requires at least
-            one item in **eval_set** in :py:meth:`fit`.
+            - Activates early stopping. Validation metric needs to improve at least once in
+              every **early_stopping_rounds** round(s) to continue training.  Requires at
+              least one item in **eval_set** in :py:meth:`fit`.
-            The method returns the model from the last iteration (not the best one).  If
-            there's more than one item in **eval_set**, the last entry will be used for early
-            stopping.  If there's more than one metric in **eval_metric**, the last metric
-            will be used for early stopping.
+            - If early stopping occurs, the model will have two additional attributes:
+              :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
+              :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
+              number of trees during inference. If users want to access the full model
+              (including trees built after early stopping), they can specify the
+              `iteration_range` in these inference methods. In addition, other utilities
+              like model plotting can also use the entire model.
-            If early stopping occurs, the model will have three additional fields:
-            :py:attr:`best_score`, :py:attr:`best_iteration` and
-            :py:attr:`best_ntree_limit`.
+            - If you prefer to discard the trees after `best_iteration`, consider using the
+              callback function :py:class:`xgboost.callback.EarlyStopping`.
-                This parameter replaces `early_stopping_rounds` in :py:meth:`fit` method.
+            - If there's more than one item in **eval_set**, the last entry will be used for
+              early stopping.  If there's more than one metric in **eval_metric**, the last
+              metric will be used for early stopping.
+        callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]]
-        callbacks: Optional[List[TrainingCallback]]
             List of callback functions that are applied at end of each iteration.
             It is possible to use predefined callbacks by using
             :ref:`Callback API <callback_api>`.
@@ -307,9 +377,11 @@ class XGBRFRegressor(BaseTransformer):
                 for params in parameters_grid:
                     # be sure to (re)initialize the callbacks before each run
                     callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
-                    xgboost.train(params, Xy, callbacks=callbacks)
+                    reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
+                    reg.fit(X, y)
+        kwargs: typing.Optional[typing.Any]
-        kwargs: dict, optional
             Keyword arguments for XGBoost Booster object.  Full documentation of parameters
             can be found :doc:`here </parameter>`.
             Attempting to set a parameter via the constructor args and \*\*kwargs
@@ -320,13 +392,16 @@ class XGBRFRegressor(BaseTransformer):
                 with scikit-learn.
                 A custom objective function can be provided for the ``objective``
-                parameter. In this case, it should have the signature
-                ``objective(y_true, y_pred) -> grad, hess``:
+                parameter. In this case, it should have the signature ``objective(y_true,
+                y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
+                -> [grad, hess]``:
                 y_true: array_like of shape [n_samples]
                     The target values
                 y_pred: array_like of shape [n_samples]
                     The predicted values
+                sample_weight :
+                    Optional sample weights.
                 grad: array_like of shape [n_samples]
                     The value of the gradient for each sample point.

snowflake/ml/monitoring/_client/model_monitor_sql_client.py CHANGED Viewed

@@ -42,7 +42,7 @@ class ModelMonitorSQLClient:
     def _infer_qualified_schema(
         self, database_name: Optional[sql_identifier.SqlIdentifier], schema_name: Optional[sql_identifier.SqlIdentifier]
     ) -> str:
-        return f"{database_name or self._database_name}.{schema_name or self._schema_name}"
+        return f"""{database_name or self._database_name}.{schema_name or self._schema_name}"""
     def create_model_monitor(
         self,
@@ -74,17 +74,17 @@ class ModelMonitorSQLClient:
     ) -> None:
         baseline_sql = ""
         if baseline:
-            baseline_sql = f"BASELINE='{self._infer_qualified_schema(baseline_database, baseline_schema)}.{baseline}'"
+            baseline_sql = f"""BASELINE={self._infer_qualified_schema(baseline_database, baseline_schema)}.{baseline}"""
         query_result_checker.SqlResultValidator(
             self._sql_client._session,
             f"""
             CREATE MODEL MONITOR {self._infer_qualified_schema(monitor_database, monitor_schema)}.{monitor_name}
                 WITH
-                    MODEL='{self._infer_qualified_schema(model_database, model_schema)}.{model_name}'
+                    MODEL={self._infer_qualified_schema(model_database, model_schema)}.{model_name}
                     VERSION='{version_name}'
                     FUNCTION='{function_name}'
                     WAREHOUSE='{warehouse_name}'
-                    SOURCE='{self._infer_qualified_schema(source_database, source_schema)}.{source}'
+                    SOURCE={self._infer_qualified_schema(source_database, source_schema)}.{source}
                     ID_COLUMNS={_build_sql_list_from_columns(id_columns)}
                     PREDICTION_SCORE_COLUMNS={_build_sql_list_from_columns(prediction_score_columns)}
                     PREDICTION_CLASS_COLUMNS={_build_sql_list_from_columns(prediction_class_columns)}

snowflake/ml/registry/_manager/model_manager.py CHANGED Viewed

@@ -1,13 +1,13 @@
 from types import ModuleType
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 import pandas as pd
 from absl.logging import logging
-from packaging import version
 from snowflake.ml._internal import telemetry
+from snowflake.ml._internal.exceptions import error_codes, exceptions
 from snowflake.ml._internal.human_readable_id import hrid_generator
-from snowflake.ml._internal.utils import snowflake_env, sql_identifier
+from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._client.model import model_impl, model_version_impl
 from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
@@ -50,14 +50,40 @@ class ModelManager:
         python_version: Optional[str] = None,
         signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
         sample_input_data: Optional[model_types.SupportedDataType] = None,
+        user_files: Optional[Dict[str, List[str]]] = None,
         code_paths: Optional[List[str]] = None,
         ext_modules: Optional[List[ModuleType]] = None,
         task: model_types.Task = model_types.Task.UNKNOWN,
         options: Optional[model_types.ModelSaveOption] = None,
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> model_version_impl.ModelVersion:
-        if not version_name:
-            version_name = self._hrid_generator.generate()[1]
+        database_name_id, schema_name_id, model_name_id = self._parse_fully_qualified_name(model_name)
+        model_exists = self._model_ops.validate_existence(
+            database_name=database_name_id,
+            schema_name=schema_name_id,
+            model_name=model_name_id,
+            statement_params=statement_params,
+        )
+        if version_name is None:
+            if model_exists:
+                versions = self._model_ops.list_models_or_versions(
+                    database_name=database_name_id,
+                    schema_name=schema_name_id,
+                    model_name=model_name_id,
+                    statement_params=statement_params,
+                )
+                for _ in range(1000):
+                    hrid = self._hrid_generator.generate()[1]
+                    if sql_identifier.SqlIdentifier(hrid) not in versions:
+                        version_name = hrid
+                        break
+                if version_name is None:
+                    raise RuntimeError("Random version name generation failed.")
+            else:
+                version_name = self._hrid_generator.generate()[1]
         if isinstance(model, model_version_impl.ModelVersion):
             (
@@ -75,10 +101,24 @@ class ModelManager:
                 schema_name=None,
                 model_name=sql_identifier.SqlIdentifier(model_name),
                 version_name=sql_identifier.SqlIdentifier(version_name),
+                model_exists=model_exists,
                 statement_params=statement_params,
             )
             return self.get_model(model_name=model_name, statement_params=statement_params).version(version_name)
+        version_name_id = sql_identifier.SqlIdentifier(version_name)
+        if model_exists and self._model_ops.validate_existence(
+            database_name=database_name_id,
+            schema_name=schema_name_id,
+            model_name=model_name_id,
+            version_name=version_name_id,
+            statement_params=statement_params,
+        ):
+            raise ValueError(
+                f"Model {model_name} version {version_name} already existed. "
+                + "To auto-generate `version_name`, skip that argument."
+            )
         return self._log_model(
             model=model,
             model_name=model_name,
@@ -91,6 +131,7 @@ class ModelManager:
             python_version=python_version,
             signatures=signatures,
             sample_input_data=sample_input_data,
+            user_files=user_files,
             code_paths=code_paths,
             ext_modules=ext_modules,
             task=task,
@@ -103,7 +144,7 @@ class ModelManager:
         model: model_types.SupportedModelType,
         *,
         model_name: str,
-        version_name: Optional[str] = None,
+        version_name: str,
         comment: Optional[str] = None,
         metrics: Optional[Dict[str, Any]] = None,
         conda_dependencies: Optional[List[str]] = None,
@@ -112,6 +153,7 @@ class ModelManager:
         python_version: Optional[str] = None,
         signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
         sample_input_data: Optional[model_types.SupportedDataType] = None,
+        user_files: Optional[Dict[str, List[str]]] = None,
         code_paths: Optional[List[str]] = None,
         ext_modules: Optional[List[ModuleType]] = None,
         task: model_types.Task = model_types.Task.UNKNOWN,
@@ -119,28 +161,8 @@ class ModelManager:
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> model_version_impl.ModelVersion:
         database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
-        if not version_name:
-            version_name = self._hrid_generator.generate()[1]
         version_name_id = sql_identifier.SqlIdentifier(version_name)
-        if self._model_ops.validate_existence(
-            database_name=database_name_id,
-            schema_name=schema_name_id,
-            model_name=model_name_id,
-            statement_params=statement_params,
-        ) and self._model_ops.validate_existence(
-            database_name=database_name_id,
-            schema_name=schema_name_id,
-            model_name=model_name_id,
-            version_name=version_name_id,
-            statement_params=statement_params,
-        ):
-            raise ValueError(
-                f"Model {model_name} version {version_name} already existed. "
-                + "To auto-generate `version_name`, skip that argument."
-            )
         stage_path = self._model_ops.prepare_model_stage_path(
             database_name=database_name_id,
             schema_name=schema_name_id,
@@ -148,13 +170,10 @@ class ModelManager:
         )
         platforms = None
-        # TODO(jbahk): Remove the version check after Snowflake 8.40.0 release
         # User specified target platforms are defaulted to None and will not show up in the generated manifest.
-        # In the backend, we attempt to create a model for all platforms (WH, SPCS) regardless by default.
-        if snowflake_env.get_current_snowflake_version(self._model_ops._session) >= version.parse("8.40.0"):
+        if target_platforms:
             # Convert any string target platforms to TargetPlatform objects
-            if target_platforms:
-                platforms = [model_types.TargetPlatform(platform) for platform in target_platforms]
+            platforms = [model_types.TargetPlatform(platform) for platform in target_platforms]
         logger.info("Start packaging and uploading your model. It might take some time based on the size of the model.")
@@ -170,6 +189,7 @@ class ModelManager:
             pip_requirements=pip_requirements,
             target_platforms=platforms,
             python_version=python_version,
+            user_files=user_files,
             code_paths=code_paths,
             ext_modules=ext_modules,
             options=options,
@@ -229,7 +249,7 @@ class ModelManager:
         *,
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> model_impl.Model:
-        database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
+        database_name_id, schema_name_id, model_name_id = self._parse_fully_qualified_name(model_name)
         if self._model_ops.validate_existence(
             database_name=database_name_id,
             schema_name=schema_name_id,
@@ -289,7 +309,7 @@ class ModelManager:
         *,
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> None:
-        database_name_id, schema_name_id, model_name_id = sql_identifier.parse_fully_qualified_name(model_name)
+        database_name_id, schema_name_id, model_name_id = self._parse_fully_qualified_name(model_name)
         self._model_ops.delete_model_or_version(
             database_name=database_name_id,
@@ -297,3 +317,20 @@ class ModelManager:
             model_name=model_name_id,
             statement_params=statement_params,
         )
+    def _parse_fully_qualified_name(
+        self, model_name: str
+    ) -> Tuple[
+        Optional[sql_identifier.SqlIdentifier], Optional[sql_identifier.SqlIdentifier], sql_identifier.SqlIdentifier
+    ]:
+        try:
+            return sql_identifier.parse_fully_qualified_name(model_name)
+        except ValueError:
+            raise exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_ARGUMENT,
+                original_exception=ValueError(
+                    f"The model_name `{model_name}` cannot be parsed as a SQL identifier. Alphanumeric characters and "
+                    "underscores are permitted. See https://docs.snowflake.com/en/sql-reference/identifiers-syntax for "
+                    "more information."
+                ),
+            )

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl