snowflake-ml-python 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. snowflake/ml/_internal/env_utils.py +16 -0
  2. snowflake/ml/_internal/platform_capabilities.py +36 -0
  3. snowflake/ml/_internal/telemetry.py +56 -7
  4. snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
  5. snowflake/ml/data/data_connector.py +103 -1
  6. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
  7. snowflake/ml/experiment/_entities/run.py +15 -0
  8. snowflake/ml/experiment/callback/keras.py +25 -2
  9. snowflake/ml/experiment/callback/lightgbm.py +27 -2
  10. snowflake/ml/experiment/callback/xgboost.py +25 -2
  11. snowflake/ml/experiment/experiment_tracking.py +123 -13
  12. snowflake/ml/experiment/utils.py +6 -0
  13. snowflake/ml/feature_store/access_manager.py +1 -0
  14. snowflake/ml/feature_store/feature_store.py +1 -1
  15. snowflake/ml/feature_store/feature_view.py +34 -24
  16. snowflake/ml/jobs/_interop/protocols.py +3 -0
  17. snowflake/ml/jobs/_utils/feature_flags.py +1 -0
  18. snowflake/ml/jobs/_utils/payload_utils.py +360 -357
  19. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
  20. snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
  21. snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
  22. snowflake/ml/jobs/_utils/spec_utils.py +2 -406
  23. snowflake/ml/jobs/_utils/stage_utils.py +22 -1
  24. snowflake/ml/jobs/_utils/types.py +14 -7
  25. snowflake/ml/jobs/job.py +8 -9
  26. snowflake/ml/jobs/manager.py +64 -129
  27. snowflake/ml/model/_client/model/inference_engine_utils.py +8 -4
  28. snowflake/ml/model/_client/model/model_version_impl.py +109 -28
  29. snowflake/ml/model/_client/ops/model_ops.py +32 -6
  30. snowflake/ml/model/_client/ops/service_ops.py +9 -4
  31. snowflake/ml/model/_client/sql/service.py +69 -2
  32. snowflake/ml/model/_packager/model_handler.py +8 -2
  33. snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
  34. snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
  35. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  36. snowflake/ml/model/_signatures/core.py +305 -8
  37. snowflake/ml/model/_signatures/utils.py +13 -4
  38. snowflake/ml/model/compute_pool.py +2 -0
  39. snowflake/ml/model/models/huggingface.py +285 -0
  40. snowflake/ml/model/models/huggingface_pipeline.py +25 -215
  41. snowflake/ml/model/type_hints.py +5 -1
  42. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  43. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
  44. snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
  45. snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
  46. snowflake/ml/utils/html_utils.py +67 -1
  47. snowflake/ml/version.py +1 -1
  48. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/METADATA +94 -7
  49. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/RECORD +52 -48
  50. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/WHEEL +0 -0
  51. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  52. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/top_level.txt +0 -0
snowflake/ml/experiment/experiment_tracking.py

@@ -1,6 +1,7 @@
 import functools
 import json
 import sys
+import warnings
 from typing import Any, Optional, Union
 from urllib.parse import quote
 
@@ -27,6 +28,13 @@ class ExperimentTracking:
     Class to manage experiments in Snowflake.
     """
 
+    _instance = None
+
+    def __new__(cls, *args: Any, **kwargs: Any) -> "ExperimentTracking":
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
     def __init__(
         self,
         session: snowpark.Session,
@@ -36,6 +44,7 @@ class ExperimentTracking:
     ) -> None:
         """
         Initializes experiment tracking within a pre-created schema.
+        This is a singleton class, so if an instance already exists, it will not reinitialize.
 
         Args:
             session: The Snowpark Session to connect with Snowflake.
@@ -47,6 +56,21 @@ class ExperimentTracking:
         Raises:
             ValueError: If no database is provided and no active database exists in the session.
         """
+        if hasattr(self, "_initialized"):
+            warnings.warn(
+                "ExperimentTracking is a singleton class. Reusing the existing instance, which has the setting:\n"
+                f"    Database: {self._database_name}, Schema: {self._schema_name}\n"
+                "To change the database or schema, use the database_name and schema_name arguments to set_experiment.",
+                UserWarning,
+                stacklevel=2,
+            )
+            return
+
+        # Declare types for mypy
+        self._database_name: sql_identifier.SqlIdentifier
+        self._schema_name: sql_identifier.SqlIdentifier
+        self._sql_client: sql_client.ExperimentTrackingSQLClient
+
         if database_name:
            self._database_name = sql_identifier.SqlIdentifier(database_name)
         elif session_db := session.get_current_database():
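
Taken together with the `__new__` override above, the `_initialized` guard makes `ExperimentTracking` behave as a process-wide singleton: a second construction returns the existing instance and warns instead of reinitializing. A minimal sketch of the resulting behavior, assuming an active Snowpark `session` and that the class is importable from `snowflake.ml.experiment` (import path assumed)::

    from snowflake.ml.experiment import ExperimentTracking

    exp1 = ExperimentTracking(session, database_name="ML_DB", schema_name="EXPERIMENTS")
    # Second construction returns the same object; __init__ sees _initialized,
    # emits a UserWarning, and skips re-initialization, so OTHER_DB is ignored.
    exp2 = ExperimentTracking(session, database_name="OTHER_DB")
    assert exp1 is exp2
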
@@ -78,6 +102,8 @@ class ExperimentTracking:
         # The run in context
         self._run: Optional[entities.Run] = None
 
+        self._initialized = True
+
     def __getstate__(self) -> dict[str, Any]:
         parent_state = (
             super().__getstate__()  # type: ignore[misc] # object.__getstate__ appears in 3.11
@@ -116,19 +142,40 @@ class ExperimentTracking:
     def set_experiment(
         self,
         experiment_name: str,
+        database_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
     ) -> entities.Experiment:
         """
         Set the experiment in context. Creates a new experiment if it doesn't exist.
 
         Args:
             experiment_name: The name of the experiment.
+            database_name: The name of the database. If None, reuse the current database. Defaults to None.
+            schema_name: The name of the schema. If None, the behavior depends on whether `database_name` is specified.
+                If `database_name` is specified, the schema is set to "PUBLIC".
+                If `database_name` is not specified, reuse the current schema. Defaults to None.
 
         Returns:
             Experiment: The experiment that was set.
         """
+        if database_name is not None:
+            if schema_name is None:
+                schema_name = "PUBLIC"
+        database_name = (
+            sql_identifier.SqlIdentifier(database_name) if database_name is not None else self._database_name
+        )
+        schema_name = sql_identifier.SqlIdentifier(schema_name) if schema_name is not None else self._schema_name
+
         experiment_name = sql_identifier.SqlIdentifier(experiment_name)
-        if self._experiment and self._experiment.name == experiment_name:
+        if (
+            self._experiment
+            and self._experiment.name == experiment_name
+            and self._database_name == database_name
+            and self._schema_name == schema_name
+        ):
             return self._experiment
+
+        self._update_database_and_schema(database_name, schema_name)
         self._sql_client.create_experiment(
             experiment_name=experiment_name,
             creation_mode=sql_client_utils.CreationMode(if_not_exists=True),
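
Per the new docstring, passing `database_name` without `schema_name` re-points the experiment at that database's PUBLIC schema, while passing neither reuses the instance's current context. A usage sketch (the `exp` instance and names are illustrative)::

    exp.set_experiment("MY_EXPERIMENT")  # reuse current database and schema
    exp.set_experiment("MY_EXPERIMENT", database_name="ANALYTICS_DB")  # targets ANALYTICS_DB.PUBLIC
    exp.set_experiment("MY_EXPERIMENT", database_name="ANALYTICS_DB", schema_name="RESEARCH")
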
@@ -140,15 +187,42 @@ class ExperimentTracking:
     def delete_experiment(
         self,
         experiment_name: str,
+        database_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
     ) -> None:
         """
         Delete an experiment.
 
         Args:
             experiment_name: The name of the experiment.
+            database_name: The name of the database. If None, reuse the current database.
+                Must be specified if `schema_name` is specified. Defaults to None.
+            schema_name: The name of the schema. If None, reuse the current schema.
+                Must be specified if `database_name` is specified. Defaults to None.
+
+        Raises:
+            ValueError: If database_name is specified but schema_name is not.
         """
-        self._sql_client.drop_experiment(experiment_name=sql_identifier.SqlIdentifier(experiment_name))
-        if self._experiment and self._experiment.name == experiment_name:
+        if (database_name is None) ^ (schema_name is None):  # if only one of database_name and schema_name is set
+            raise ValueError(
+                "If one of database_name and schema_name is specified, the other one must also be specified."
+            )
+        database_name = (
+            sql_identifier.SqlIdentifier(database_name) if database_name is not None else self._database_name
+        )
+        schema_name = sql_identifier.SqlIdentifier(schema_name) if schema_name is not None else self._schema_name
+
+        self._sql_client.drop_experiment(
+            database_name=database_name,
+            schema_name=schema_name,
+            experiment_name=sql_identifier.SqlIdentifier(experiment_name),
+        )
+        if (
+            self._experiment
+            and self._experiment.name == experiment_name
+            and self._database_name == database_name
+            and self._schema_name == schema_name
+        ):
             self._experiment = None
             self._run = None
 
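Note that the XOR check raises whenever exactly one of the pair is given, in either direction, which is slightly broader than the `Raises:` docstring suggests. Illustrative calls (instance and names hypothetical)::

    exp.delete_experiment("OLD_EXP")  # OK: uses current database and schema
    exp.delete_experiment("OLD_EXP", database_name="ANALYTICS_DB", schema_name="RESEARCH")  # OK: both given
    exp.delete_experiment("OLD_EXP", database_name="ANALYTICS_DB")  # ValueError: schema_name missing
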
@@ -283,16 +357,26 @@ class ExperimentTracking:
         Args:
             metrics: Dictionary containing metric keys and float values.
             step: The step of the metrics. Defaults to 0.
+
+        Raises:
+            snowpark.exceptions.SnowparkSQLException: If logging metrics fails due to Snowflake SQL errors,
+                except for run metadata size limit errors which will issue a warning instead of raising.
         """
         run = self._get_or_start_run()
         metrics_list = []
         for key, value in metrics.items():
             metrics_list.append(entities.Metric(key, value, step))
-        self._sql_client.modify_run_add_metrics(
-            experiment_name=run.experiment_name,
-            run_name=run.name,
-            metrics=json.dumps([metric.to_dict() for metric in metrics_list]),
-        )
+        try:
+            self._sql_client.modify_run_add_metrics(
+                experiment_name=run.experiment_name,
+                run_name=run.name,
+                metrics=json.dumps([metric.to_dict() for metric in metrics_list]),
+            )
+        except snowpark.exceptions.SnowparkSQLException as e:
+            if e.sql_error_code == 400003:  # EXPERIMENT_RUN_PROPERTY_SIZE_LIMIT_EXCEEDED
+                run._warn_about_run_metadata_size(e.message)
+            else:
+                raise
 
     def log_param(
         self,
@@ -318,16 +402,26 @@ class ExperimentTracking:
         Args:
             params: Dictionary containing parameter keys and values. Values can be of any type, but will be converted
                 to string.
+
+        Raises:
+            snowpark.exceptions.SnowparkSQLException: If logging parameters fails due to Snowflake SQL errors,
+                except for run metadata size limit errors which will issue a warning instead of raising.
         """
         run = self._get_or_start_run()
         params_list = []
         for key, value in params.items():
             params_list.append(entities.Param(key, str(value)))
-        self._sql_client.modify_run_add_params(
-            experiment_name=run.experiment_name,
-            run_name=run.name,
-            params=json.dumps([param.to_dict() for param in params_list]),
-        )
+        try:
+            self._sql_client.modify_run_add_params(
+                experiment_name=run.experiment_name,
+                run_name=run.name,
+                params=json.dumps([param.to_dict() for param in params_list]),
+            )
+        except snowpark.exceptions.SnowparkSQLException as e:
+            if e.sql_error_code == 400003:  # EXPERIMENT_RUN_PROPERTY_SIZE_LIMIT_EXCEEDED
+                run._warn_about_run_metadata_size(e.message)
+            else:
+                raise
 
     def log_artifact(
         self,
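
Both `log_metrics` and `log_params` now share this pattern: an oversized run payload degrades to a warning instead of an exception, while every other SQL error still propagates. From the caller's side (metric and parameter names illustrative)::

    exp.log_params({"lr": 3e-4, "optimizer": "adamw"})
    exp.log_metrics({"loss": 0.042, "accuracy": 0.97}, step=10)
    # If the run metadata exceeds the backend limit (SQL error code 400003),
    # the call warns and returns; any other SnowparkSQLException is re-raised.
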
@@ -431,6 +525,22 @@ class ExperimentTracking:
                 return sql_identifier.SqlIdentifier(run_name)
         raise RuntimeError("Random run name generation failed.")
 
+    def _update_database_and_schema(
+        self, database_name: sql_identifier.SqlIdentifier, schema_name: sql_identifier.SqlIdentifier
+    ) -> None:
+        self._database_name = database_name
+        self._schema_name = schema_name
+        self._sql_client = sql_client.ExperimentTrackingSQLClient(
+            session=self._session,
+            database_name=database_name,
+            schema_name=schema_name,
+        )
+        self._registry = registry.Registry(
+            session=self._session,
+            database_name=database_name,
+            schema_name=schema_name,
+        )
+
     def _print_urls(
         self,
         experiment_name: sql_identifier.SqlIdentifier,
snowflake/ml/experiment/utils.py

@@ -1,3 +1,4 @@
+import numbers
 from typing import Any, Union
 
 
@@ -12,3 +13,8 @@ def flatten_nested_params(params: Union[list[Any], dict[str, Any]], prefix: str
         else:
             flat_params[new_prefix] = value
     return flat_params
+
+
+def is_integer(value: Any) -> bool:
+    """Check if the given value is an integer, excluding booleans."""
+    return isinstance(value, numbers.Integral) and not isinstance(value, bool)
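
The `numbers.Integral` check also accepts NumPy integer scalars, which register with the `numbers` ABCs, while the explicit `bool` exclusion is needed because `bool` subclasses `int`. A quick check (import path per the file listing above)::

    import numpy as np

    from snowflake.ml.experiment.utils import is_integer

    assert is_integer(3)
    assert is_integer(np.int64(3))  # NumPy ints register as numbers.Integral
    assert not is_integer(True)     # bool subclasses int, explicitly excluded
    assert not is_integer(3.0)
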
snowflake/ml/feature_store/access_manager.py

@@ -202,6 +202,7 @@ def _configure_role_hierarchy(
     session.sql(f"GRANT ROLE {producer_role} TO ROLE {session.get_current_role()}").collect()
 
     if consumer_role is not None:
+        # Create CONSUMER and grant it to PRODUCER to build hierarchy
         consumer_role = SqlIdentifier(consumer_role)
         session.sql(f"CREATE ROLE IF NOT EXISTS {consumer_role}").collect()
         session.sql(f"GRANT ROLE {consumer_role} TO ROLE {producer_role}").collect()
snowflake/ml/feature_store/feature_store.py

@@ -1200,7 +1200,7 @@ class FeatureStore:
                 {self._config.database}.INFORMATION_SCHEMA.DYNAMIC_TABLE_REFRESH_HISTORY (RESULT_LIMIT => 10000)
             )
             WHERE NAME = '{fv_resolved_name}'
-            AND SCHEMA_NAME = '{self._config.schema}'
+            AND SCHEMA_NAME = '{self._config.schema.resolved()}'
             """
         )
 
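This one-line fix matters for case-sensitive (quoted) schema names: interpolating the `SqlIdentifier` directly renders the quoted form, while INFORMATION_SCHEMA stores the resolved, unquoted name, so the old filter could silently match nothing. A sketch of the distinction, assuming the `resolved()` semantics implied by this change::

    schema = sql_identifier.SqlIdentifier('"my_schema"')
    str(schema)        # '"my_schema"' -- quoted form, never equals SCHEMA_NAME
    schema.resolved()  # 'my_schema'   -- the name as INFORMATION_SCHEMA stores it
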
snowflake/ml/feature_store/feature_view.py

@@ -218,38 +218,48 @@ class FeatureView(lineage_node.LineageNode):
         """
         Create a FeatureView instance.
 
+        # noqa: DAR101
+
         Args:
-            name: name of the FeatureView. NOTE: following Snowflake identifier rule
-            entities: entities that the FeatureView is associated with.
-            feature_df: Snowpark DataFrame containing data source and all feature feature_df logics.
-                Final projection of the DataFrame should contain feature names, join keys and timestamp(if applicable).
+            name: The name of the FeatureView. This must follow Snowflake identifier rules.
+            entities: The entities that the FeatureView is associated with.
+            feature_df: The Snowpark DataFrame containing data source and all feature feature_df logic.
+                The final projection of the DataFrame should contain feature names, join keys and timestamp if
+                applicable.
             timestamp_col: name of the timestamp column for point-in-time lookup when consuming the
                 feature values.
-            refresh_freq: Time unit defining how often the new feature data should be generated.
-                Valid args are { <num> { seconds | minutes | hours | days } | DOWNSTREAM | <cron expr> <time zone>}.
-                NOTE: Currently minimum refresh frequency is 1 minute.
-                NOTE: If refresh_freq is in cron expression format, there must be a valid time zone as well.
-                    E.g. * * * * * UTC
-                NOTE: If refresh_freq is not provided, then FeatureView will be registered as View on Snowflake backend
-                    and there won't be extra storage cost.
-            desc: description of the FeatureView.
-            warehouse: warehouse to refresh feature view. Not needed for static feature view (refresh_freq is None).
-                For managed feature view, this warehouse will overwrite the default warehouse of Feature Store if it is
-                specified, otherwise the default warehouse will be used.
+            refresh_freq: Time unit defining how often the new feature data should be generated, in the format
+                ``{ <num> { seconds | minutes | hours | days } | DOWNSTREAM | <cron expr> <time zone>}``.
+
+                The minimum refresh frequency is 1 minute.
+
+                When using a ``cron`` format, you must provide a time zone.
+
+                When you don't provide a refresh value, the ``FeatureView`` is registered as a ``View`` on the Snowflake
+                backend. There are no extra storage costs incurred for this view.
+            desc: Description of the FeatureView.
+            warehouse: The warehouse used to refresh this feature view. Not needed when ``refresh_freq`` is ``None``.
+                This warehouse will overwrite the default warehouse of Feature Store if specified, otherwise the default
+                warehouse will be used.
             initialize: Specifies the behavior of the initial refresh of feature view. This property cannot be altered
                 after you register the feature view. It supports ON_CREATE (default) or ON_SCHEDULE. ON_CREATE refreshes
                 the feature view synchronously at creation. ON_SCHEDULE refreshes the feature view at the next scheduled
                 refresh. It is only effective when refresh_freq is not None.
             refresh_mode: The refresh mode of managed feature view. The value can be 'AUTO', 'FULL' or 'INCREMENTAL'.
-                For managed feature view, the default value is 'AUTO'. For static feature view it has no effect.
-                Check https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table for for details.
-            cluster_by: Columns to cluster the feature view by.
-                - Defaults to the join keys from entities.
-                - If `timestamp_col` is provided, it is added to the default clustering keys.
-            online_config: Optional configuration for online storage. If provided with enable=True,
-                online storage will be enabled. Defaults to None (no online storage).
-                NOTE: this feature is currently in Public Preview.
-            _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
+                For managed feature view, the default value is 'AUTO'. For static feature view it has no effect. For
+                more information, see
+                `CREATE DYNAMIC TABLE <https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table>`__.
+            cluster_by: Columns to cluster the feature view by. If ``timestamp_col`` is provided, it is added to the
+                default clustering keys. Default is to use the join keys from entities in the view.
+            online_config: Configuration for online storage. If provided with ``enable=True``,
+                online storage will be enabled. Defaults to ``None`` (no online storage).
+
+                .. note::
+                    This feature is currently in preview.
+            _kwargs: Reserved kwargs for system generated args.
+
+                .. caution::
+                    Use of additional keywords is prohibited.
 
         Example::
 
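The rewritten docstring is easier to act on; a construction sketch matching its rules (the entity, DataFrame, and names are illustrative, and the import path follows the public `snowflake.ml.feature_store` package)::

    from snowflake.ml.feature_store import FeatureView

    fv = FeatureView(
        name="USER_FEATURES",
        entities=[user_entity],        # an Entity defined elsewhere
        feature_df=features_df,        # Snowpark DataFrame: join keys + features
        timestamp_col="EVENT_TS",
        refresh_freq="0 * * * * UTC",  # cron form requires a time zone
        refresh_mode="INCREMENTAL",
        desc="Hourly-refreshed user features",
    )
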
snowflake/ml/jobs/_interop/protocols.py

@@ -266,6 +266,9 @@ class PandasDataFrameProtocol(SerializationProtocol):
 
         # TODO: Support partitioned writes for large datasets
         result_path = posixpath.join(dest_dir, self.DEFAULT_PATH_PATTERN.format(0))
+        # stage mount v2 has a bug where it creates an empty file when creating a new file
+        with data_utils.open_stream(result_path, "wb", session=session) as stream:
+            stream.write(b"")  # Dummy write to create the file
         with data_utils.open_stream(result_path, "wb", session=session) as stream:
             obj.to_parquet(stream)
 
snowflake/ml/jobs/_utils/feature_flags.py

@@ -31,6 +31,7 @@ def parse_bool_env_value(value: Optional[str], default: bool = False) -> bool:
 class FeatureFlags(Enum):
     USE_SUBMIT_JOB_V2 = "MLRS_USE_SUBMIT_JOB_V2"
     ENABLE_RUNTIME_VERSIONS = "MLRS_ENABLE_RUNTIME_VERSIONS"
+    ENABLE_STAGE_MOUNT_V2 = "MLRS_ENABLE_STAGE_MOUNT_V2"
 
     def is_enabled(self, default: bool = False) -> bool:
         """Check if the feature flag is enabled.