snowflake-ml-python 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- snowflake/ml/_internal/platform_capabilities.py +36 -0
- snowflake/ml/_internal/utils/url.py +42 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
- snowflake/ml/data/data_connector.py +103 -1
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
- snowflake/ml/experiment/callback/__init__.py +0 -0
- snowflake/ml/experiment/callback/keras.py +25 -2
- snowflake/ml/experiment/callback/lightgbm.py +27 -2
- snowflake/ml/experiment/callback/xgboost.py +25 -2
- snowflake/ml/experiment/experiment_tracking.py +93 -3
- snowflake/ml/experiment/utils.py +6 -0
- snowflake/ml/feature_store/feature_view.py +34 -24
- snowflake/ml/jobs/_interop/protocols.py +3 -0
- snowflake/ml/jobs/_utils/constants.py +1 -0
- snowflake/ml/jobs/_utils/payload_utils.py +354 -356
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
- snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
- snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
- snowflake/ml/jobs/_utils/spec_utils.py +1 -445
- snowflake/ml/jobs/_utils/stage_utils.py +22 -1
- snowflake/ml/jobs/_utils/types.py +14 -7
- snowflake/ml/jobs/job.py +2 -8
- snowflake/ml/jobs/manager.py +57 -135
- snowflake/ml/lineage/lineage_node.py +1 -1
- snowflake/ml/model/__init__.py +6 -0
- snowflake/ml/model/_client/model/batch_inference_specs.py +16 -1
- snowflake/ml/model/_client/model/model_version_impl.py +130 -14
- snowflake/ml/model/_client/ops/deployment_step.py +36 -0
- snowflake/ml/model/_client/ops/model_ops.py +93 -8
- snowflake/ml/model/_client/ops/service_ops.py +32 -52
- snowflake/ml/model/_client/service/import_model_spec_schema.py +23 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +12 -4
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -0
- snowflake/ml/model/_client/sql/model_version.py +30 -6
- snowflake/ml/model/_client/sql/service.py +94 -5
- snowflake/ml/model/_model_composer/model_composer.py +1 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -0
- snowflake/ml/model/_model_composer/model_method/model_method.py +61 -2
- snowflake/ml/model/_packager/model_handler.py +8 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +52 -0
- snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
- snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
- snowflake/ml/model/_packager/model_handlers/xgboost.py +26 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +40 -7
- snowflake/ml/model/_packager/model_packager.py +1 -1
- snowflake/ml/model/_signatures/core.py +390 -8
- snowflake/ml/model/_signatures/utils.py +13 -4
- snowflake/ml/model/code_path.py +104 -0
- snowflake/ml/model/compute_pool.py +2 -0
- snowflake/ml/model/custom_model.py +55 -13
- snowflake/ml/model/model_signature.py +13 -1
- snowflake/ml/model/models/huggingface.py +285 -0
- snowflake/ml/model/models/huggingface_pipeline.py +19 -208
- snowflake/ml/model/type_hints.py +7 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
- snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
- snowflake/ml/registry/_manager/model_manager.py +230 -15
- snowflake/ml/registry/registry.py +4 -4
- snowflake/ml/utils/html_utils.py +67 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/METADATA +81 -7
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/RECORD +67 -59
- snowflake/ml/jobs/_utils/runtime_env_utils.py +0 -63
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/top_level.txt +0 -0
snowflake/ml/_internal/platform_capabilities.py
CHANGED

@@ -18,6 +18,8 @@ logger = logging.getLogger(__name__)
 LIVE_COMMIT_PARAMETER = "ENABLE_LIVE_VERSION_IN_SDK"
 INLINE_DEPLOYMENT_SPEC_PARAMETER = "ENABLE_INLINE_DEPLOYMENT_SPEC_FROM_CLIENT_VERSION"
 SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST = "SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST"
+ENABLE_MODEL_METHOD_SIGNATURE_PARAMETERS = "ENABLE_MODEL_METHOD_SIGNATURE_PARAMETERS"
+FEATURE_MODEL_INFERENCE_AUTOCAPTURE = "FEATURE_MODEL_INFERENCE_AUTOCAPTURE"


 class PlatformCapabilities:

@@ -80,6 +82,12 @@ class PlatformCapabilities:
     def is_live_commit_enabled(self) -> bool:
         return self._get_bool_feature(LIVE_COMMIT_PARAMETER, False)

+    def is_model_method_signature_parameters_enabled(self) -> bool:
+        return self._get_bool_feature(ENABLE_MODEL_METHOD_SIGNATURE_PARAMETERS, False)
+
+    def is_inference_autocapture_enabled(self) -> bool:
+        return self._is_feature_enabled(FEATURE_MODEL_INFERENCE_AUTOCAPTURE)
+
     @staticmethod
     def _get_features(session: snowpark_session.Session) -> dict[str, Any]:
         try:

@@ -182,3 +190,31 @@ class PlatformCapabilities:
             f"current={current_version}, feature={feature_version}, enabled={result}"
         )
         return result
+
+    def _is_feature_enabled(self, feature_name: str) -> bool:
+        """Check if the feature parameter value belongs to enabled values.
+
+        Args:
+            feature_name: The name of the feature to retrieve.
+
+        Returns:
+            bool: True if the value is "ENABLED" or "ENABLED_PUBLIC_PREVIEW",
+                False if the value is "DISABLED", "DISABLED_PRIVATE_PREVIEW", or not set.
+
+        Raises:
+            ValueError: If the feature value is set but not one of the recognized values.
+        """
+        value = self.features.get(feature_name)
+        if value is None:
+            logger.debug(f"Feature {feature_name} not found.")
+            return False
+
+        if isinstance(value, str):
+            value_str = str(value)
+            if value_str.upper() in ["ENABLED", "ENABLED_PUBLIC_PREVIEW"]:
+                return True
+            elif value_str.upper() in ["DISABLED", "DISABLED_PRIVATE_PREVIEW"]:
+                return False
+            else:
+                raise ValueError(f"Invalid feature parameter value: {value} for feature {feature_name}")
+        raise ValueError(f"Invalid feature parameter string value: {value} for feature {feature_name}")
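Unlike the existing `_get_bool_feature`, which takes a default, the new `_is_feature_enabled` maps string-valued platform parameters onto booleans. A minimal standalone sketch of that mapping (the feature dict here is hypothetical, not part of the package):

def is_enabled(features: dict, name: str) -> bool:
    # Mirrors PlatformCapabilities._is_feature_enabled: unset -> False;
    # ENABLED / ENABLED_PUBLIC_PREVIEW -> True;
    # DISABLED / DISABLED_PRIVATE_PREVIEW -> False; anything else -> ValueError.
    value = features.get(name)
    if value is None:
        return False
    if isinstance(value, str):
        if value.upper() in ("ENABLED", "ENABLED_PUBLIC_PREVIEW"):
            return True
        if value.upper() in ("DISABLED", "DISABLED_PRIVATE_PREVIEW"):
            return False
        raise ValueError(f"Invalid feature parameter value: {value} for feature {name}")
    raise ValueError(f"Invalid feature parameter string value: {value} for feature {name}")

assert is_enabled({"FEATURE_MODEL_INFERENCE_AUTOCAPTURE": "Enabled_Public_Preview"}, "FEATURE_MODEL_INFERENCE_AUTOCAPTURE")
assert not is_enabled({}, "FEATURE_MODEL_INFERENCE_AUTOCAPTURE")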
snowflake/ml/_internal/utils/url.py
ADDED

@@ -0,0 +1,42 @@
+from urllib.parse import urlunparse
+
+from snowflake import snowpark as snowpark
+
+JOB_URL_PREFIX = "#/compute/job/"
+SERVICE_URL_PREFIX = "#/compute/service/"
+
+
+def get_snowflake_url(
+    session: snowpark.Session,
+    url_path: str,
+    params: str = "",
+    query: str = "",
+    fragment: str = "",
+) -> str:
+    """Construct a Snowflake URL from session connection details and URL components.
+
+    Args:
+        session: The Snowpark session containing connection details.
+        url_path: The path component of the URL (e.g., "/compute/job/123").
+        params: Optional parameters for the URL (RFC 1808). Defaults to "".
+        query: Optional query string for the URL. Defaults to "".
+        fragment: Optional fragment identifier for the URL (e.g., "#section"). Defaults to "".
+
+    Returns:
+        A fully constructed Snowflake URL string with scheme, host, and specified components.
+    """
+    scheme = "https"
+    if hasattr(session.connection, "scheme"):
+        scheme = session.connection.scheme
+    host = session.connection.host
+
+    return urlunparse(
+        (
+            scheme,
+            host,
+            url_path,
+            params,
+            query,
+            fragment,
+        )
+    )
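`urlunparse` joins a 6-tuple of (scheme, netloc, path, params, query, fragment), so the fragment-based prefixes above yield Snowsight-style deep links. A quick standalone check; the host below is a placeholder, whereas `get_snowflake_url` reads the real one from `session.connection.host`:

from urllib.parse import urlunparse

host = "myorg-myaccount.snowflakecomputing.com"  # illustrative placeholder
url = urlunparse(("https", host, "", "", "", "/compute/job/MY_JOB_ID"))
print(url)  # https://myorg-myaccount.snowflakecomputing.com#/compute/job/MY_JOB_ID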
snowflake/ml/data/_internal/arrow_ingestor.py
CHANGED

@@ -1,6 +1,8 @@
+import base64
 import collections
 import logging
 import os
+import re
 import time
 from typing import TYPE_CHECKING, Any, Deque, Iterator, Optional, Sequence, Union

@@ -165,8 +167,71 @@ class ArrowIngestor(data_ingestor.DataIngestor, mixins.SerializableSessionMixin)
         # Re-shuffle input files on each iteration start
         if shuffle:
             np.random.shuffle(sources)
-
-
+        try:
+            pa_dataset: pds.Dataset = pds.dataset(sources, format=format, **self._kwargs)
+            return pa_dataset
+        except Exception as e:
+            self._tmp_debug_parquet_invalid(e, sources)
+
+    def _tmp_debug_parquet_invalid(self, e: Exception, sources: list[Any]) -> None:
+        # Attach rich debug info to help diagnose intermittent Parquet footer/magic byte errors
+        debug_parts: list[str] = []
+        debug_parts.append("SNOWML DEBUG: Failed to construct Arrow Dataset")
+        debug_parts.append(
+            "SNOWML DEBUG: " f"data_sources_count={len(self._data_sources)} " f"resolved_sources_count={len(sources)}"
+        )
+        # Try to include the exact file path mentioned by pyarrow, if present
+        error_text = str(e)
+        snow_paths: list[str] = []
+        try:
+            # Extract snow://... tokens possibly wrapped in quotes
+            for match in re.finditer(r'(snow://[^\s\'"]+)', error_text):
+                token = match.group(1).rstrip(").,;]")
+                snow_paths.append(token)
+        except Exception:
+            pass
+        fs = self._kwargs.get("filesystem")
+        if fs is not None:
+            # Always include a directory listing with sizes for context
+            try:
+                debug_parts.append("SNOWML DEBUG: Listing resolved sources with sizes:")
+                for s in sources:
+                    try:
+                        info = fs.info(s)
+                        size = info.get("size", None)
+                        md5 = info.get("md5", None)
+                        debug_parts.append(f"  - {s} size={size} md5={md5}")
+                    except Exception as le:
+                        debug_parts.append(f"  - {s} info_failed={le}")
+            except Exception as le:
+                debug_parts.append(f"SNOWML DEBUG: listing sources failed: {le}")
+            # If pyarrow referenced a specific file, dump its full contents (base64) for inspection
+            for path in snow_paths[:1]:  # usually only one path appears in the message
+                try:
+                    info = fs.info(path)
+                    size = info.get("size", None)
+                    debug_parts.append(f"SNOWML DEBUG: Inspecting referenced file: {path} size={size}")
+                    with fs.open(path, "rb") as f:
+                        content = f.read()
+                    magic_head = content[:4]
+                    magic_tail = content[-4:] if content else b""
+                    looks_like_parquet = (magic_head == b"PAR1") and (magic_tail == b"PAR1")
+                    debug_parts.append(
+                        "SNOWML DEBUG: "
+                        f"file_magic_head={magic_head!r} "
+                        f"file_magic_tail={magic_tail!r} "
+                        f"parquet_magic_detected={looks_like_parquet}"
+                    )
+                    b64 = base64.b64encode(content).decode("ascii")
+                    debug_parts.append("SNOWML DEBUG: file_content_base64 (entire file):")
+                    debug_parts.append(b64)
+                except Exception as fe:
+                    debug_parts.append(f"SNOWML DEBUG: failed to read referenced file {path}: {fe}")
+        else:
+            debug_parts.append("SNOWML DEBUG: No filesystem available; cannot inspect files")
+        debug_message = "\n".join(debug_parts)
+        # Re-raise with augmented message to surface in stacktrace
+        raise RuntimeError(f"{e}\n{debug_message}") from e

     def _get_batches_from_buffer(self, batch_size: int) -> dict[str, npt.NDArray[Any]]:
         """Generate new batches from the existing record batch buffer."""
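The magic-byte test in the debug helper relies on the Parquet file layout: a structurally intact file both begins and ends with the 4-byte marker b"PAR1", with the footer just before the trailing marker. A standalone sketch of the same check; the path is hypothetical:

def looks_like_parquet(path: str) -> bool:
    # A valid Parquet file starts and ends with b"PAR1".
    with open(path, "rb") as f:
        data = f.read()
    return len(data) >= 8 and data[:4] == b"PAR1" and data[-4:] == b"PAR1"

print(looks_like_parquet("example.parquet"))  # hypothetical local file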
snowflake/ml/data/data_connector.py
CHANGED

@@ -1,5 +1,15 @@
 import os
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Generator,
+    Literal,
+    Optional,
+    Sequence,
+    TypeVar,
+    Union,
+    overload,
+)

 import numpy.typing as npt
 from typing_extensions import deprecated

@@ -11,6 +21,7 @@ from snowflake.ml.data._internal.arrow_ingestor import ArrowIngestor
 from snowflake.snowpark import context as sp_context

 if TYPE_CHECKING:
+    import datasets as hf_datasets
     import pandas as pd
     import ray
     import tensorflow as tf

@@ -257,6 +268,97 @@ class DataConnector:
         except ImportError as e:
             raise ImportError("Ray is not installed, please install ray in your local environment.") from e

+    @overload
+    def to_huggingface_dataset(
+        self,
+        *,
+        streaming: Literal[False] = ...,
+        limit: Optional[int] = ...,
+    ) -> "hf_datasets.Dataset":
+        ...
+
+    @overload
+    def to_huggingface_dataset(
+        self,
+        *,
+        streaming: Literal[True],
+        limit: Optional[int] = ...,
+        batch_size: int = ...,
+        shuffle: bool = ...,
+        drop_last_batch: bool = ...,
+    ) -> "hf_datasets.IterableDataset":
+        ...
+
+    @telemetry.send_api_usage_telemetry(
+        project=_PROJECT,
+        subproject_extractor=lambda self: type(self).__name__,
+        func_params_to_log=["streaming", "limit", "batch_size", "shuffle", "drop_last_batch"],
+    )
+    def to_huggingface_dataset(
+        self,
+        *,
+        streaming: bool = False,
+        limit: Optional[int] = None,
+        batch_size: int = 1024,
+        shuffle: bool = False,
+        drop_last_batch: bool = False,
+    ) -> "Union[hf_datasets.Dataset, hf_datasets.IterableDataset]":
+        """Retrieve the Snowflake data as a HuggingFace Dataset.
+
+        Args:
+            streaming: If True, returns an IterableDataset that streams data in batches.
+                If False (default), returns an in-memory Dataset.
+            limit: Maximum number of rows to load. If None, loads all rows.
+            batch_size: Size of batches for internal data retrieval.
+            shuffle: Whether to shuffle the data. If True, files will be shuffled and rows
+                in each file will also be shuffled.
+            drop_last_batch: Whether to drop the last batch if it's smaller than batch_size.
+
+        Returns:
+            A HuggingFace Dataset (in-memory) or IterableDataset (streaming).
+        """
+        import datasets as hf_datasets
+
+        if streaming:
+            return self._to_huggingface_iterable_dataset(
+                limit=limit,
+                batch_size=batch_size,
+                shuffle=shuffle,
+                drop_last_batch=drop_last_batch,
+            )
+        else:
+            return hf_datasets.Dataset.from_pandas(self._ingestor.to_pandas(limit))
+
+    def _to_huggingface_iterable_dataset(
+        self,
+        *,
+        limit: Optional[int],
+        batch_size: int,
+        shuffle: bool,
+        drop_last_batch: bool,
+    ) -> "hf_datasets.IterableDataset":
+        """Create a HuggingFace IterableDataset that streams data in batches."""
+        import datasets as hf_datasets
+
+        def generator() -> Generator[dict[str, Any], None, None]:
+            rows_yielded = 0
+            for batch in self._ingestor.to_batches(batch_size, shuffle, drop_last_batch):
+                # Yield individual rows from each batch
+                num_rows = len(next(iter(batch.values())))
+                for i in range(num_rows):
+                    if limit is not None and rows_yielded >= limit:
+                        return
+                    yield {k: v[i].item() if hasattr(v[i], "item") else v[i] for k, v in batch.items()}
+                    rows_yielded += 1
+
+        result = hf_datasets.IterableDataset.from_generator(generator)
+        # In datasets >= 3.x, from_generator returns IterableDatasetDict
+        # We need to extract the IterableDataset from the dict
+        if hasattr(hf_datasets, "IterableDatasetDict") and isinstance(result, hf_datasets.IterableDatasetDict):
+            # Get the first (and only) dataset from the dict
+            result = next(iter(result.values()))
+        return result
+

 # Switch to use Runtime's Data Ingester if running in ML runtime
 # Fail silently if the data ingester is not found
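The overloads make the return type track the `streaming` flag: False materializes an in-memory `datasets.Dataset` via `from_pandas`, while True wraps the ingestor's batch iterator in a generator-backed `IterableDataset`. A hedged usage sketch, assuming `dc` is a previously constructed DataConnector (e.g. from `DataConnector.from_dataframe`):

ds = dc.to_huggingface_dataset(limit=1000)  # datasets.Dataset, fully in memory
stream = dc.to_huggingface_dataset(streaming=True, batch_size=512, shuffle=True)
for row in stream.take(5):  # IterableDataset yields dict rows lazily
    print(row)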
snowflake/ml/experiment/_client/experiment_tracking_sql_client.py
CHANGED

@@ -41,8 +41,14 @@ class ExperimentTrackingSQLClient(_base._BaseSQLClient):
         ).has_dimensions(expected_rows=1, expected_cols=1).validate()

     @telemetry.send_api_usage_telemetry(project=telemetry.TelemetryProject.EXPERIMENT_TRACKING.value)
-    def drop_experiment(
-
+    def drop_experiment(
+        self,
+        *,
+        database_name: sql_identifier.SqlIdentifier,
+        schema_name: sql_identifier.SqlIdentifier,
+        experiment_name: sql_identifier.SqlIdentifier,
+    ) -> None:
+        experiment_fqn = self.fully_qualified_object_name(database_name, schema_name, experiment_name)
         query_result_checker.SqlResultValidator(self._session, f"DROP EXPERIMENT {experiment_fqn}").has_dimensions(
             expected_rows=1, expected_cols=1
         ).validate()
snowflake/ml/experiment/callback/__init__.py
File without changes
snowflake/ml/experiment/callback/keras.py
CHANGED

@@ -12,6 +12,8 @@ if TYPE_CHECKING:


 class SnowflakeKerasCallback(keras.callbacks.Callback):
+    """Keras callback for automatically logging to a Snowflake ML Experiment."""
+
     def __init__(
         self,
         experiment_tracking: "ExperimentTracking",

@@ -23,12 +25,33 @@ class SnowflakeKerasCallback(keras.callbacks.Callback):
         version_name: Optional[str] = None,
         model_signature: Optional["ModelSignature"] = None,
     ) -> None:
+        """
+        Creates a new Keras callback.
+
+        Args:
+            experiment_tracking (snowflake.ml.experiment.ExperimentTracking): The Experiment Tracking instance
+                to use for logging.
+            log_model (bool): Whether to log the model at the end of training. Default is True.
+            log_metrics (bool): Whether to log metrics during training. Default is True.
+            log_params (bool): Whether to log model parameters at the start of training. Default is True.
+            log_every_n_epochs (int): Frequency with which to log metrics. Must be positive.
+                Default is 1, logging after every epoch.
+            model_name (Optional[str]): The model name to use when logging the model.
+                If None, the model name will be derived from the experiment name.
+            version_name (Optional[str]): The model version name to use when logging the model.
+                If None, the version name will be randomly generated.
+            model_signature (Optional[snowflake.ml.model.model_signature.ModelSignature]): The model signature to use
+                when logging the model. This is required if ``log_model`` is set to True.
+
+        Raises:
+            ValueError: When ``log_every_n_epochs`` is not a positive integer.
+        """
         self._experiment_tracking = experiment_tracking
         self.log_model = log_model
         self.log_metrics = log_metrics
         self.log_params = log_params
-        if log_every_n_epochs
-            raise ValueError("`log_every_n_epochs` must be positive.")
+        if not (utils.is_integer(log_every_n_epochs) and log_every_n_epochs > 0):
+            raise ValueError("`log_every_n_epochs` must be a positive integer.")
         self.log_every_n_epochs = log_every_n_epochs
         self.model_name = model_name
         self.version_name = version_name
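A hedged usage sketch of the documented constructor; `exp`, `sig`, and the compiled `model` with its training data are assumed to exist, and the LightGBM and XGBoost callbacks below follow the same pattern:

from snowflake.ml.experiment.callback.keras import SnowflakeKerasCallback

cb = SnowflakeKerasCallback(
    exp,                   # snowflake.ml.experiment.ExperimentTracking instance
    log_every_n_epochs=5,  # must be a positive integer, per the new validation
    model_signature=sig,   # required because log_model defaults to True
)
model.fit(x_train, y_train, epochs=20, callbacks=[cb])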
snowflake/ml/experiment/callback/lightgbm.py
CHANGED

@@ -3,12 +3,16 @@ from warnings import warn

 import lightgbm as lgb

+from snowflake.ml.experiment import utils
+
 if TYPE_CHECKING:
     from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
     from snowflake.ml.model.model_signature import ModelSignature


 class SnowflakeLightgbmCallback(lgb.callback._RecordEvaluationCallback):
+    """LightGBM callback for automatically logging to a Snowflake ML Experiment."""
+
     def __init__(
         self,
         experiment_tracking: "ExperimentTracking",

@@ -20,12 +24,33 @@ class SnowflakeLightgbmCallback(lgb.callback._RecordEvaluationCallback):
         version_name: Optional[str] = None,
         model_signature: Optional["ModelSignature"] = None,
     ) -> None:
+        """
+        Creates a new LightGBM callback.
+
+        Args:
+            experiment_tracking (snowflake.ml.experiment.ExperimentTracking): The Experiment Tracking instance
+                to use for logging.
+            log_model (bool): Whether to log the model at the end of training. Default is True.
+            log_metrics (bool): Whether to log metrics during training. Default is True.
+            log_params (bool): Whether to log model parameters at the start of training. Default is True.
+            log_every_n_epochs (int): Frequency with which to log metrics. Must be positive.
+                Default is 1, logging after every iteration.
+            model_name (Optional[str]): The model name to use when logging the model.
+                If None, the model name will be derived from the experiment name.
+            version_name (Optional[str]): The model version name to use when logging the model.
+                If None, the version name will be randomly generated.
+            model_signature (Optional[snowflake.ml.model.model_signature.ModelSignature]): The model signature to use
+                when logging the model. This is required if ``log_model`` is set to True.
+
+        Raises:
+            ValueError: When ``log_every_n_epochs`` is not a positive integer.
+        """
         self._experiment_tracking = experiment_tracking
         self.log_model = log_model
         self.log_metrics = log_metrics
         self.log_params = log_params
-        if log_every_n_epochs
-            raise ValueError("`log_every_n_epochs` must be positive.")
+        if not (utils.is_integer(log_every_n_epochs) and log_every_n_epochs > 0):
+            raise ValueError("`log_every_n_epochs` must be a positive integer.")
         self.log_every_n_epochs = log_every_n_epochs
         self.model_name = model_name
         self.version_name = version_name
snowflake/ml/experiment/callback/xgboost.py
CHANGED

@@ -12,6 +12,8 @@ if TYPE_CHECKING:


 class SnowflakeXgboostCallback(xgb.callback.TrainingCallback):
+    """XGBoost callback for automatically logging to a Snowflake ML Experiment."""
+
     def __init__(
         self,
         experiment_tracking: "ExperimentTracking",

@@ -23,12 +25,33 @@ class SnowflakeXgboostCallback(xgb.callback.TrainingCallback):
         version_name: Optional[str] = None,
         model_signature: Optional["ModelSignature"] = None,
     ) -> None:
+        """
+        Initialize the callback.
+
+        Args:
+            experiment_tracking (snowflake.ml.experiment.ExperimentTracking): The Experiment Tracking instance
+                to use for logging.
+            log_model (bool): Whether to log the model at the end of training. Default is True.
+            log_metrics (bool): Whether to log metrics during training. Default is True.
+            log_params (bool): Whether to log model parameters at the start of training. Default is True.
+            log_every_n_epochs (int): Frequency with which to log metrics. Must be positive.
+                Default is 1, logging after every iteration.
+            model_name (Optional[str]): The model name to use when logging the model.
+                If None, the model name will be derived from the experiment name.
+            version_name (Optional[str]): The model version name to use when logging the model.
+                If None, the version name will be randomly generated.
+            model_signature (Optional[snowflake.ml.model.model_signature.ModelSignature]): The model signature to use
+                when logging the model. This is required if ``log_model`` is set to True.
+
+        Raises:
+            ValueError: When ``log_every_n_epochs`` is not a positive integer.
+        """
         self._experiment_tracking = experiment_tracking
         self.log_model = log_model
         self.log_metrics = log_metrics
         self.log_params = log_params
-        if log_every_n_epochs
-            raise ValueError("`log_every_n_epochs` must be positive.")
+        if not (utils.is_integer(log_every_n_epochs) and log_every_n_epochs > 0):
+            raise ValueError("`log_every_n_epochs` must be a positive integer.")
         self.log_every_n_epochs = log_every_n_epochs
         self.model_name = model_name
         self.version_name = version_name
snowflake/ml/experiment/experiment_tracking.py
CHANGED

@@ -1,6 +1,7 @@
 import functools
 import json
 import sys
+import warnings
 from typing import Any, Optional, Union
 from urllib.parse import quote

@@ -27,6 +28,13 @@ class ExperimentTracking:
     Class to manage experiments in Snowflake.
     """

+    _instance = None
+
+    def __new__(cls, *args: Any, **kwargs: Any) -> "ExperimentTracking":
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
     def __init__(
         self,
         session: snowpark.Session,

@@ -36,6 +44,7 @@ class ExperimentTracking:
     ) -> None:
         """
         Initializes experiment tracking within a pre-created schema.
+        This is a singleton class, so if an instance already exists, it will not reinitialize.

         Args:
             session: The Snowpark Session to connect with Snowflake.

@@ -47,6 +56,21 @@ class ExperimentTracking:
         Raises:
             ValueError: If no database is provided and no active database exists in the session.
         """
+        if hasattr(self, "_initialized"):
+            warnings.warn(
+                "ExperimentTracking is a singleton class. Reusing the existing instance, which has the setting:\n"
+                f"  Database: {self._database_name}, Schema: {self._schema_name}\n"
+                "To change the database or schema, use the database_name and schema_name arguments to set_experiment.",
+                UserWarning,
+                stacklevel=2,
+            )
+            return
+
+        # Declare types for mypy
+        self._database_name: sql_identifier.SqlIdentifier
+        self._schema_name: sql_identifier.SqlIdentifier
+        self._sql_client: sql_client.ExperimentTrackingSQLClient
+
         if database_name:
             self._database_name = sql_identifier.SqlIdentifier(database_name)
         elif session_db := session.get_current_database():

@@ -78,6 +102,8 @@ class ExperimentTracking:
         # The run in context
         self._run: Optional[entities.Run] = None

+        self._initialized = True
+
     def __getstate__(self) -> dict[str, Any]:
         parent_state = (
             super().__getstate__()  # type: ignore[misc] # object.__getstate__ appears in 3.11

@@ -116,19 +142,40 @@ class ExperimentTracking:
     def set_experiment(
         self,
         experiment_name: str,
+        database_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
    ) -> entities.Experiment:
         """
         Set the experiment in context. Creates a new experiment if it doesn't exist.

         Args:
             experiment_name: The name of the experiment.
+            database_name: The name of the database. If None, reuse the current database. Defaults to None.
+            schema_name: The name of the schema. If None, the behavior depends on whether `database_name` is specified.
+                If `database_name` is specified, the schema is set to "PUBLIC".
+                If `database_name` is not specified, reuse the current schema. Defaults to None.

         Returns:
             Experiment: The experiment that was set.
         """
+        if database_name is not None:
+            if schema_name is None:
+                schema_name = "PUBLIC"
+        database_name = (
+            sql_identifier.SqlIdentifier(database_name) if database_name is not None else self._database_name
+        )
+        schema_name = sql_identifier.SqlIdentifier(schema_name) if schema_name is not None else self._schema_name
+
         experiment_name = sql_identifier.SqlIdentifier(experiment_name)
-        if
+        if (
+            self._experiment
+            and self._experiment.name == experiment_name
+            and self._database_name == database_name
+            and self._schema_name == schema_name
+        ):
             return self._experiment
+
+        self._update_database_and_schema(database_name, schema_name)
         self._sql_client.create_experiment(
             experiment_name=experiment_name,
             creation_mode=sql_client_utils.CreationMode(if_not_exists=True),

@@ -140,15 +187,42 @@ class ExperimentTracking:
     def delete_experiment(
         self,
         experiment_name: str,
+        database_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
     ) -> None:
         """
         Delete an experiment.

         Args:
             experiment_name: The name of the experiment.
+            database_name: The name of the database. If None, reuse the current database.
+                Must be specified if `schema_name` is specified. Defaults to None.
+            schema_name: The name of the schema. If None, reuse the current schema.
+                Must be specified if `database_name` is specified. Defaults to None.
+
+        Raises:
+            ValueError: If database_name is specified but schema_name is not.
         """
-
-
+        if (database_name is None) ^ (schema_name is None):  # if only one of database_name and schema_name is set
+            raise ValueError(
+                "If one of database_name and schema_name is specified, the other one must also be specified."
+            )
+        database_name = (
+            sql_identifier.SqlIdentifier(database_name) if database_name is not None else self._database_name
+        )
+        schema_name = sql_identifier.SqlIdentifier(schema_name) if schema_name is not None else self._schema_name
+
+        self._sql_client.drop_experiment(
+            database_name=database_name,
+            schema_name=schema_name,
+            experiment_name=sql_identifier.SqlIdentifier(experiment_name),
+        )
+        if (
+            self._experiment
+            and self._experiment.name == experiment_name
+            and self._database_name == database_name
+            and self._schema_name == schema_name
+        ):
             self._experiment = None
             self._run = None

@@ -451,6 +525,22 @@ class ExperimentTracking:
             return sql_identifier.SqlIdentifier(run_name)
         raise RuntimeError("Random run name generation failed.")

+    def _update_database_and_schema(
+        self, database_name: sql_identifier.SqlIdentifier, schema_name: sql_identifier.SqlIdentifier
+    ) -> None:
+        self._database_name = database_name
+        self._schema_name = schema_name
+        self._sql_client = sql_client.ExperimentTrackingSQLClient(
+            session=self._session,
+            database_name=database_name,
+            schema_name=schema_name,
+        )
+        self._registry = registry.Registry(
+            session=self._session,
+            database_name=database_name,
+            schema_name=schema_name,
+        )
+
     def _print_urls(
         self,
         experiment_name: sql_identifier.SqlIdentifier,
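Taken together, the singleton `__new__`, the `_initialized` guard, and `_update_database_and_schema` mean a second constructor call warns and reuses the first instance, while `set_experiment` and `delete_experiment` can now switch database/schema context. A hedged sketch, assuming `session` is an active Snowpark session and the constructor accepts these keyword arguments:

from snowflake.ml.experiment.experiment_tracking import ExperimentTracking

exp = ExperimentTracking(session, database_name="ML_DB", schema_name="EXPERIMENTS")
same = ExperimentTracking(session)  # warns: singleton, reuses exp's settings
assert same is exp

exp.set_experiment("MY_EXP", database_name="OTHER_DB")  # schema falls back to "PUBLIC"
exp.delete_experiment("MY_EXP", database_name="OTHER_DB", schema_name="PUBLIC")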
snowflake/ml/experiment/utils.py
CHANGED

@@ -1,3 +1,4 @@
+import numbers
 from typing import Any, Union


@@ -12,3 +13,8 @@ def flatten_nested_params(params: Union[list[Any], dict[str, Any]], prefix: str
     else:
         flat_params[new_prefix] = value
     return flat_params
+
+
+def is_integer(value: Any) -> bool:
+    """Check if the given value is an integer, excluding booleans."""
+    return isinstance(value, numbers.Integral) and not isinstance(value, bool)
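One subtlety the new helper handles: `bool` is a subclass of `int` and therefore registers as `numbers.Integral`, so without the explicit exclusion `is_integer(True)` would return True. A quick check:

import numbers

print(isinstance(True, numbers.Integral))  # True, hence the explicit bool exclusion
print(isinstance(3, numbers.Integral) and not isinstance(3, bool))      # True
print(isinstance(True, numbers.Integral) and not isinstance(True, bool))  # False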