snowflake-ml-python 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +16 -0
- snowflake/ml/_internal/platform_capabilities.py +36 -0
- snowflake/ml/_internal/telemetry.py +56 -7
- snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
- snowflake/ml/data/data_connector.py +103 -1
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
- snowflake/ml/experiment/_entities/run.py +15 -0
- snowflake/ml/experiment/callback/keras.py +25 -2
- snowflake/ml/experiment/callback/lightgbm.py +27 -2
- snowflake/ml/experiment/callback/xgboost.py +25 -2
- snowflake/ml/experiment/experiment_tracking.py +123 -13
- snowflake/ml/experiment/utils.py +6 -0
- snowflake/ml/feature_store/access_manager.py +1 -0
- snowflake/ml/feature_store/feature_store.py +1 -1
- snowflake/ml/feature_store/feature_view.py +34 -24
- snowflake/ml/jobs/_interop/protocols.py +3 -0
- snowflake/ml/jobs/_utils/feature_flags.py +1 -0
- snowflake/ml/jobs/_utils/payload_utils.py +360 -357
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
- snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
- snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
- snowflake/ml/jobs/_utils/spec_utils.py +2 -406
- snowflake/ml/jobs/_utils/stage_utils.py +22 -1
- snowflake/ml/jobs/_utils/types.py +14 -7
- snowflake/ml/jobs/job.py +8 -9
- snowflake/ml/jobs/manager.py +64 -129
- snowflake/ml/model/_client/model/inference_engine_utils.py +8 -4
- snowflake/ml/model/_client/model/model_version_impl.py +109 -28
- snowflake/ml/model/_client/ops/model_ops.py +32 -6
- snowflake/ml/model/_client/ops/service_ops.py +9 -4
- snowflake/ml/model/_client/sql/service.py +69 -2
- snowflake/ml/model/_packager/model_handler.py +8 -2
- snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
- snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_signatures/core.py +305 -8
- snowflake/ml/model/_signatures/utils.py +13 -4
- snowflake/ml/model/compute_pool.py +2 -0
- snowflake/ml/model/models/huggingface.py +285 -0
- snowflake/ml/model/models/huggingface_pipeline.py +25 -215
- snowflake/ml/model/type_hints.py +5 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
- snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
- snowflake/ml/utils/html_utils.py +67 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/METADATA +94 -7
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/RECORD +52 -48
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/top_level.txt +0 -0

snowflake/ml/_internal/env_utils.py
@@ -16,6 +16,7 @@ from snowflake.ml import version as snowml_version
 from snowflake.ml._internal import env as snowml_env, relax_version_strategy
 from snowflake.ml._internal.utils import query_result_checker
 from snowflake.snowpark import context, exceptions, session
+from snowflake.snowpark._internal import utils as snowpark_utils
 
 
 class CONDA_OS(Enum):
@@ -38,6 +39,21 @@ SNOWPARK_ML_PKG_NAME = "snowflake-ml-python"
 SNOWFLAKE_CONDA_CHANNEL_URL = "https://repo.anaconda.com/pkgs/snowflake"
 
 
+def get_execution_context() -> str:
+    """Detect execution context: EXTERNAL, SPCS, or SPROC.
+
+    Returns:
+        str: The execution context - "SPROC" if running in a stored procedure,
+            "SPCS" if running in SPCS ML runtime, "EXTERNAL" otherwise.
+    """
+    if snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
+        return "SPROC"
+    elif snowml_env.IN_ML_RUNTIME:
+        return "SPCS"
+    else:
+        return "EXTERNAL"
+
+
 def _validate_pip_requirement_string(req_str: str) -> requirements.Requirement:
     """Validate the input pip requirement string according to PEP 508.
 
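
The new `get_execution_context()` helper gives telemetry a coarse label for where the SDK is running. A minimal usage sketch (assuming a regular client-side install, where neither the stored-procedure check nor the SPCS ML runtime flag triggers):

    from snowflake.ml._internal import env_utils

    # On a laptop or CI runner this is expected to return "EXTERNAL";
    # inside a stored procedure it returns "SPROC", and inside the
    # SPCS ML runtime it returns "SPCS".
    print(env_utils.get_execution_context())
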

snowflake/ml/_internal/platform_capabilities.py
@@ -18,6 +18,8 @@ logger = logging.getLogger(__name__)
 LIVE_COMMIT_PARAMETER = "ENABLE_LIVE_VERSION_IN_SDK"
 INLINE_DEPLOYMENT_SPEC_PARAMETER = "ENABLE_INLINE_DEPLOYMENT_SPEC_FROM_CLIENT_VERSION"
 SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST = "SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST"
+ENABLE_MODEL_METHOD_SIGNATURE_PARAMETERS = "ENABLE_MODEL_METHOD_SIGNATURE_PARAMETERS"
+FEATURE_MODEL_INFERENCE_AUTOCAPTURE = "FEATURE_MODEL_INFERENCE_AUTOCAPTURE"
 
 
 class PlatformCapabilities:
@@ -80,6 +82,12 @@ class PlatformCapabilities:
     def is_live_commit_enabled(self) -> bool:
         return self._get_bool_feature(LIVE_COMMIT_PARAMETER, False)
 
+    def is_model_method_signature_parameters_enabled(self) -> bool:
+        return self._get_bool_feature(ENABLE_MODEL_METHOD_SIGNATURE_PARAMETERS, False)
+
+    def is_inference_autocapture_enabled(self) -> bool:
+        return self._is_feature_enabled(FEATURE_MODEL_INFERENCE_AUTOCAPTURE)
+
     @staticmethod
     def _get_features(session: snowpark_session.Session) -> dict[str, Any]:
         try:
@@ -182,3 +190,31 @@ class PlatformCapabilities:
             f"current={current_version}, feature={feature_version}, enabled={result}"
         )
         return result
+
+    def _is_feature_enabled(self, feature_name: str) -> bool:
+        """Check if the feature parameter value belongs to enabled values.
+
+        Args:
+            feature_name: The name of the feature to retrieve.
+
+        Returns:
+            bool: True if the value is "ENABLED" or "ENABLED_PUBLIC_PREVIEW",
+                False if the value is "DISABLED", "DISABLED_PRIVATE_PREVIEW", or not set.
+
+        Raises:
+            ValueError: If the feature value is set but not one of the recognized values.
+        """
+        value = self.features.get(feature_name)
+        if value is None:
+            logger.debug(f"Feature {feature_name} not found.")
+            return False
+
+        if isinstance(value, str):
+            value_str = str(value)
+            if value_str.upper() in ["ENABLED", "ENABLED_PUBLIC_PREVIEW"]:
+                return True
+            elif value_str.upper() in ["DISABLED", "DISABLED_PRIVATE_PREVIEW"]:
+                return False
+            else:
+                raise ValueError(f"Invalid feature parameter value: {value} for feature {feature_name}")
+        raise ValueError(f"Invalid feature parameter string value: {value} for feature {feature_name}")

snowflake/ml/_internal/telemetry.py
@@ -16,7 +16,7 @@ from typing_extensions import ParamSpec
 from snowflake import connector
 from snowflake.connector import connect, telemetry as connector_telemetry, time_util
 from snowflake.ml import version as snowml_version
-from snowflake.ml._internal import env
+from snowflake.ml._internal import env, env_utils
 from snowflake.ml._internal.exceptions import (
     error_codes,
     exceptions as snowml_exceptions,
@@ -37,6 +37,22 @@ _CONNECTION_TYPES = {
 _Args = ParamSpec("_Args")
 _ReturnValue = TypeVar("_ReturnValue")
 
+_conn: Optional[connector.SnowflakeConnection] = None
+
+
+def clear_cached_conn() -> None:
+    """Clear the cached Snowflake connection. Primarily for testing purposes."""
+    global _conn
+    if _conn is not None and _conn.is_valid():
+        _conn.close()
+    _conn = None
+
+
+def get_cached_conn() -> Optional[connector.SnowflakeConnection]:
+    """Get the cached Snowflake connection. Primarily for testing purposes."""
+    global _conn
+    return _conn
+
 
 def _get_login_token() -> Union[str, bytes]:
     with open("/snowflake/session/token") as f:
@@ -44,7 +60,11 @@ def _get_login_token() -> Union[str, bytes]:
 
 
 def _get_snowflake_connection() -> Optional[connector.SnowflakeConnection]:
-
+    global _conn
+    if _conn is not None and _conn.is_valid():
+        return _conn
+
+    conn: Optional[connector.SnowflakeConnection] = None
     if os.getenv("SNOWFLAKE_HOST") is not None and os.getenv("SNOWFLAKE_ACCOUNT") is not None:
         try:
             conn = connect(
@@ -66,6 +86,13 @@ def _get_snowflake_connection() -> Optional[connector.SnowflakeConnection]:
         # Failed to get an active session. No connection available.
         pass
 
+    # cache the connection if it's a SnowflakeConnection. there is a behavior at runtime where it could be a
+    # StoredProcConnection perhaps incorrect type hinting somewhere
+    if isinstance(conn, connector.SnowflakeConnection):
+        # if _conn was expired, we need to copy telemetry data to new connection
+        if _conn is not None and conn is not None:
+            conn._telemetry._log_batch.extend(_conn._telemetry._log_batch)
+        _conn = conn
     return conn
 
 
@@ -113,6 +140,13 @@ class TelemetryField(enum.Enum):
     FUNC_CAT_USAGE = "usage"
 
 
+@enum.unique
+class CustomTagKey(enum.Enum):
+    """Keys for custom tags in telemetry."""
+
+    EXECUTION_CONTEXT = "execution_context"
+
+
 class _TelemetrySourceType(enum.Enum):
     # Automatically inferred telemetry/statement parameters
     AUTO_TELEMETRY = "SNOWML_AUTO_TELEMETRY"
@@ -441,6 +475,7 @@ def send_api_usage_telemetry(
     sfqids_extractor: Optional[Callable[..., list[str]]] = None,
     subproject_extractor: Optional[Callable[[Any], str]] = None,
     custom_tags: Optional[dict[str, Union[bool, int, str, float]]] = None,
+    log_execution_context: bool = True,
 ) -> Callable[[Callable[_Args, _ReturnValue]], Callable[_Args, _ReturnValue]]:
     """
     Decorator that sends API usage telemetry and adds function usage statement parameters to the dataframe returned by
@@ -455,6 +490,8 @@ def send_api_usage_telemetry(
         sfqids_extractor: Extract sfqids from `self`.
         subproject_extractor: Extract subproject at runtime from `self`.
         custom_tags: Custom tags.
+        log_execution_context: If True, automatically detect and log execution context
+            (EXTERNAL, SPCS, or SPROC) in custom_tags.
 
     Returns:
         Decorator that sends function usage telemetry for any call to the decorated function.
@@ -495,6 +532,11 @@ def send_api_usage_telemetry(
             if subproject_extractor is not None:
                 subproject_name = subproject_extractor(args[0])
 
+            # Add execution context if enabled
+            final_custom_tags = {**custom_tags} if custom_tags is not None else {}
+            if log_execution_context:
+                final_custom_tags[CustomTagKey.EXECUTION_CONTEXT.value] = env_utils.get_execution_context()
+
             statement_params = get_function_usage_statement_params(
                 project=project,
                 subproject=subproject_name,
@@ -502,7 +544,7 @@ def send_api_usage_telemetry(
                 function_name=_get_full_func_name(func),
                 function_parameters=params,
                 api_calls=api_calls,
-                custom_tags=custom_tags,
+                custom_tags=final_custom_tags,
             )
 
             def update_stmt_params_if_snowpark_df(obj: _ReturnValue, statement_params: dict[str, Any]) -> _ReturnValue:
@@ -538,7 +580,10 @@ def send_api_usage_telemetry(
             if conn_attr_name:
                 # raise AttributeError if conn attribute does not exist in `self`
                 conn = operator.attrgetter(conn_attr_name)(args[0])
-                if not isinstance(conn, _CONNECTION_TYPES.get(type(conn).__name__, connector.SnowflakeConnection)):
+                if not isinstance(
+                    conn,
+                    _CONNECTION_TYPES.get(type(conn).__name__, connector.SnowflakeConnection),
+                ):
                     raise TypeError(
                         f"Expected a conn object of type {' or '.join(_CONNECTION_TYPES.keys())} but got {type(conn)}"
                     )
@@ -560,7 +605,7 @@ def send_api_usage_telemetry(
                 func_params=params,
                 api_calls=api_calls,
                 sfqids=sfqids,
-                custom_tags=custom_tags,
+                custom_tags=final_custom_tags,
            )
             try:
                 return ctx.run(execute_func_with_statement_params)
@@ -571,7 +616,8 @@ def send_api_usage_telemetry(
                     raise
                 if isinstance(e, snowpark_exceptions.SnowparkClientException):
                     me = snowml_exceptions.SnowflakeMLException(
-                        error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                        error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                        original_exception=e,
                     )
                 else:
                     me = snowml_exceptions.SnowflakeMLException(
@@ -627,7 +673,10 @@ def _get_full_func_name(func: Callable[..., Any]) -> str:
 
 
 def _get_func_params(
-    func: Callable[..., Any], func_params_to_log: Optional[Iterable[str]], args: Any, kwargs: Any
+    func: Callable[..., Any],
+    func_params_to_log: Optional[Iterable[str]],
+    args: Any,
+    kwargs: Any,
 ) -> dict[str, Any]:
     """
     Get function parameters.

snowflake/ml/data/_internal/arrow_ingestor.py
@@ -1,6 +1,8 @@
+import base64
 import collections
 import logging
 import os
+import re
 import time
 from typing import TYPE_CHECKING, Any, Deque, Iterator, Optional, Sequence, Union
 
@@ -165,8 +167,71 @@ class ArrowIngestor(data_ingestor.DataIngestor, mixins.SerializableSessionMixin)
         # Re-shuffle input files on each iteration start
         if shuffle:
             np.random.shuffle(sources)
-
-
+        try:
+            pa_dataset: pds.Dataset = pds.dataset(sources, format=format, **self._kwargs)
+            return pa_dataset
+        except Exception as e:
+            self._tmp_debug_parquet_invalid(e, sources)
+
+    def _tmp_debug_parquet_invalid(self, e: Exception, sources: list[Any]) -> None:
+        # Attach rich debug info to help diagnose intermittent Parquet footer/magic byte errors
+        debug_parts: list[str] = []
+        debug_parts.append("SNOWML DEBUG: Failed to construct Arrow Dataset")
+        debug_parts.append(
+            "SNOWML DEBUG: " f"data_sources_count={len(self._data_sources)} " f"resolved_sources_count={len(sources)}"
+        )
+        # Try to include the exact file path mentioned by pyarrow, if present
+        error_text = str(e)
+        snow_paths: list[str] = []
+        try:
+            # Extract snow://... tokens possibly wrapped in quotes
+            for match in re.finditer(r'(snow://[^\s\'"]+)', error_text):
+                token = match.group(1).rstrip(").,;]")
+                snow_paths.append(token)
+        except Exception:
+            pass
+        fs = self._kwargs.get("filesystem")
+        if fs is not None:
+            # Always include a directory listing with sizes for context
+            try:
+                debug_parts.append("SNOWML DEBUG: Listing resolved sources with sizes:")
+                for s in sources:
+                    try:
+                        info = fs.info(s)
+                        size = info.get("size", None)
+                        md5 = info.get("md5", None)
+                        debug_parts.append(f" - {s} size={size} md5={md5}")
+                    except Exception as le:
+                        debug_parts.append(f" - {s} info_failed={le}")
+            except Exception as le:
+                debug_parts.append(f"SNOWML DEBUG: listing sources failed: {le}")
+            # If pyarrow referenced a specific file, dump its full contents (base64) for inspection
+            for path in snow_paths[:1]:  # usually only one path appears in the message
+                try:
+                    info = fs.info(path)
+                    size = info.get("size", None)
+                    debug_parts.append(f"SNOWML DEBUG: Inspecting referenced file: {path} size={size}")
+                    with fs.open(path, "rb") as f:
+                        content = f.read()
+                    magic_head = content[:4]
+                    magic_tail = content[-4:] if content else b""
+                    looks_like_parquet = (magic_head == b"PAR1") and (magic_tail == b"PAR1")
+                    debug_parts.append(
+                        "SNOWML DEBUG: "
+                        f"file_magic_head={magic_head!r} "
+                        f"file_magic_tail={magic_tail!r} "
+                        f"parquet_magic_detected={looks_like_parquet}"
+                    )
+                    b64 = base64.b64encode(content).decode("ascii")
+                    debug_parts.append("SNOWML DEBUG: file_content_base64 (entire file):")
+                    debug_parts.append(b64)
+                except Exception as fe:
+                    debug_parts.append(f"SNOWML DEBUG: failed to read referenced file {path}: {fe}")
+        else:
+            debug_parts.append("SNOWML DEBUG: No filesystem available; cannot inspect files")
+        debug_message = "\n".join(debug_parts)
+        # Re-raise with augmented message to surface in stacktrace
+        raise RuntimeError(f"{e}\n{debug_message}") from e
 
     def _get_batches_from_buffer(self, batch_size: int) -> dict[str, npt.NDArray[Any]]:
         """Generate new batches from the existing record batch buffer."""
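
The debug helper's core signal is the Parquet magic marker: a well-formed file both starts and ends with the 4 bytes PAR1. A minimal local check along the same lines (a hypothetical helper operating on a local path rather than the snow:// filesystem used above; files shorter than 4 bytes are not handled):

    def looks_like_parquet(path: str) -> bool:
        # A valid Parquet file begins and ends with the magic bytes b"PAR1".
        with open(path, "rb") as f:
            head = f.read(4)
            f.seek(-4, 2)  # seek to the last four bytes (whence=2 means "from end")
            tail = f.read(4)
        return head == b"PAR1" and tail == b"PAR1"
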

snowflake/ml/data/data_connector.py
@@ -1,5 +1,15 @@
 import os
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Generator,
+    Literal,
+    Optional,
+    Sequence,
+    TypeVar,
+    Union,
+    overload,
+)
 
 import numpy.typing as npt
 from typing_extensions import deprecated
@@ -11,6 +21,7 @@ from snowflake.ml.data._internal.arrow_ingestor import ArrowIngestor
 from snowflake.snowpark import context as sp_context
 
 if TYPE_CHECKING:
+    import datasets as hf_datasets
     import pandas as pd
     import ray
     import tensorflow as tf
@@ -257,6 +268,97 @@ class DataConnector:
         except ImportError as e:
             raise ImportError("Ray is not installed, please install ray in your local environment.") from e
 
+    @overload
+    def to_huggingface_dataset(
+        self,
+        *,
+        streaming: Literal[False] = ...,
+        limit: Optional[int] = ...,
+    ) -> "hf_datasets.Dataset":
+        ...
+
+    @overload
+    def to_huggingface_dataset(
+        self,
+        *,
+        streaming: Literal[True],
+        limit: Optional[int] = ...,
+        batch_size: int = ...,
+        shuffle: bool = ...,
+        drop_last_batch: bool = ...,
+    ) -> "hf_datasets.IterableDataset":
+        ...
+
+    @telemetry.send_api_usage_telemetry(
+        project=_PROJECT,
+        subproject_extractor=lambda self: type(self).__name__,
+        func_params_to_log=["streaming", "limit", "batch_size", "shuffle", "drop_last_batch"],
+    )
+    def to_huggingface_dataset(
+        self,
+        *,
+        streaming: bool = False,
+        limit: Optional[int] = None,
+        batch_size: int = 1024,
+        shuffle: bool = False,
+        drop_last_batch: bool = False,
+    ) -> "Union[hf_datasets.Dataset, hf_datasets.IterableDataset]":
+        """Retrieve the Snowflake data as a HuggingFace Dataset.
+
+        Args:
+            streaming: If True, returns an IterableDataset that streams data in batches.
+                If False (default), returns an in-memory Dataset.
+            limit: Maximum number of rows to load. If None, loads all rows.
+            batch_size: Size of batches for internal data retrieval.
+            shuffle: Whether to shuffle the data. If True, files will be shuffled and rows
+                in each file will also be shuffled.
+            drop_last_batch: Whether to drop the last batch if it's smaller than batch_size.
+
+        Returns:
+            A HuggingFace Dataset (in-memory) or IterableDataset (streaming).
+        """
+        import datasets as hf_datasets
+
+        if streaming:
+            return self._to_huggingface_iterable_dataset(
+                limit=limit,
+                batch_size=batch_size,
+                shuffle=shuffle,
+                drop_last_batch=drop_last_batch,
+            )
+        else:
+            return hf_datasets.Dataset.from_pandas(self._ingestor.to_pandas(limit))
+
+    def _to_huggingface_iterable_dataset(
+        self,
+        *,
+        limit: Optional[int],
+        batch_size: int,
+        shuffle: bool,
+        drop_last_batch: bool,
+    ) -> "hf_datasets.IterableDataset":
+        """Create a HuggingFace IterableDataset that streams data in batches."""
+        import datasets as hf_datasets
+
+        def generator() -> Generator[dict[str, Any], None, None]:
+            rows_yielded = 0
+            for batch in self._ingestor.to_batches(batch_size, shuffle, drop_last_batch):
+                # Yield individual rows from each batch
+                num_rows = len(next(iter(batch.values())))
+                for i in range(num_rows):
+                    if limit is not None and rows_yielded >= limit:
+                        return
+                    yield {k: v[i].item() if hasattr(v[i], "item") else v[i] for k, v in batch.items()}
+                    rows_yielded += 1
+
+        result = hf_datasets.IterableDataset.from_generator(generator)
+        # In datasets >= 3.x, from_generator returns IterableDatasetDict
+        # We need to extract the IterableDataset from the dict
+        if hasattr(hf_datasets, "IterableDatasetDict") and isinstance(result, hf_datasets.IterableDatasetDict):
+            # Get the first (and only) dataset from the dict
+            result = next(iter(result.values()))
+        return result
+
 
 # Switch to use Runtime's Data Ingester if running in ML runtime
 # Fail silently if the data ingester is not found

snowflake/ml/experiment/_client/experiment_tracking_sql_client.py
@@ -41,8 +41,14 @@ class ExperimentTrackingSQLClient(_base._BaseSQLClient):
         ).has_dimensions(expected_rows=1, expected_cols=1).validate()
 
     @telemetry.send_api_usage_telemetry(project=telemetry.TelemetryProject.EXPERIMENT_TRACKING.value)
-    def drop_experiment(
-
+    def drop_experiment(
+        self,
+        *,
+        database_name: sql_identifier.SqlIdentifier,
+        schema_name: sql_identifier.SqlIdentifier,
+        experiment_name: sql_identifier.SqlIdentifier,
+    ) -> None:
+        experiment_fqn = self.fully_qualified_object_name(database_name, schema_name, experiment_name)
         query_result_checker.SqlResultValidator(self._session, f"DROP EXPERIMENT {experiment_fqn}").has_dimensions(
             expected_rows=1, expected_cols=1
         ).validate()

snowflake/ml/experiment/_entities/run.py
@@ -1,4 +1,5 @@
 import types
+import warnings
 from typing import TYPE_CHECKING, Optional
 
 from snowflake.ml._internal.utils import sql_identifier
@@ -7,6 +8,8 @@ from snowflake.ml.experiment import _experiment_info as experiment_info
 if TYPE_CHECKING:
     from snowflake.ml.experiment import experiment_tracking
 
+METADATA_SIZE_WARNING_MESSAGE = "It is likely that no further metrics or parameters will be logged for this run."
+
 
 class Run:
     def __init__(
@@ -20,6 +23,9 @@ class Run:
         self.experiment_name = experiment_name
         self.name = run_name
 
+        # Whether we've already shown the user a warning about exceeding the run metadata size limit.
+        self._warned_about_metadata_size = False
+
         self._patcher = experiment_info.ExperimentInfoPatcher(
             experiment_info=self._get_experiment_info(),
         )
@@ -45,3 +51,12 @@
             ),
             run_name=self.name.identifier(),
         )
+
+    def _warn_about_run_metadata_size(self, sql_error_msg: str) -> None:
+        if not self._warned_about_metadata_size:
+            warnings.warn(
+                f"{sql_error_msg}. {METADATA_SIZE_WARNING_MESSAGE}",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+            self._warned_about_metadata_size = True

snowflake/ml/experiment/callback/keras.py
@@ -12,6 +12,8 @@ if TYPE_CHECKING:
 
 
 class SnowflakeKerasCallback(keras.callbacks.Callback):
+    """Keras callback for automatically logging to a Snowflake ML Experiment."""
+
     def __init__(
         self,
         experiment_tracking: "ExperimentTracking",
@@ -23,12 +25,33 @@ class SnowflakeKerasCallback(keras.callbacks.Callback):
         version_name: Optional[str] = None,
         model_signature: Optional["ModelSignature"] = None,
     ) -> None:
+        """
+        Creates a new Keras callback.
+
+        Args:
+            experiment_tracking (snowflake.ml.experiment.ExperimentTracking): The Experiment Tracking instance
+                to use for logging.
+            log_model (bool): Whether to log the model at the end of training. Default is True.
+            log_metrics (bool): Whether to log metrics during training. Default is True.
+            log_params (bool): Whether to log model parameters at the start of training. Default is True.
+            log_every_n_epochs (int): Frequency with which to log metrics. Must be positive.
+                Default is 1, logging after every epoch.
+            model_name (Optional[str]): The model name to use when logging the model.
+                If None, the model name will be derived from the experiment name.
+            version_name (Optional[str]): The model version name to use when logging the model.
+                If None, the version name will be randomly generated.
+            model_signature (Optional[snowflake.ml.model.model_signature.ModelSignature]): The model signature to use
+                when logging the model. This is required if ``log_model`` is set to True.
+
+        Raises:
+            ValueError: When ``log_every_n_epochs`` is not a positive integer.
+        """
         self._experiment_tracking = experiment_tracking
         self.log_model = log_model
         self.log_metrics = log_metrics
         self.log_params = log_params
-        if log_every_n_epochs
-            raise ValueError("`log_every_n_epochs` must be positive.")
+        if not (utils.is_integer(log_every_n_epochs) and log_every_n_epochs > 0):
+            raise ValueError("`log_every_n_epochs` must be a positive integer.")
         self.log_every_n_epochs = log_every_n_epochs
         self.model_name = model_name
         self.version_name = version_name

snowflake/ml/experiment/callback/lightgbm.py
@@ -3,12 +3,16 @@ from warnings import warn
 
 import lightgbm as lgb
 
+from snowflake.ml.experiment import utils
+
 if TYPE_CHECKING:
     from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
     from snowflake.ml.model.model_signature import ModelSignature
 
 
 class SnowflakeLightgbmCallback(lgb.callback._RecordEvaluationCallback):
+    """LightGBM callback for automatically logging to a Snowflake ML Experiment."""
+
     def __init__(
         self,
         experiment_tracking: "ExperimentTracking",
@@ -20,12 +24,33 @@ class SnowflakeLightgbmCallback(lgb.callback._RecordEvaluationCallback):
         version_name: Optional[str] = None,
         model_signature: Optional["ModelSignature"] = None,
     ) -> None:
+        """
+        Creates a new LightGBM callback.
+
+        Args:
+            experiment_tracking (snowflake.ml.experiment.ExperimentTracking): The Experiment Tracking instance
+                to use for logging.
+            log_model (bool): Whether to log the model at the end of training. Default is True.
+            log_metrics (bool): Whether to log metrics during training. Default is True.
+            log_params (bool): Whether to log model parameters at the start of training. Default is True.
+            log_every_n_epochs (int): Frequency with which to log metrics. Must be positive.
+                Default is 1, logging after every iteration.
+            model_name (Optional[str]): The model name to use when logging the model.
+                If None, the model name will be derived from the experiment name.
+            version_name (Optional[str]): The model version name to use when logging the model.
+                If None, the version name will be randomly generated.
+            model_signature (Optional[snowflake.ml.model.model_signature.ModelSignature]): The model signature to use
+                when logging the model. This is required if ``log_model`` is set to True.
+
+        Raises:
+            ValueError: When ``log_every_n_epochs`` is not a positive integer.
+        """
         self._experiment_tracking = experiment_tracking
         self.log_model = log_model
         self.log_metrics = log_metrics
         self.log_params = log_params
-        if log_every_n_epochs
-            raise ValueError("`log_every_n_epochs` must be positive.")
+        if not (utils.is_integer(log_every_n_epochs) and log_every_n_epochs > 0):
+            raise ValueError("`log_every_n_epochs` must be a positive integer.")
         self.log_every_n_epochs = log_every_n_epochs
         self.model_name = model_name
         self.version_name = version_name

snowflake/ml/experiment/callback/xgboost.py
@@ -12,6 +12,8 @@ if TYPE_CHECKING:
 
 
 class SnowflakeXgboostCallback(xgb.callback.TrainingCallback):
+    """XGBoost callback for automatically logging to a Snowflake ML Experiment."""
+
     def __init__(
         self,
         experiment_tracking: "ExperimentTracking",
@@ -23,12 +25,33 @@ class SnowflakeXgboostCallback(xgb.callback.TrainingCallback):
         version_name: Optional[str] = None,
         model_signature: Optional["ModelSignature"] = None,
     ) -> None:
+        """
+        Initialize the callback.
+
+        Args:
+            experiment_tracking (snowflake.ml.experiment.ExperimentTracking): The Experiment Tracking instance
+                to use for logging.
+            log_model (bool): Whether to log the model at the end of training. Default is True.
+            log_metrics (bool): Whether to log metrics during training. Default is True.
+            log_params (bool): Whether to log model parameters at the start of training. Default is True.
+            log_every_n_epochs (int): Frequency with which to log metrics. Must be positive.
+                Default is 1, logging after every iteration.
+            model_name (Optional[str]): The model name to use when logging the model.
+                If None, the model name will be derived from the experiment name.
+            version_name (Optional[str]): The model version name to use when logging the model.
+                If None, the version name will be randomly generated.
+            model_signature (Optional[snowflake.ml.model.model_signature.ModelSignature]): The model signature to use
+                when logging the model. This is required if ``log_model`` is set to True.
+
+        Raises:
+            ValueError: When ``log_every_n_epochs`` is not a positive integer.
+        """
         self._experiment_tracking = experiment_tracking
         self.log_model = log_model
         self.log_metrics = log_metrics
         self.log_params = log_params
-        if log_every_n_epochs
-            raise ValueError("`log_every_n_epochs` must be positive.")
+        if not (utils.is_integer(log_every_n_epochs) and log_every_n_epochs > 0):
+            raise ValueError("`log_every_n_epochs` must be a positive integer.")
         self.log_every_n_epochs = log_every_n_epochs
         self.model_name = model_name
         self.version_name = version_name
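
And for XGBoost, where the callback subclasses `xgb.callback.TrainingCallback` and is passed to `xgb.train` (a sketch under the same assumptions as above):

    import xgboost as xgb
    from snowflake.ml.experiment.callback.xgboost import SnowflakeXgboostCallback

    cb = SnowflakeXgboostCallback(exp, log_every_n_epochs=10, model_signature=sig)
    dtrain = xgb.DMatrix(x_train, label=y_train)
    booster = xgb.train(
        {"objective": "binary:logistic"},
        dtrain,
        num_boost_round=100,
        evals=[(dtrain, "train")],
        callbacks=[cb],
    )
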