snowflake-ml-python 1.8.6__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. snowflake/ml/_internal/env_utils.py +44 -3
  2. snowflake/ml/_internal/platform_capabilities.py +52 -2
  3. snowflake/ml/_internal/type_utils.py +1 -1
  4. snowflake/ml/_internal/utils/identifier.py +1 -1
  5. snowflake/ml/_internal/utils/mixins.py +71 -0
  6. snowflake/ml/_internal/utils/service_logger.py +4 -2
  7. snowflake/ml/data/_internal/arrow_ingestor.py +11 -1
  8. snowflake/ml/data/data_connector.py +43 -2
  9. snowflake/ml/data/data_ingestor.py +8 -0
  10. snowflake/ml/data/torch_utils.py +1 -1
  11. snowflake/ml/dataset/dataset.py +3 -2
  12. snowflake/ml/dataset/dataset_reader.py +22 -6
  13. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +98 -0
  14. snowflake/ml/experiment/_entities/__init__.py +4 -0
  15. snowflake/ml/experiment/_entities/experiment.py +10 -0
  16. snowflake/ml/experiment/_entities/run.py +62 -0
  17. snowflake/ml/experiment/_entities/run_metadata.py +68 -0
  18. snowflake/ml/experiment/_experiment_info.py +63 -0
  19. snowflake/ml/experiment/experiment_tracking.py +319 -0
  20. snowflake/ml/jobs/_utils/constants.py +1 -1
  21. snowflake/ml/jobs/_utils/interop_utils.py +63 -4
  22. snowflake/ml/jobs/_utils/payload_utils.py +5 -3
  23. snowflake/ml/jobs/_utils/query_helper.py +20 -0
  24. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +5 -1
  25. snowflake/ml/jobs/_utils/spec_utils.py +21 -4
  26. snowflake/ml/jobs/decorators.py +18 -25
  27. snowflake/ml/jobs/job.py +137 -37
  28. snowflake/ml/jobs/manager.py +228 -153
  29. snowflake/ml/lineage/lineage_node.py +2 -2
  30. snowflake/ml/model/_client/model/model_version_impl.py +16 -4
  31. snowflake/ml/model/_client/ops/model_ops.py +12 -3
  32. snowflake/ml/model/_client/ops/service_ops.py +324 -138
  33. snowflake/ml/model/_client/service/model_deployment_spec.py +1 -1
  34. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
  35. snowflake/ml/model/_model_composer/model_composer.py +6 -1
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +55 -13
  37. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  38. snowflake/ml/model/_packager/model_env/model_env.py +35 -27
  39. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +41 -2
  40. snowflake/ml/model/_packager/model_handlers/pytorch.py +5 -1
  41. snowflake/ml/model/_packager/model_meta/model_meta.py +3 -1
  42. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -1
  43. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -3
  44. snowflake/ml/model/_signatures/snowpark_handler.py +55 -3
  45. snowflake/ml/model/event_handler.py +117 -0
  46. snowflake/ml/model/model_signature.py +9 -9
  47. snowflake/ml/model/models/huggingface_pipeline.py +170 -1
  48. snowflake/ml/model/target_platform.py +11 -0
  49. snowflake/ml/model/task.py +9 -0
  50. snowflake/ml/model/type_hints.py +5 -13
  51. snowflake/ml/modeling/framework/base.py +1 -1
  52. snowflake/ml/modeling/metrics/classification.py +14 -14
  53. snowflake/ml/modeling/metrics/correlation.py +19 -8
  54. snowflake/ml/modeling/metrics/metrics_utils.py +2 -0
  55. snowflake/ml/modeling/metrics/ranking.py +6 -6
  56. snowflake/ml/modeling/metrics/regression.py +9 -9
  57. snowflake/ml/monitoring/explain_visualize.py +12 -5
  58. snowflake/ml/registry/_manager/model_manager.py +47 -15
  59. snowflake/ml/registry/registry.py +109 -64
  60. snowflake/ml/version.py +1 -1
  61. {snowflake_ml_python-1.8.6.dist-info → snowflake_ml_python-1.9.1.dist-info}/METADATA +118 -18
  62. {snowflake_ml_python-1.8.6.dist-info → snowflake_ml_python-1.9.1.dist-info}/RECORD +65 -53
  63. {snowflake_ml_python-1.8.6.dist-info → snowflake_ml_python-1.9.1.dist-info}/WHEEL +0 -0
  64. {snowflake_ml_python-1.8.6.dist-info → snowflake_ml_python-1.9.1.dist-info}/licenses/LICENSE.txt +0 -0
  65. {snowflake_ml_python-1.8.6.dist-info → snowflake_ml_python-1.9.1.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/decorators.py CHANGED
@@ -1,6 +1,6 @@
 import copy
 import functools
-from typing import Callable, Optional, TypeVar
+from typing import Any, Callable, Optional, TypeVar
 
 from typing_extensions import ParamSpec
 
@@ -20,16 +20,11 @@ def remote(
     compute_pool: str,
     *,
     stage_name: str,
+    target_instances: int = 1,
     pip_requirements: Optional[list[str]] = None,
     external_access_integrations: Optional[list[str]] = None,
-    query_warehouse: Optional[str] = None,
-    env_vars: Optional[dict[str, str]] = None,
-    target_instances: int = 1,
-    min_instances: Optional[int] = None,
-    enable_metrics: bool = False,
-    database: Optional[str] = None,
-    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
+    **kwargs: Any,
 ) -> Callable[[Callable[_Args, _ReturnValue]], Callable[_Args, jb.MLJob[_ReturnValue]]]:
     """
     Submit a job to the compute pool.
@@ -37,17 +32,20 @@ def remote(
     Args:
         compute_pool: The compute pool to use for the job.
         stage_name: The name of the stage where the job payload will be uploaded.
+        target_instances: The number of nodes in the job. If none specified, create a single node job.
         pip_requirements: A list of pip requirements for the job.
         external_access_integrations: A list of external access integrations.
-        query_warehouse: The query warehouse to use. Defaults to session warehouse.
-        env_vars: Environment variables to set in container
-        target_instances: The number of nodes in the job. If none specified, create a single node job.
-        min_instances: The minimum number of nodes required to start the job. If none specified,
-            defaults to target_instances. If set, the job will not start until the minimum number of nodes is available.
-        enable_metrics: Whether to enable metrics publishing for the job.
-        database: The database to use for the job.
-        schema: The schema to use for the job.
         session: The Snowpark session to use. If none specified, uses active session.
+        kwargs: Additional keyword arguments. Supported arguments:
+            database (str): The database to use for the job.
+            schema (str): The schema to use for the job.
+            min_instances (int): The minimum number of nodes required to start the job.
+                If none specified, defaults to target_instances. If set, the job
+                will not start until the minimum number of nodes is available.
+            env_vars (dict): Environment variables to set in container.
+            enable_metrics (bool): Whether to enable metrics publishing for the job.
+            query_warehouse (str): The query warehouse to use. Defaults to session warehouse.
+            spec_overrides (dict): A dictionary of overrides for the service spec.
 
     Returns:
         Decorator that dispatches invocations of the decorated function as remote jobs.
@@ -61,22 +59,17 @@ def remote(
         wrapped_func.__code__ = wrapped_func.__code__.replace(co_firstlineno=func.__code__.co_firstlineno + 1)
 
         @functools.wraps(func)
-        def wrapper(*args: _Args.args, **kwargs: _Args.kwargs) -> jb.MLJob[_ReturnValue]:
-            payload = payload_utils.create_function_payload(func, *args, **kwargs)
+        def wrapper(*_args: _Args.args, **_kwargs: _Args.kwargs) -> jb.MLJob[_ReturnValue]:
+            payload = payload_utils.create_function_payload(func, *_args, **_kwargs)
             job = jm._submit_job(
                 source=payload,
                 stage_name=stage_name,
                 compute_pool=compute_pool,
+                target_instances=target_instances,
                 pip_requirements=pip_requirements,
                 external_access_integrations=external_access_integrations,
-                query_warehouse=query_warehouse,
-                env_vars=env_vars,
-                target_instances=target_instances,
-                min_instances=min_instances,
-                enable_metrics=enable_metrics,
-                database=database,
-                schema=schema,
                 session=payload.session or session,
+                **kwargs,
             )
             assert isinstance(job, jb.MLJob), f"Unexpected job type: {type(job)}"
             return job
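
The net effect: every option except compute_pool, stage_name, target_instances, pip_requirements, external_access_integrations, and session now travels through **kwargs. A minimal caller sketch, assuming remote is re-exported from snowflake.ml.jobs as in earlier releases and an active Snowpark session; the pool and stage names are placeholders:

from snowflake.ml.jobs import remote

@remote(
    "MY_COMPUTE_POOL",                # compute pool, still positional
    stage_name="payload_stage",       # required keyword
    target_instances=2,               # promoted to a named parameter
    min_instances=1,                  # now passed through **kwargs
    env_vars={"LOG_LEVEL": "DEBUG"},  # also **kwargs
)
def train(n_estimators: int) -> float:
    # placeholder body; executes remotely on the compute pool
    return float(n_estimators)

job = train(100)  # dispatches a job and returns an MLJob handle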
snowflake/ml/jobs/job.py CHANGED
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import time
@@ -9,7 +10,8 @@ import yaml
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier
-from snowflake.ml.jobs._utils import constants, interop_utils, types
+from snowflake.ml._internal.utils.mixins import SerializableSessionMixin
+from snowflake.ml.jobs._utils import constants, interop_utils, query_helper, types
 from snowflake.snowpark import Row, context as sp_context
 from snowflake.snowpark.exceptions import SnowparkSQLException
 
@@ -21,7 +23,7 @@ T = TypeVar("T")
 logger = logging.getLogger(__name__)
 
 
-class MLJob(Generic[T]):
+class MLJob(Generic[T], SerializableSessionMixin):
     def __init__(
         self,
         id: str,
@@ -220,6 +222,19 @@ class MLJob(Generic[T]):
             return cast(T, self._result.result)
         raise RuntimeError(f"Job execution failed (id={self.name})") from self._result.exception
 
+    @telemetry.send_api_usage_telemetry(project=_PROJECT)
+    def cancel(self) -> None:
+        """
+        Cancel the running job.
+        Raises:
+            RuntimeError: If cancellation fails. # noqa: DAR401
+        """
+        try:
+            self._session.sql(f"CALL {self.id}!spcs_cancel_job()").collect()
+            logger.debug(f"Cancellation requested for job {self.id}")
+        except SnowparkSQLException as e:
+            raise RuntimeError(f"Failed to cancel job {self.id}: {e.message}") from e
+
 
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id", "instance_id"])
 def _get_status(session: snowpark.Session, job_id: str, instance_id: Optional[int] = None) -> types.JOB_STATUS:
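
A short usage sketch for the new method; the job handle can come from any submit path, such as the decorated function in the earlier example:

job = train(100)  # MLJob handle from the @remote sketch above
try:
    job.cancel()  # issues CALL <job_id>!spcs_cancel_job()
except RuntimeError as e:
    print(f"Cancellation failed: {e}")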
@@ -262,6 +277,7 @@ def _get_logs(
 
     Raises:
         RuntimeError: if failed to get head instance_id
+        SnowparkSQLException: if there is an error retrieving logs from SPCS interface.
     """
     # If instance_id is not specified, try to get the head instance ID
     if instance_id is None:
@@ -279,30 +295,55 @@ def _get_logs(
     if limit > 0:
         params.append(limit)
     try:
-        (row,) = session.sql(
+        (row,) = query_helper.run_query(
+            session,
             f"SELECT SYSTEM$GET_SERVICE_LOGS(?, ?, ?{f', ?' if limit > 0 else ''})",
             params=params,
-        ).collect()
+        )
+        full_log = str(row[0])
     except SnowparkSQLException as e:
         if "Container Status: PENDING" in e.message:
             logger.warning("Waiting for container to start. Logs will be shown when available.")
             return ""
         else:
-            # event table accepts job name, not fully qualified name
-            # cast is to resolve the type check error
-            db, schema, name = identifier.parse_schema_level_object_identifier(job_id)
-            db = cast(str, db or session.get_current_database())
-            schema = cast(str, schema or session.get_current_schema())
-            logs = _get_service_log_from_event_table(
-                session, db, schema, name, limit, instance_id if instance_id else None
-            )
-            if len(logs) == 0:
-                raise RuntimeError(
-                    "No logs were found. Please verify that the database, schema, and job ID are correct."
-                )
-            return os.linesep.join(row[0] for row in logs)
-
-    full_log = str(row[0])
+            # Fallback plan:
+            # 1. Try the SPCS interface (doesn't require event table permission)
+            # 2. If the interface call fails, query the event table (requires permission)
+            logger.debug("falling back to SPCS interface for logs")
+            try:
+                logs = _get_logs_spcs(
+                    session,
+                    job_id,
+                    limit=limit,
+                    instance_id=instance_id if instance_id else 0,
+                    container_name=constants.DEFAULT_CONTAINER_NAME,
+                )
+                full_log = os.linesep.join(row[0] for row in logs)
+
+            except SnowparkSQLException as spcs_error:
+                if spcs_error.sql_error_code == 2143:
+                    logger.debug("persistent logs may not be enabled, falling back to event table")
+                else:
+                    # If the SPCS interface fails for any other reason
+                    # (for example, an incorrect argument format), raise the error directly
+                    raise
+                # event table accepts job name, not fully qualified name
+                db, schema, name = identifier.parse_schema_level_object_identifier(job_id)
+                db = db or session.get_current_database()
+                schema = schema or session.get_current_schema()
+                event_table_logs = _get_service_log_from_event_table(
+                    session,
+                    name,
+                    database=db,
+                    schema=schema,
+                    instance_id=instance_id if instance_id else 0,
+                    limit=limit,
+                )
+                if len(event_table_logs) == 0:
+                    raise RuntimeError(
+                        "No logs were found. Please verify that the database, schema, and job ID are correct."
+                    )
+                full_log = os.linesep.join(json.loads(row[0]) for row in event_table_logs)
 
     # If verbose is True, return the complete log
     if verbose:
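
The retrieval order is now SYSTEM$GET_SERVICE_LOGS, then the SPCS spcs_get_logs table function, then the event table. A generic sketch of this first-success fallback pattern in isolation (an illustrative helper, not part of the package):

from typing import Callable

def first_available(*sources: Callable[[], str]) -> str:
    """Return output from the first log source that succeeds.

    Mirrors the order used above: system function -> SPCS interface -> event table.
    """
    failures: list[Exception] = []
    for source in sources:
        try:
            return source()
        except Exception as exc:  # the real code narrows this to SnowparkSQLException
            failures.append(exc)
    raise RuntimeError(f"No logs were found; {len(failures)} sources failed.")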
@@ -340,13 +381,21 @@ def _get_head_instance_id(session: snowpark.Session, job_id: str) -> Optional[in
         RuntimeError: If the instances died or if some instances disappeared.
     """
 
-    target_instances = _get_target_instances(session, job_id)
+    try:
+        target_instances = _get_target_instances(session, job_id)
+    except SnowparkSQLException:
+        # service may be deleted
+        raise RuntimeError("Couldn’t retrieve service information")
 
     if target_instances == 1:
         return 0
 
     try:
-        rows = session.sql("SHOW SERVICE INSTANCES IN SERVICE IDENTIFIER(?)", params=(job_id,)).collect()
+        rows = query_helper.run_query(
+            session,
+            "SHOW SERVICE INSTANCES IN SERVICE IDENTIFIER(?)",
+            params=(job_id,),
+        )
     except SnowparkSQLException:
         # service may be deleted
         raise RuntimeError("Couldn’t retrieve instances")
@@ -373,19 +422,29 @@ def _get_head_instance_id(session: snowpark.Session, job_id: str) -> Optional[in
 
 
 def _get_service_log_from_event_table(
-    session: snowpark.Session, database: str, schema: str, name: str, limit: int, instance_id: Optional[int]
+    session: snowpark.Session,
+    name: str,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
+    instance_id: Optional[int] = None,
+    limit: int = -1,
 ) -> list[Row]:
+    event_table_name = session.sql("SHOW PARAMETERS LIKE 'event_table' IN ACCOUNT").collect()[0]["value"]
+    query = [
+        "SELECT VALUE FROM IDENTIFIER(?)",
+        'WHERE RESOURCE_ATTRIBUTES:"snow.service.name" = ?',
+    ]
     params: list[Any] = [
-        database,
-        schema,
+        event_table_name,
         name,
     ]
-    query = [
-        "SELECT VALUE FROM snowflake.telemetry.events_view",
-        'WHERE RESOURCE_ATTRIBUTES:"snow.database.name" = ?',
-        'AND RESOURCE_ATTRIBUTES:"snow.schema.name" = ?',
-        'AND RESOURCE_ATTRIBUTES:"snow.service.name" = ?',
-    ]
+    if database:
+        query.append('AND RESOURCE_ATTRIBUTES:"snow.database.name" = ?')
+        params.append(database)
+
+    if schema:
+        query.append('AND RESOURCE_ATTRIBUTES:"snow.schema.name" = ?')
+        params.append(schema)
 
     if instance_id:
         query.append('AND RESOURCE_ATTRIBUTES:"snow.service.container.instance" = ?')
@@ -398,16 +457,18 @@ def _get_service_log_from_event_table(
     if limit > 0:
         query.append("LIMIT ?")
         params.append(limit)
-
-    rows = session.sql(
+    # the wrapper used by query_helper has no declared return type, so a
+    # `# type: ignore[no-any-return]` is needed to satisfy the type checker
+    rows = query_helper.run_query(
+        session,
         "\n".join(line for line in query if line),
         params=params,
-    ).collect()
-    return rows
+    )
+    return rows  # type: ignore[no-any-return]
 
 
-def _get_service_info(session: snowpark.Session, job_id: str) -> Row:
-    (row,) = session.sql("DESCRIBE SERVICE IDENTIFIER(?)", params=(job_id,)).collect()
+def _get_service_info(session: snowpark.Session, job_id: str) -> Any:
+    (row,) = query_helper.run_query(session, "DESCRIBE SERVICE IDENTIFIER(?)", params=(job_id,))
     return row
 
 
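For reference, the _get_service_log_from_event_table changes above bind the account's configured event table via IDENTIFIER(?) instead of hard-coding snowflake.telemetry.events_view. A sketch of the statement assembled when database, schema, and instance_id are all provided (clauses not visible in these hunks are omitted):

# Illustrative only: reproduces the string built by the query/params lists above.
sketch = "\n".join(
    [
        "SELECT VALUE FROM IDENTIFIER(?)",
        'WHERE RESOURCE_ATTRIBUTES:"snow.service.name" = ?',
        'AND RESOURCE_ATTRIBUTES:"snow.database.name" = ?',
        'AND RESOURCE_ATTRIBUTES:"snow.schema.name" = ?',
        'AND RESOURCE_ATTRIBUTES:"snow.service.container.instance" = ?',
        "LIMIT ?",
    ]
)
print(sketch)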
@@ -426,8 +487,10 @@ def _get_compute_pool_info(session: snowpark.Session, compute_pool: str) -> Row:
         ValueError: If the compute pool is not found.
     """
     try:
-        (pool_info,) = session.sql("SHOW COMPUTE POOLS LIKE ?", params=(compute_pool,)).collect()
-        return pool_info
+        # the wrapper used by query_helper has no declared return type, so a
+        # `# type: ignore[no-any-return]` is needed to satisfy the type checker
+        (pool_info,) = query_helper.run_query(session, "SHOW COMPUTE POOLS LIKE ?", params=(compute_pool,))
+        return pool_info  # type: ignore[no-any-return]
     except ValueError as e:
         if "not enough values to unpack" in str(e):
             raise ValueError(f"Compute pool '{compute_pool}' not found")
@@ -438,3 +501,40 @@ def _get_compute_pool_info(session: snowpark.Session, compute_pool: str) -> Row:
 def _get_target_instances(session: snowpark.Session, job_id: str) -> int:
     row = _get_service_info(session, job_id)
     return int(row["target_instances"])
+
+
+def _get_logs_spcs(
+    session: snowpark.Session,
+    fully_qualified_name: str,
+    limit: int = -1,
+    instance_id: Optional[int] = None,
+    container_name: Optional[str] = None,
+    start_time: Optional[str] = None,
+    end_time: Optional[str] = None,
+) -> list[Row]:
+    query = [
+        f"SELECT LOG FROM table({fully_qualified_name}!spcs_get_logs(",
+    ]
+    conditions_params = []
+    if start_time:
+        conditions_params.append(f"start_time => TO_TIMESTAMP_LTZ('{start_time}')")
+    if end_time:
+        conditions_params.append(f"end_time => TO_TIMESTAMP_LTZ('{end_time}')")
+    if len(conditions_params) > 0:
+        query.append(", ".join(conditions_params))
+
+    query.append("))")
+
+    query_params = []
+    if instance_id is not None:
+        query_params.append(f"INSTANCE_ID = {instance_id}")
+    if container_name:
+        query_params.append(f"CONTAINER_NAME = '{container_name}'")
+    if len(query_params) > 0:
+        query.append("WHERE " + " AND ".join(query_params))
+
+    query.append("ORDER BY TIMESTAMP ASC")
+    if limit > 0:
+        query.append(f" LIMIT {limit};")
+    rows = session.sql("\n".join(query)).collect()
+    return rows
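
A hypothetical direct call to the new helper, fetching up to 100 log lines from instance 0; the qualified name and container name are placeholders (the production call path in _get_logs passes constants.DEFAULT_CONTAINER_NAME), and an active Snowpark session is assumed:

rows = _get_logs_spcs(
    session,
    "MY_DB.MY_SCHEMA.MY_JOB",
    limit=100,
    instance_id=0,
    container_name="main-container",  # placeholder
)
print(os.linesep.join(row[0] for row in rows))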