PyPI - snowflake-ml-python - Versions diffs - 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl - Mend

snowflake-ml-python 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

snowflake/ml/_internal/telemetry.py +6 -9
snowflake/ml/_internal/utils/connection_params.py +196 -0
snowflake/ml/_internal/utils/identifier.py +1 -1
snowflake/ml/_internal/utils/mixins.py +61 -0
snowflake/ml/jobs/__init__.py +2 -0
snowflake/ml/jobs/_utils/constants.py +3 -2
snowflake/ml/jobs/_utils/function_payload_utils.py +43 -0
snowflake/ml/jobs/_utils/interop_utils.py +63 -4
snowflake/ml/jobs/_utils/payload_utils.py +89 -40
snowflake/ml/jobs/_utils/query_helper.py +9 -0
snowflake/ml/jobs/_utils/scripts/constants.py +19 -3
snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +8 -26
snowflake/ml/jobs/_utils/spec_utils.py +29 -5
snowflake/ml/jobs/_utils/stage_utils.py +119 -0
snowflake/ml/jobs/_utils/types.py +5 -1
snowflake/ml/jobs/decorators.py +20 -28
snowflake/ml/jobs/job.py +197 -61
snowflake/ml/jobs/manager.py +253 -121
snowflake/ml/model/_client/model/model_impl.py +58 -0
snowflake/ml/model/_client/model/model_version_impl.py +90 -0
snowflake/ml/model/_client/ops/model_ops.py +18 -6
snowflake/ml/model/_client/ops/service_ops.py +23 -6
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -0
snowflake/ml/model/_client/sql/service.py +68 -20
snowflake/ml/model/_client/sql/stage.py +5 -2
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -10
snowflake/ml/model/_packager/model_env/model_env.py +35 -27
snowflake/ml/model/_packager/model_handlers/pytorch.py +5 -1
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +103 -73
snowflake/ml/model/_packager/model_meta/model_meta.py +3 -1
snowflake/ml/model/_signatures/core.py +24 -0
snowflake/ml/model/_signatures/snowpark_handler.py +55 -3
snowflake/ml/model/target_platform.py +11 -0
snowflake/ml/model/task.py +9 -0
snowflake/ml/model/type_hints.py +5 -13
snowflake/ml/modeling/metrics/metrics_utils.py +2 -0
snowflake/ml/monitoring/explain_visualize.py +2 -2
snowflake/ml/monitoring/model_monitor.py +0 -4
snowflake/ml/registry/_manager/model_manager.py +30 -15
snowflake/ml/registry/registry.py +144 -47
snowflake/ml/utils/connection_params.py +1 -1
snowflake/ml/utils/html_utils.py +263 -0
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/METADATA +64 -19
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/RECORD +48 -41
snowflake/ml/monitoring/model_monitor_version.py +0 -1
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/licenses/LICENSE.txt +0 -0
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/top_level.txt +0 -0

snowflake/ml/jobs/job.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import time
@@ -7,21 +8,23 @@ from typing import Any, Generic, Literal, Optional, TypeVar, Union, cast, overlo
 import yaml
 from snowflake import snowpark
+from snowflake.connector import errors
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier
-from snowflake.ml.jobs._utils import constants, interop_utils, types
+from snowflake.ml._internal.utils.mixins import SerializableSessionMixin
+from snowflake.ml.jobs._utils import constants, interop_utils, query_helper, types
 from snowflake.snowpark import Row, context as sp_context
 from snowflake.snowpark.exceptions import SnowparkSQLException
 _PROJECT = "MLJob"
-TERMINAL_JOB_STATUSES = {"FAILED", "DONE", "INTERNAL_ERROR"}
+TERMINAL_JOB_STATUSES = {"FAILED", "DONE", "CANCELLED", "INTERNAL_ERROR"}
 T = TypeVar("T")
 logger = logging.getLogger(__name__)
-class MLJob(Generic[T]):
+class MLJob(Generic[T], SerializableSessionMixin):
     def __init__(
         self,
         id: str,
@@ -67,7 +70,8 @@ class MLJob(Generic[T]):
     def _compute_pool(self) -> str:
         """Get the job's compute pool name."""
         row = _get_service_info(self._session, self.id)
-        return cast(str, row["compute_pool"])
+        compute_pool = row[query_helper.get_attribute_map(self._session, {"compute_pool": 5})["compute_pool"]]
+        return cast(str, compute_pool)
     @property
     def _service_spec(self) -> dict[str, Any]:
@@ -181,16 +185,20 @@ class MLJob(Generic[T]):
         while (status := self.status) not in TERMINAL_JOB_STATUSES:
             if status == "PENDING" and not warning_shown:
                 pool_info = _get_compute_pool_info(self._session, self._compute_pool)
-                if (pool_info.max_nodes - pool_info.active_nodes) < self.min_instances:
+                requested_attributes = {"max_nodes": 3, "active_nodes": 9}
+                if (
+                    pool_info[requested_attributes["max_nodes"]] - pool_info[requested_attributes["active_nodes"]]
+                ) < self.min_instances:
                     logger.warning(
-                        f"Compute pool busy ({pool_info.active_nodes}/{pool_info.max_nodes} nodes in use)."
-                        " Job execution may be delayed."
+                        f'Compute pool busy ({pool_info[requested_attributes["active_nodes"]]}'
+                        f'/{pool_info[requested_attributes["max_nodes"]]} nodes in use, '
+                        f"{self.min_instances} nodes required). Job execution may be delayed."
                     )
                     warning_shown = True
             if timeout >= 0 and (elapsed := time.monotonic() - start_time) >= timeout:
                 raise TimeoutError(f"Job {self.name} did not complete within {elapsed} seconds")
             time.sleep(delay)
-            delay = min(delay * 2, constants.JOB_POLL_MAX_DELAY_SECONDS)  # Exponential backoff
+            delay = min(delay * 1.2, constants.JOB_POLL_MAX_DELAY_SECONDS)  # Exponential backoff
         return self.status
     @snowpark._internal.utils.private_preview(version="1.8.2")
@@ -220,27 +228,46 @@ class MLJob(Generic[T]):
             return cast(T, self._result.result)
         raise RuntimeError(f"Job execution failed (id={self.name})") from self._result.exception
+    @telemetry.send_api_usage_telemetry(project=_PROJECT)
+    def cancel(self) -> None:
+        """
+        Cancel the running job.
+        Raises:
+            RuntimeError: If cancellation fails.  # noqa: DAR401
+        """
+        try:
+            self._session.sql(f"CALL {self.id}!spcs_cancel_job()").collect()
+            logger.debug(f"Cancellation requested for job {self.id}")
+        except SnowparkSQLException as e:
+            raise RuntimeError(f"Failed to cancel job {self.id}: {e.message}") from e
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id", "instance_id"])
 def _get_status(session: snowpark.Session, job_id: str, instance_id: Optional[int] = None) -> types.JOB_STATUS:
     """Retrieve job or job instance execution status."""
     if instance_id is not None:
         # Get specific instance status
-        rows = session.sql("SHOW SERVICE INSTANCES IN SERVICE IDENTIFIER(?)", params=(job_id,)).collect()
-        for row in rows:
-            if row["instance_id"] == str(instance_id):
-                return cast(types.JOB_STATUS, row["status"])
+        rows = session._conn.run_query(
+            "SHOW SERVICE INSTANCES IN SERVICE IDENTIFIER(?)", params=[job_id], _force_qmark_paramstyle=True
+        )
+        request_attributes = query_helper.get_attribute_map(session, {"status": 5, "instance_id": 4})
+        if isinstance(rows, dict) and "data" in rows:
+            for row in rows["data"]:
+                if row[request_attributes["instance_id"]] == str(instance_id):
+                    return cast(types.JOB_STATUS, row[request_attributes["status"]])
         raise ValueError(f"Instance {instance_id} not found in job {job_id}")
     else:
         row = _get_service_info(session, job_id)
-        return cast(types.JOB_STATUS, row["status"])
+        request_attributes = query_helper.get_attribute_map(session, {"status": 1})
+        return cast(types.JOB_STATUS, row[request_attributes["status"]])
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id"])
 def _get_service_spec(session: snowpark.Session, job_id: str) -> dict[str, Any]:
     """Retrieve job execution service spec."""
     row = _get_service_info(session, job_id)
-    return cast(dict[str, Any], yaml.safe_load(row["spec"]))
+    requested_attributes = query_helper.get_attribute_map(session, {"spec": 6})
+    return cast(dict[str, Any], yaml.safe_load(row[requested_attributes["spec"]]))
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id", "limit", "instance_id"])
@@ -262,6 +289,7 @@ def _get_logs(
     Raises:
         RuntimeError: if failed to get head instance_id
+        SnowparkSQLException: if there is an error retrieving logs from SPCS interface.
     """
     # If instance_id is not specified, try to get the head instance ID
     if instance_id is None:
@@ -279,30 +307,59 @@ def _get_logs(
     if limit > 0:
         params.append(limit)
     try:
-        (row,) = session.sql(
+        data = session._conn.run_query(
             f"SELECT SYSTEM$GET_SERVICE_LOGS(?, ?, ?{f', ?' if limit > 0 else ''})",
             params=params,
-        ).collect()
-    except SnowparkSQLException as e:
-        if "Container Status: PENDING" in e.message:
+            _force_qmark_paramstyle=True,
+        )
+        if isinstance(data, dict) and "data" in data:
+            full_log = str(data["data"][0][0])
+        # pass type check
+        else:
+            full_log = ""
+    except errors.ProgrammingError as e:
+        if "Container Status: PENDING" in str(e):
             logger.warning("Waiting for container to start. Logs will be shown when available.")
             return ""
         else:
-            # event table accepts job name, not fully qualified name
-            # cast is to resolve the type check error
-            db, schema, name = identifier.parse_schema_level_object_identifier(job_id)
-            db = cast(str, db or session.get_current_database())
-            schema = cast(str, schema or session.get_current_schema())
-            logs = _get_service_log_from_event_table(
-                session, db, schema, name, limit, instance_id if instance_id else None
-            )
-            if len(logs) == 0:
-                raise RuntimeError(
-                    "No logs were found. Please verify that the database, schema, and job ID are correct."
+            # Fallback plan:
+            # 1. Try SPCS Interface (doesn't require event table permission)
+            # 2. If the interface call fails, query Event Table (requires permission)
+            logger.debug("falling back to SPCS Interface for logs")
+            try:
+                logs = _get_logs_spcs(
+                    session,
+                    job_id,
+                    limit=limit,
+                    instance_id=instance_id if instance_id else 0,
+                    container_name=constants.DEFAULT_CONTAINER_NAME,
                 )
-            return os.linesep.join(row[0] for row in logs)
-    full_log = str(row[0])
+                full_log = os.linesep.join(row[0] for row in logs)
+            except SnowparkSQLException as spcs_error:
+                if spcs_error.sql_error_code == 2143:
+                    logger.debug("persistent logs may not be enabled, falling back to event table")
+                else:
+                    # If SPCS Interface fails for any other reason,
+                    # for example, incorrect argument format,raise the error directly
+                    raise
+                # event table accepts job name, not fully qualified name
+                db, schema, name = identifier.parse_schema_level_object_identifier(job_id)
+                db = db or session.get_current_database()
+                schema = schema or session.get_current_schema()
+                event_table_logs = _get_service_log_from_event_table(
+                    session,
+                    name,
+                    database=db,
+                    schema=schema,
+                    instance_id=instance_id if instance_id else 0,
+                    limit=limit,
+                )
+                if len(event_table_logs) == 0:
+                    raise RuntimeError(
+                        "No logs were found. Please verify that the database, schema, and job ID are correct."
+                    )
+                full_log = os.linesep.join(json.loads(row[0]) for row in event_table_logs)
     # If verbose is True, return the complete log
     if verbose:
@@ -338,47 +395,72 @@ def _get_head_instance_id(session: snowpark.Session, job_id: str) -> Optional[in
      Raises:
         RuntimeError: If the instances died or if some instances disappeared.
     """
     try:
-        rows = session.sql("SHOW SERVICE INSTANCES IN SERVICE IDENTIFIER(?)", params=(job_id,)).collect()
-    except SnowparkSQLException:
+        target_instances = _get_target_instances(session, job_id)
+    except errors.ProgrammingError:
+        # service may be deleted
+        raise RuntimeError("Couldn’t retrieve service information")
+    if target_instances == 1:
+        return 0
+    try:
+        rows = session._conn.run_query(
+            "SHOW SERVICE INSTANCES IN SERVICE IDENTIFIER(?)", params=(job_id,), _force_qmark_paramstyle=True
+        )
+    except errors.ProgrammingError:
         # service may be deleted
         raise RuntimeError("Couldn’t retrieve instances")
-    if not rows:
+    if not rows or not isinstance(rows, dict) or not rows.get("data"):
         return None
-    if _get_target_instances(session, job_id) > len(rows):
+    if target_instances > len(rows["data"]):
         raise RuntimeError("Couldn’t retrieve head instance due to missing instances.")
+    requested_attributes = query_helper.get_attribute_map(session, {"start_time": 8, "instance_id": 4})
     # Sort by start_time first, then by instance_id
     try:
-        sorted_instances = sorted(rows, key=lambda x: (x["start_time"], int(x["instance_id"])))
+        sorted_instances = sorted(
+            rows["data"],
+            key=lambda x: (x[requested_attributes["start_time"]], int(x[requested_attributes["instance_id"]])),
+        )
     except TypeError:
         raise RuntimeError("Job instance information unavailable.")
     head_instance = sorted_instances[0]
-    if not head_instance["start_time"]:
+    if not head_instance[requested_attributes["start_time"]]:
         # If head instance hasn't started yet, return None
         return None
     try:
-        return int(head_instance["instance_id"])
+        return int(head_instance[requested_attributes["instance_id"]])
     except (ValueError, TypeError):
         return 0
 def _get_service_log_from_event_table(
-    session: snowpark.Session, database: str, schema: str, name: str, limit: int, instance_id: Optional[int]
-) -> list[Row]:
+    session: snowpark.Session,
+    name: str,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
+    instance_id: Optional[int] = None,
+    limit: int = -1,
+) -> Any:
     params: list[Any] = [
-        database,
-        schema,
         name,
     ]
     query = [
         "SELECT VALUE FROM snowflake.telemetry.events_view",
-        'WHERE RESOURCE_ATTRIBUTES:"snow.database.name" = ?',
-        'AND RESOURCE_ATTRIBUTES:"snow.schema.name" = ?',
-        'AND RESOURCE_ATTRIBUTES:"snow.service.name" = ?',
+        'WHERE RESOURCE_ATTRIBUTES:"snow.service.name" = ?',
     ]
+    if database:
+        query.append('AND RESOURCE_ATTRIBUTES:"snow.database.name" = ?')
+        params.append(database)
+    if schema:
+        query.append('AND RESOURCE_ATTRIBUTES:"snow.schema.name" = ?')
+        params.append(schema)
     if instance_id:
         query.append('AND RESOURCE_ATTRIBUTES:"snow.service.container.instance" = ?')
@@ -391,20 +473,23 @@ def _get_service_log_from_event_table(
     if limit > 0:
         query.append("LIMIT ?")
         params.append(limit)
-    rows = session.sql(
-        "\n".join(line for line in query if line),
-        params=params,
-    ).collect()
-    return rows
+    rows = session._conn.run_query(
+        "\n".join(line for line in query if line), params=params, _force_qmark_paramstyle=True
+    )
+    if not rows or not isinstance(rows, dict) or not rows.get("data"):
+        return []
+    return rows["data"]
-def _get_service_info(session: snowpark.Session, job_id: str) -> Row:
-    (row,) = session.sql("DESCRIBE SERVICE IDENTIFIER(?)", params=(job_id,)).collect()
-    return row
+def _get_service_info(session: snowpark.Session, job_id: str) -> Any:
+    row = session._conn.run_query("DESCRIBE SERVICE IDENTIFIER(?)", params=(job_id,), _force_qmark_paramstyle=True)
+    # pass the type check
+    if not row or not isinstance(row, dict) or not row.get("data"):
+        raise errors.ProgrammingError("failed to retrieve service information")
+    return row["data"][0]
-def _get_compute_pool_info(session: snowpark.Session, compute_pool: str) -> Row:
+def _get_compute_pool_info(session: snowpark.Session, compute_pool: str) -> Any:
     """
     Check if the compute pool has enough available instances.
@@ -413,13 +498,64 @@ def _get_compute_pool_info(session: snowpark.Session, compute_pool: str) -> Row:
         compute_pool (str): The name of the compute pool.
     Returns:
-        Row: The compute pool information.
+        Any: The compute pool information.
+    Raises:
+        ValueError: If the compute pool is not found.
     """
-    (pool_info,) = session.sql("SHOW COMPUTE POOLS LIKE ?", params=(compute_pool,)).collect()
-    return pool_info
+    try:
+        compute_pool_info = session._conn.run_query(
+            "SHOW COMPUTE POOLS LIKE ?", params=(compute_pool,), _force_qmark_paramstyle=True
+        )
+        # pass the type check
+        if not compute_pool_info or not isinstance(compute_pool_info, dict) or not compute_pool_info.get("data"):
+            raise ValueError(f"Compute pool '{compute_pool}' not found")
+        return compute_pool_info["data"][0]
+    except ValueError as e:
+        if "not enough values to unpack" in str(e):
+            raise ValueError(f"Compute pool '{compute_pool}' not found")
+        raise
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id"])
 def _get_target_instances(session: snowpark.Session, job_id: str) -> int:
     row = _get_service_info(session, job_id)
-    return int(row["target_instances"]) if row["target_instances"] else 0
+    requested_attributes = query_helper.get_attribute_map(session, {"target_instances": 9})
+    return int(row[requested_attributes["target_instances"]])
+def _get_logs_spcs(
+    session: snowpark.Session,
+    fully_qualified_name: str,
+    limit: int = -1,
+    instance_id: Optional[int] = None,
+    container_name: Optional[str] = None,
+    start_time: Optional[str] = None,
+    end_time: Optional[str] = None,
+) -> list[Row]:
+    query = [
+        f"SELECT LOG FROM table({fully_qualified_name}!spcs_get_logs(",
+    ]
+    conditions_params = []
+    if start_time:
+        conditions_params.append(f"start_time => TO_TIMESTAMP_LTZ('{start_time}')")
+    if end_time:
+        conditions_params.append(f"end_time => TO_TIMESTAMP_LTZ('{end_time}')")
+    if len(conditions_params) > 0:
+        query.append(", ".join(conditions_params))
+    query.append("))")
+    query_params = []
+    if instance_id is not None:
+        query_params.append(f"INSTANCE_ID = {instance_id}")
+    if container_name:
+        query_params.append(f"CONTAINER_NAME = '{container_name}'")
+    if len(query_params) > 0:
+        query.append("WHERE " + " AND ".join(query_params))
+    query.append("ORDER BY TIMESTAMP ASC")
+    if limit > 0:
+        query.append(f" LIMIT {limit};")
+    rows = session.sql("\n".join(query)).collect()
+    return rows

snowflake-ml-python 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl

snowflake-ml-python 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl