snowflake-ml-python 1.8.3__py3-none-any.whl → 1.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +7 -1
- snowflake/ml/_internal/platform_capabilities.py +13 -11
- snowflake/ml/_internal/telemetry.py +42 -13
- snowflake/ml/_internal/utils/identifier.py +2 -2
- snowflake/ml/data/data_connector.py +1 -1
- snowflake/ml/jobs/_utils/constants.py +10 -1
- snowflake/ml/jobs/_utils/interop_utils.py +1 -1
- snowflake/ml/jobs/_utils/payload_utils.py +51 -34
- snowflake/ml/jobs/_utils/scripts/constants.py +6 -0
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +4 -4
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +86 -3
- snowflake/ml/jobs/_utils/spec_utils.py +8 -6
- snowflake/ml/jobs/decorators.py +13 -3
- snowflake/ml/jobs/job.py +206 -26
- snowflake/ml/jobs/manager.py +78 -34
- snowflake/ml/model/_client/model/model_version_impl.py +1 -1
- snowflake/ml/model/_client/ops/service_ops.py +31 -17
- snowflake/ml/model/_client/service/model_deployment_spec.py +351 -170
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +25 -0
- snowflake/ml/model/_client/sql/model_version.py +1 -1
- snowflake/ml/model/_client/sql/service.py +20 -32
- snowflake/ml/model/_model_composer/model_composer.py +44 -19
- snowflake/ml/model/_packager/model_handlers/_utils.py +32 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +100 -41
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +7 -4
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +16 -7
- snowflake/ml/model/_packager/model_meta/model_meta.py +2 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +5 -4
- snowflake/ml/model/_signatures/dmatrix_handler.py +15 -2
- snowflake/ml/model/custom_model.py +17 -4
- snowflake/ml/model/model_signature.py +3 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -1
- snowflake/ml/modeling/cluster/birch.py +9 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -1
- snowflake/ml/modeling/cluster/dbscan.py +9 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -1
- snowflake/ml/modeling/cluster/k_means.py +9 -1
- snowflake/ml/modeling/cluster/mean_shift.py +9 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -1
- snowflake/ml/modeling/cluster/optics.py +9 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -1
- snowflake/ml/modeling/compose/column_transformer.py +9 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -1
- snowflake/ml/modeling/covariance/oas.py +9 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -1
- snowflake/ml/modeling/decomposition/pca.py +9 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -1
- snowflake/ml/modeling/impute/knn_imputer.py +9 -1
- snowflake/ml/modeling/impute/missing_indicator.py +9 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/lars.py +9 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/perceptron.py +9 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ridge.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -1
- snowflake/ml/modeling/manifold/isomap.py +9 -1
- snowflake/ml/modeling/manifold/mds.py +9 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -1
- snowflake/ml/modeling/manifold/tsne.py +9 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -1
- snowflake/ml/modeling/svm/linear_svc.py +9 -1
- snowflake/ml/modeling/svm/linear_svr.py +9 -1
- snowflake/ml/modeling/svm/nu_svc.py +9 -1
- snowflake/ml/modeling/svm/nu_svr.py +9 -1
- snowflake/ml/modeling/svm/svc.py +9 -1
- snowflake/ml/modeling/svm/svr.py +9 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -1
- snowflake/ml/monitoring/explain_visualize.py +424 -0
- snowflake/ml/registry/_manager/model_manager.py +23 -2
- snowflake/ml/registry/registry.py +10 -9
- snowflake/ml/utils/connection_params.py +8 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/METADATA +58 -8
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/RECORD +196 -195
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/decorators.py
CHANGED
@@ -24,8 +24,11 @@ def remote(
     external_access_integrations: Optional[list[str]] = None,
     query_warehouse: Optional[str] = None,
     env_vars: Optional[dict[str, str]] = None,
-
+    target_instances: int = 1,
+    min_instances: int = 1,
     enable_metrics: bool = False,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
 ) -> Callable[[Callable[_Args, _ReturnValue]], Callable[_Args, jb.MLJob[_ReturnValue]]]:
     """
@@ -38,8 +41,12 @@ def remote(
         external_access_integrations: A list of external access integrations.
         query_warehouse: The query warehouse to use. Defaults to session warehouse.
         env_vars: Environment variables to set in container
-
+        target_instances: The number of nodes in the job. If none specified, create a single node job.
+        min_instances: The minimum number of nodes required to start the job. If none specified, defaults to 1.
+            If set, the job will not start until the minimum number of nodes is available.
         enable_metrics: Whether to enable metrics publishing for the job.
+        database: The database to use for the job.
+        schema: The schema to use for the job.
         session: The Snowpark session to use. If none specified, uses active session.
 
     Returns:
@@ -65,8 +72,11 @@ def remote(
             external_access_integrations=external_access_integrations,
             query_warehouse=query_warehouse,
             env_vars=env_vars,
-
+            target_instances=target_instances,
+            min_instances=min_instances,
             enable_metrics=enable_metrics,
+            database=database,
+            schema=schema,
             session=session,
         )
         assert isinstance(job, jb.MLJob), f"Unexpected job type: {type(job)}"
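
The net effect of these decorator changes is that multi-node sizing and an explicit database/schema placement can be pinned at decoration time. A minimal sketch of the new surface, assuming `remote` is re-exported from `snowflake.ml.jobs` and its leading `compute_pool`/`stage_name` parameters are unchanged from earlier releases; the pool, stage, database, and schema names below are placeholders, not values from this package:

    from snowflake.ml.jobs import remote

    @remote(
        "MY_COMPUTE_POOL",           # placeholder compute pool name
        stage_name="payload_stage",  # placeholder stage for the job payload
        target_instances=4,          # total nodes requested for the job
        min_instances=2,             # job waits until at least 2 nodes are free
        database="MY_DB",            # must be paired with schema (validated in manager._submit_job)
        schema="MY_SCHEMA",
    )
    def train(table_name: str) -> float:
        ...

    job = train("MY_DB.MY_SCHEMA.TRAINING_DATA")  # returns an MLJob[float] handle, not a float

Note the return type in the signature above: the wrapped callable yields `jb.MLJob[_ReturnValue]`, so the training result comes back through `job.result()` rather than the call itself.
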
snowflake/ml/jobs/job.py
CHANGED
@@ -1,18 +1,25 @@
+import logging
+import os
 import time
+from functools import cached_property
 from typing import Any, Generic, Literal, Optional, TypeVar, Union, cast, overload
 
 import yaml
 
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
+from snowflake.ml._internal.utils import identifier
 from snowflake.ml.jobs._utils import constants, interop_utils, types
-from snowflake.snowpark import context as sp_context
+from snowflake.snowpark import Row, context as sp_context
+from snowflake.snowpark.exceptions import SnowparkSQLException
 
 _PROJECT = "MLJob"
 TERMINAL_JOB_STATUSES = {"FAILED", "DONE", "INTERNAL_ERROR"}
 
 T = TypeVar("T")
 
+logger = logging.getLogger(__name__)
+
 
 class MLJob(Generic[T]):
     def __init__(
@@ -28,6 +35,21 @@ class MLJob(Generic[T]):
         self._status: types.JOB_STATUS = "PENDING"
         self._result: Optional[interop_utils.ExecutionResult] = None
 
+    @cached_property
+    def name(self) -> str:
+        return identifier.parse_schema_level_object_identifier(self.id)[-1]
+
+    @cached_property
+    def target_instances(self) -> int:
+        return _get_target_instances(self._session, self.id)
+
+    @cached_property
+    def min_instances(self) -> int:
+        try:
+            return int(self._container_spec["env"].get(constants.MIN_INSTANCES_ENV_VAR, 1))
+        except TypeError:
+            return 1
+
     @property
     def id(self) -> str:
         """Get the unique job ID"""
@@ -41,6 +63,12 @@ class MLJob(Generic[T]):
         self._status = _get_status(self._session, self.id)
         return self._status
 
+    @cached_property
+    def _compute_pool(self) -> str:
+        """Get the job's compute pool name."""
+        row = _get_service_info(self._session, self.id)
+        return cast(str, row["compute_pool"])
+
     @property
     def _service_spec(self) -> dict[str, Any]:
         """Get the job's service spec."""
@@ -67,19 +95,38 @@ class MLJob(Generic[T]):
         """Get the job's result file location."""
         result_path = self._container_spec["env"].get(constants.RESULT_PATH_ENV_VAR)
         if result_path is None:
-            raise RuntimeError(f"Job {self.id} doesn't have a result path configured")
+            raise RuntimeError(f"Job {self.name} doesn't have a result path configured")
         return f"{self._stage_path}/{result_path}"
 
     @overload
-    def get_logs(
+    def get_logs(
+        self,
+        limit: int = -1,
+        instance_id: Optional[int] = None,
+        *,
+        as_list: Literal[True],
+        verbose: bool = constants.DEFAULT_VERBOSE_LOG,
+    ) -> list[str]:
         ...
 
     @overload
-    def get_logs(
+    def get_logs(
+        self,
+        limit: int = -1,
+        instance_id: Optional[int] = None,
+        *,
+        as_list: Literal[False] = False,
+        verbose: bool = constants.DEFAULT_VERBOSE_LOG,
+    ) -> str:
         ...
 
     def get_logs(
-        self,
+        self,
+        limit: int = -1,
+        instance_id: Optional[int] = None,
+        *,
+        as_list: bool = False,
+        verbose: bool = constants.DEFAULT_VERBOSE_LOG,
     ) -> Union[str, list[str]]:
         """
         Return the job's execution logs.
@@ -89,17 +136,20 @@ class MLJob(Generic[T]):
             instance_id: Optional instance ID to get logs from a specific instance.
                 If not provided, returns logs from the head node.
             as_list: If True, returns logs as a list of lines. Otherwise, returns logs as a single string.
+            verbose: Whether to return the full log or just the user log.
 
         Returns:
             The job's execution logs.
         """
-        logs = _get_logs(self._session, self.id, limit, instance_id)
+        logs = _get_logs(self._session, self.id, limit, instance_id, verbose)
         assert isinstance(logs, str)  # mypy
         if as_list:
             return logs.splitlines()
         return logs
 
-    def show_logs(
+    def show_logs(
+        self, limit: int = -1, instance_id: Optional[int] = None, verbose: bool = constants.DEFAULT_VERBOSE_LOG
+    ) -> None:
         """
         Display the job's execution logs.
 
@@ -107,8 +157,9 @@ class MLJob(Generic[T]):
             limit: The maximum number of lines to display. Negative values are treated as no limit.
             instance_id: Optional instance ID to get logs from a specific instance.
                 If not provided, displays logs from the head node.
+            verbose: Whether to return the full log or just the user log.
         """
-        print(self.get_logs(limit, instance_id, as_list=False))  # noqa: T201: we need to print here.
+        print(self.get_logs(limit, instance_id, as_list=False, verbose=verbose))  # noqa: T201: we need to print here.
 
     @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["timeout"])
     def wait(self, timeout: float = -1) -> types.JOB_STATUS:
@@ -126,9 +177,18 @@ class MLJob(Generic[T]):
         """
         delay = constants.JOB_POLL_INITIAL_DELAY_SECONDS  # Start with 100ms delay
         start_time = time.monotonic()
-
+        warning_shown = False
+        while (status := self.status) not in TERMINAL_JOB_STATUSES:
+            if status == "PENDING" and not warning_shown:
+                pool_info = _get_compute_pool_info(self._session, self._compute_pool)
+                if (pool_info.max_nodes - pool_info.active_nodes) < self.min_instances:
+                    logger.warning(
+                        f"Compute pool busy ({pool_info.active_nodes}/{pool_info.max_nodes} nodes in use)."
+                        " Job execution may be delayed."
+                    )
+                warning_shown = True
             if timeout >= 0 and (elapsed := time.monotonic() - start_time) >= timeout:
-                raise TimeoutError(f"Job {self.id} did not complete within {elapsed} seconds")
+                raise TimeoutError(f"Job {self.name} did not complete within {elapsed} seconds")
             time.sleep(delay)
             delay = min(delay * 2, constants.JOB_POLL_MAX_DELAY_SECONDS)  # Exponential backoff
         return self.status
@@ -154,11 +214,11 @@ class MLJob(Generic[T]):
         try:
             self._result = interop_utils.fetch_result(self._session, self._result_path)
         except Exception as e:
-            raise RuntimeError(f"Failed to retrieve result for job (id={self.id})") from e
+            raise RuntimeError(f"Failed to retrieve result for job (id={self.name})") from e
 
         if self._result.success:
             return cast(T, self._result.result)
-        raise RuntimeError(f"Job execution failed (id={self.id})") from self._result.exception
+        raise RuntimeError(f"Job execution failed (id={self.name})") from self._result.exception
 
 
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id", "instance_id"])
@@ -172,19 +232,21 @@ def _get_status(session: snowpark.Session, job_id: str, instance_id: Optional[in
                 return cast(types.JOB_STATUS, row["status"])
         raise ValueError(f"Instance {instance_id} not found in job {job_id}")
     else:
-
+        row = _get_service_info(session, job_id)
        return cast(types.JOB_STATUS, row["status"])
 
 
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id"])
 def _get_service_spec(session: snowpark.Session, job_id: str) -> dict[str, Any]:
     """Retrieve job execution service spec."""
-
+    row = _get_service_info(session, job_id)
     return cast(dict[str, Any], yaml.safe_load(row["spec"]))
 
 
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id", "limit", "instance_id"])
-def _get_logs(
+def _get_logs(
+    session: snowpark.Session, job_id: str, limit: int = -1, instance_id: Optional[int] = None, verbose: bool = True
+) -> str:
     """
     Retrieve the job's execution logs.
 
@@ -193,13 +255,20 @@ def _get_logs(session: snowpark.Session, job_id: str, limit: int = -1, instance_
         limit: The maximum number of lines to return. Negative values are treated as no limit.
         session: The Snowpark session to use. If none specified, uses active session.
         instance_id: Optional instance ID to get logs from a specific instance.
+        verbose: Whether to return the full log or just the portion between START and END messages.
 
     Returns:
         The job's execution logs.
+
+    Raises:
+        RuntimeError: if failed to get head instance_id
     """
     # If instance_id is not specified, try to get the head instance ID
     if instance_id is None:
-
+        try:
+            instance_id = _get_head_instance_id(session, job_id)
+        except RuntimeError:
+            instance_id = None
 
     # Assemble params: [job_id, instance_id, container_name, (optional) limit]
     params: list[Any] = [
@@ -209,12 +278,50 @@ def _get_logs(session: snowpark.Session, job_id: str, limit: int = -1, instance_
     ]
     if limit > 0:
         params.append(limit)
+    try:
+        (row,) = session.sql(
+            f"SELECT SYSTEM$GET_SERVICE_LOGS(?, ?, ?{f', ?' if limit > 0 else ''})",
+            params=params,
+        ).collect()
+    except SnowparkSQLException as e:
+        if "Container Status: PENDING" in e.message:
+            logger.warning("Waiting for container to start. Logs will be shown when available.")
+            return ""
+        else:
+            # event table accepts job name, not fully qualified name
+            # cast is to resolve the type check error
+            db, schema, name = identifier.parse_schema_level_object_identifier(job_id)
+            db = cast(str, db or session.get_current_database())
+            schema = cast(str, schema or session.get_current_schema())
+            logs = _get_service_log_from_event_table(
+                session, db, schema, name, limit, instance_id if instance_id else None
+            )
+            if len(logs) == 0:
+                raise RuntimeError(
+                    "No logs were found. Please verify that the database, schema, and job ID are correct."
+                )
+            return os.linesep.join(row[0] for row in logs)
+
+    full_log = str(row[0])
+
+    # If verbose is True, return the complete log
+    if verbose:
+        return full_log
+
+    # Otherwise, extract only the portion between LOG_START_MSG and LOG_END_MSG
+    start_idx = full_log.find(constants.LOG_START_MSG)
+    if start_idx != -1:
+        start_idx += len(constants.LOG_START_MSG)
+    else:
+        # If start message not found, start from the beginning
+        start_idx = 0
 
-
-
-
-
-
+    end_idx = full_log.find(constants.LOG_END_MSG, start_idx)
+    if end_idx == -1:
+        # If end message not found, return everything after start
+        end_idx = len(full_log)
+
+    return full_log[start_idx:end_idx].strip()
 
 
 @telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id"])
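
The non-verbose branch in the hunk above reduces to a substring slice between two sentinel markers. A self-contained sketch of that slicing, with invented sentinel values standing in for `constants.LOG_START_MSG` and `constants.LOG_END_MSG` (whose real values are not shown in this diff):

    # Standalone sketch of the non-verbose log slicing; the real sentinels live
    # in snowflake/ml/jobs/_utils/constants.py and these values are made up.
    LOG_START_MSG = "--- USER LOG START ---"
    LOG_END_MSG = "--- USER LOG END ---"

    def extract_user_log(full_log: str) -> str:
        start = full_log.find(LOG_START_MSG)
        # Skip past the start marker when present; otherwise keep the prefix too.
        start = start + len(LOG_START_MSG) if start != -1 else 0
        end = full_log.find(LOG_END_MSG, start)
        if end == -1:  # No end marker: keep everything after start.
            end = len(full_log)
        return full_log[start:end].strip()

    assert extract_user_log("boot noise\n--- USER LOG START ---\nhello\n--- USER LOG END ---") == "hello"
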
@@ -223,18 +330,31 @@ def _get_head_instance_id(session: snowpark.Session, job_id: str) -> Optional[in
     Retrieve the head instance ID of a job.
 
     Args:
-        session: The Snowpark session to use.
-        job_id: The job ID.
+        session (Session): The Snowpark session to use.
+        job_id (str): The job ID.
 
     Returns:
-        The head instance ID of the job
+        Optional[int]: The head instance ID of the job, or None if the head instance has not started yet.
+
+    Raises:
+        RuntimeError: If the instances died or if some instances disappeared.
+
     """
-
+    try:
+        rows = session.sql("SHOW SERVICE INSTANCES IN SERVICE IDENTIFIER(?)", params=(job_id,)).collect()
+    except SnowparkSQLException:
+        # service may be deleted
+        raise RuntimeError("Couldn't retrieve instances")
     if not rows:
         return None
+    if _get_target_instances(session, job_id) > len(rows):
+        raise RuntimeError("Couldn't retrieve head instance due to missing instances.")
 
     # Sort by start_time first, then by instance_id
-
+    try:
+        sorted_instances = sorted(rows, key=lambda x: (x["start_time"], int(x["instance_id"])))
+    except TypeError:
+        raise RuntimeError("Job instance information unavailable.")
     head_instance = sorted_instances[0]
     if not head_instance["start_time"]:
         # If head instance hasn't started yet, return None
@@ -243,3 +363,63 @@ def _get_head_instance_id(session: snowpark.Session, job_id: str) -> Optional[in
         return int(head_instance["instance_id"])
     except (ValueError, TypeError):
         return 0
+
+
+def _get_service_log_from_event_table(
+    session: snowpark.Session, database: str, schema: str, name: str, limit: int, instance_id: Optional[int]
+) -> list[Row]:
+    params: list[Any] = [
+        database,
+        schema,
+        name,
+    ]
+    query = [
+        "SELECT VALUE FROM snowflake.telemetry.events_view",
+        'WHERE RESOURCE_ATTRIBUTES:"snow.database.name" = ?',
+        'AND RESOURCE_ATTRIBUTES:"snow.schema.name" = ?',
+        'AND RESOURCE_ATTRIBUTES:"snow.service.name" = ?',
+    ]
+
+    if instance_id:
+        query.append('AND RESOURCE_ATTRIBUTES:"snow.service.container.instance" = ?')
+        params.append(instance_id)
+
+    query.append("AND RECORD_TYPE = 'LOG'")
+    # sort by TIMESTAMP; although OBSERVED_TIMESTAMP is for log, it is NONE currently when record_type is log
+    query.append("ORDER BY TIMESTAMP")
+
+    if limit > 0:
+        query.append("LIMIT ?")
+        params.append(limit)
+
+    rows = session.sql(
+        "\n".join(line for line in query if line),
+        params=params,
+    ).collect()
+    return rows
+
+
+def _get_service_info(session: snowpark.Session, job_id: str) -> Row:
+    (row,) = session.sql("DESCRIBE SERVICE IDENTIFIER(?)", params=(job_id,)).collect()
+    return row
+
+
+def _get_compute_pool_info(session: snowpark.Session, compute_pool: str) -> Row:
+    """
+    Check if the compute pool has enough available instances.
+
+    Args:
+        session (Session): The Snowpark session to use.
+        compute_pool (str): The name of the compute pool.
+
+    Returns:
+        Row: The compute pool information.
+    """
+    (pool_info,) = session.sql("SHOW COMPUTE POOLS LIKE ?", params=(compute_pool,)).collect()
+    return pool_info
+
+
+@telemetry.send_api_usage_telemetry(project=_PROJECT, func_params_to_log=["job_id"])
+def _get_target_instances(session: snowpark.Session, job_id: str) -> int:
+    row = _get_service_info(session, job_id)
+    return int(row["target_instances"]) if row["target_instances"] else 0
snowflake/ml/jobs/manager.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 import pathlib
 import textwrap
-from typing import Any, Callable, Literal, Optional, TypeVar, Union, overload
+from typing import Any, Callable, Literal, Optional, TypeVar, Union, cast, overload
 from uuid import uuid4
 
 import yaml
@@ -52,7 +52,7 @@ def list_jobs(
         query += f" LIMIT {limit}"
     df = session.sql(query)
     df = df.select(
-        df['"name"']
+        df['"name"'],
         df['"owner"'],
         df['"status"'],
         df['"created_on"'],
@@ -65,16 +65,16 @@
 def get_job(job_id: str, session: Optional[snowpark.Session] = None) -> jb.MLJob[Any]:
     """Retrieve a job service from the backend."""
     session = session or get_active_session()
-
     try:
-
-
+        database, schema, job_name = identifier.parse_schema_level_object_identifier(job_id)
+        database = identifier.resolve_identifier(cast(str, database or session.get_current_database()))
+        schema = identifier.resolve_identifier(cast(str, schema or session.get_current_schema()))
     except ValueError as e:
         raise ValueError(f"Invalid job ID: {job_id}") from e
 
+    job_id = f"{database}.{schema}.{job_name}"
     try:
         # Validate that job exists by doing a status check
-        # FIXME: Retrieve return path
         job = jb.MLJob[Any](job_id, session=session)
         _ = job.status
         return job
@@ -108,8 +108,11 @@ def submit_file(
     external_access_integrations: Optional[list[str]] = None,
     query_warehouse: Optional[str] = None,
     spec_overrides: Optional[dict[str, Any]] = None,
-
+    target_instances: int = 1,
+    min_instances: int = 1,
     enable_metrics: bool = False,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
 ) -> jb.MLJob[None]:
     """
@@ -125,8 +128,11 @@ def submit_file(
         external_access_integrations: A list of external access integrations.
         query_warehouse: The query warehouse to use. Defaults to session warehouse.
         spec_overrides: Custom service specification overrides to apply.
-
+        target_instances: The number of instances to use for the job. If none specified, single node job is created.
+        min_instances: The minimum number of nodes required to start the job. If none specified, defaults to 1.
         enable_metrics: Whether to enable metrics publishing for the job.
+        database: The database to use.
+        schema: The schema to use.
         session: The Snowpark session to use. If none specified, uses active session.
 
     Returns:
@@ -142,8 +148,11 @@ def submit_file(
         external_access_integrations=external_access_integrations,
         query_warehouse=query_warehouse,
         spec_overrides=spec_overrides,
-
+        target_instances=target_instances,
+        min_instances=min_instances,
         enable_metrics=enable_metrics,
+        database=database,
+        schema=schema,
         session=session,
     )
 
@@ -161,8 +170,11 @@ def submit_directory(
     external_access_integrations: Optional[list[str]] = None,
     query_warehouse: Optional[str] = None,
     spec_overrides: Optional[dict[str, Any]] = None,
-
+    target_instances: int = 1,
+    min_instances: int = 1,
     enable_metrics: bool = False,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
 ) -> jb.MLJob[None]:
     """
@@ -179,8 +191,11 @@ def submit_directory(
         external_access_integrations: A list of external access integrations.
         query_warehouse: The query warehouse to use. Defaults to session warehouse.
         spec_overrides: Custom service specification overrides to apply.
-
+        target_instances: The number of instances to use for the job. If none specified, single node job is created.
+        min_instances: The minimum number of nodes required to start the job. If none specified, defaults to 1.
         enable_metrics: Whether to enable metrics publishing for the job.
+        database: The database to use.
+        schema: The schema to use.
         session: The Snowpark session to use. If none specified, uses active session.
 
     Returns:
@@ -197,8 +212,11 @@ def submit_directory(
         external_access_integrations=external_access_integrations,
         query_warehouse=query_warehouse,
         spec_overrides=spec_overrides,
-
+        target_instances=target_instances,
+        min_instances=min_instances,
         enable_metrics=enable_metrics,
+        database=database,
+        schema=schema,
         session=session,
     )
 
@@ -216,8 +234,11 @@ def _submit_job(
     external_access_integrations: Optional[list[str]] = None,
     query_warehouse: Optional[str] = None,
     spec_overrides: Optional[dict[str, Any]] = None,
-
+    target_instances: int = 1,
+    min_instances: int = 1,
     enable_metrics: bool = False,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
 ) -> jb.MLJob[None]:
     ...
@@ -236,8 +257,11 @@ def _submit_job(
     external_access_integrations: Optional[list[str]] = None,
     query_warehouse: Optional[str] = None,
     spec_overrides: Optional[dict[str, Any]] = None,
-
+    target_instances: int = 1,
+    min_instances: int = 1,
     enable_metrics: bool = False,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
 ) -> jb.MLJob[T]:
     ...
@@ -251,7 +275,7 @@ def _submit_job(
         # TODO: Log lengths of args, env_vars, and spec_overrides values
         "pip_requirements",
         "external_access_integrations",
-        "
+        "target_instances",
         "enable_metrics",
     ],
 )
@@ -267,8 +291,11 @@ def _submit_job(
     external_access_integrations: Optional[list[str]] = None,
     query_warehouse: Optional[str] = None,
     spec_overrides: Optional[dict[str, Any]] = None,
-
+    target_instances: int = 1,
+    min_instances: int = 1,
     enable_metrics: bool = False,
+    database: Optional[str] = None,
+    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
 ) -> jb.MLJob[T]:
     """
@@ -285,8 +312,11 @@ def _submit_job(
         external_access_integrations: A list of external access integrations.
         query_warehouse: The query warehouse to use. Defaults to session warehouse.
         spec_overrides: Custom service specification overrides to apply.
-
+        target_instances: The number of instances to use for the job. If none specified, single node job is created.
+        min_instances: The minimum number of nodes required to start the job. If none specified, defaults to 1.
         enable_metrics: Whether to enable metrics publishing for the job.
+        database: The database to use.
+        schema: The schema to use.
         session: The Snowpark session to use. If none specified, uses active session.
 
     Returns:
@@ -294,17 +324,27 @@ def _submit_job(
 
     Raises:
         RuntimeError: If required Snowflake features are not enabled.
+        ValueError: If database or schema value(s) are invalid
     """
-
-
-
-
-
+    if database and not schema:
+        raise ValueError("Schema must be specified if database is specified.")
+    if target_instances < 1 or min_instances < 1:
+        raise ValueError("target_instances and min_instances must be greater than 0.")
+    if min_instances > target_instances:
+        raise ValueError("min_instances must be less than or equal to target_instances.")
 
     session = session or get_active_session()
-
-
-
+
+    # Validate database and schema identifiers on client side since
+    # SQL parser for EXECUTE JOB SERVICE seems to struggle with this
+    database = identifier.resolve_identifier(cast(str, database or session.get_current_database()))
+    schema = identifier.resolve_identifier(cast(str, schema or session.get_current_schema()))
+
+    job_name = f"{JOB_ID_PREFIX}{str(uuid4()).replace('-', '_').upper()}"
+    job_id = f"{database}.{schema}.{job_name}"
+    stage_path_parts = identifier.parse_snowflake_stage_path(stage_name.lstrip("@"))
+    stage_name = f"@{'.'.join(filter(None, stage_path_parts[:3]))}"
+    stage_path = pathlib.PurePosixPath(f"{stage_name}{stage_path_parts[-1].rstrip('/')}/{job_name}")
 
     # Upload payload
     uploaded_payload = payload_utils.JobPayload(
@@ -319,7 +359,8 @@ def _submit_job(
         compute_pool=compute_pool,
         payload=uploaded_payload,
         args=args,
-
+        target_instances=target_instances,
+        min_instances=min_instances,
         enable_metrics=enable_metrics,
     )
     spec_overrides = spec_utils.generate_spec_overrides(
@@ -331,31 +372,34 @@ def _submit_job(
 
     # Generate SQL command for job submission
    query_template = textwrap.dedent(
-
+        """\
         EXECUTE JOB SERVICE
-        IN COMPUTE POOL
+        IN COMPUTE POOL IDENTIFIER(?)
         FROM SPECIFICATION $$
-        {
+        {}
         $$
-        NAME =
+        NAME = IDENTIFIER(?)
         ASYNC = TRUE
         """
     )
+    params: list[Any] = [compute_pool, job_id]
     query = query_template.format(yaml.dump(spec)).splitlines()
     if external_access_integrations:
         external_access_integration_list = ",".join(f"{e}" for e in external_access_integrations)
         query.append(f"EXTERNAL_ACCESS_INTEGRATIONS = ({external_access_integration_list})")
     query_warehouse = query_warehouse or session.get_current_warehouse()
     if query_warehouse:
-        query.append(
-
-
+        query.append("QUERY_WAREHOUSE = IDENTIFIER(?)")
+        params.append(query_warehouse)
+    if target_instances > 1:
+        query.append("REPLICAS = ?")
+        params.append(target_instances)
 
     # Submit job
     query_text = "\n".join(line for line in query if line)
 
     try:
-        _ = session.sql(query_text).collect()
+        _ = session.sql(query_text, params=params).collect()
     except SnowparkSQLException as e:
         if "invalid property 'ASYNC'" in e.message:
             raise RuntimeError(
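
The substantive hardening in this file is the switch from string-interpolated SQL to bind parameters. A reduced sketch of the pattern the new `_submit_job` follows; note that only identifiers and the replica count can be bound, while the YAML spec body must still be inlined inside the $$ block:

    from typing import Any, Optional
    from snowflake import snowpark

    def submit_job_service(
        session: snowpark.Session,
        compute_pool: str,
        job_id: str,
        spec_yaml: str,  # stands in for yaml.dump(spec)
        query_warehouse: Optional[str] = None,
        target_instances: int = 1,
    ) -> None:
        # Identifiers go through IDENTIFIER(?) binds; the spec body cannot be
        # bound, so it is inlined inside the $$ ... $$ block as before.
        query = [
            "EXECUTE JOB SERVICE",
            "IN COMPUTE POOL IDENTIFIER(?)",
            f"FROM SPECIFICATION $$\n{spec_yaml}\n$$",
            "NAME = IDENTIFIER(?)",
            "ASYNC = TRUE",
        ]
        params: list[Any] = [compute_pool, job_id]
        if query_warehouse:
            query.append("QUERY_WAREHOUSE = IDENTIFIER(?)")
            params.append(query_warehouse)
        if target_instances > 1:
            query.append("REPLICAS = ?")
            params.append(target_instances)
        session.sql("\n".join(query), params=params).collect()

Binding the pool, job name, and warehouse through IDENTIFIER(?) avoids the quoting pitfalls of f-string SQL; database and schema are still resolved client-side first, since (per the code comment in the hunk above) the EXECUTE JOB SERVICE parser struggles with qualified identifiers.
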