PyPI - snowflake-ml-python - Versions diffs - 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl - Mend

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (322) hide show

snowflake/ml/fileset/sfcfs.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import collections
 import logging
 from functools import partial
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, cast
+from typing import Any, Callable, Optional, Union, cast
 import fsspec
@@ -100,7 +100,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
             raise ValueError("Either sf_connection or snowpark_session has to be non-empty!")
         self._conn = self._session._conn._conn  # Telemetry wrappers expect connection under `conn_attr_name="_conn"``
         self._kwargs = kwargs
-        self._stage_fs_set: Dict[Tuple[str, str, str], stage_fs.SFStageFileSystem] = {}
+        self._stage_fs_set: dict[tuple[str, str, str], stage_fs.SFStageFileSystem] = {}
         super().__init__(**kwargs)
@@ -133,7 +133,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
         assert isinstance(session, snowpark.Session)
         return session
-    def __reduce__(self) -> Tuple[Callable[[], Type["SFFileSystem"]], Tuple[()], Dict[str, Any]]:
+    def __reduce__(self) -> tuple[Callable[[], type["SFFileSystem"]], tuple[()], dict[str, Any]]:
         """Returns a state dictionary for use in serialization.
         Returns:
@@ -145,7 +145,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
         return partial(self.__class__, **{_RECREATE_FROM_SERIALIZED: True}), (), state_dictionary
-    def __setstate__(self, state_dict: Dict[str, Any]) -> None:
+    def __setstate__(self, state_dict: dict[str, Any]) -> None:
         """Sets the dictionary state at deserialization time, and rebuilds a snowflake connection.
         Args:
@@ -191,7 +191,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
         func_params_to_log=["detail"],
         conn_attr_name="_conn",
     )
-    def ls(self, path: str, detail: bool = False, **kwargs: Any) -> Union[List[str], List[Dict[str, Any]]]:
+    def ls(self, path: str, detail: bool = False, **kwargs: Any) -> Union[list[str], list[dict[str, Any]]]:
         """Override fsspec `ls` method. List single "directory" with or without details.
         Args:
@@ -214,14 +214,14 @@ class SFFileSystem(fsspec.AbstractFileSystem):
         file_path = self._parse_file_path(path)
         stage_fs = self._get_stage_fs(file_path)
         stage_path_list = stage_fs.ls(file_path.filepath, detail=True, **kwargs)
-        stage_path_list = cast(List[Dict[str, Any]], stage_path_list)
+        stage_path_list = cast(list[dict[str, Any]], stage_path_list)
         return self._decorate_ls_res(stage_fs, stage_path_list, detail)
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
         conn_attr_name="_conn",
     )
-    def optimize_read(self, files: Optional[List[str]] = None) -> None:
+    def optimize_read(self, files: Optional[list[str]] = None) -> None:
         """Prefetch and cache the presigned urls for all the given files to speed up the file opening.
         All the files introduced here will have their urls cached. Further open() on any of cached urls will lead to a
@@ -232,8 +232,8 @@ class SFFileSystem(fsspec.AbstractFileSystem):
         """
         if not files:
             return
-        stage_fs_dict: Dict[str, stage_fs.SFStageFileSystem] = {}
-        stage_file_paths: Dict[str, List[str]] = collections.defaultdict(list)
+        stage_fs_dict: dict[str, stage_fs.SFStageFileSystem] = {}
+        stage_file_paths: dict[str, list[str]] = collections.defaultdict(list)
         for file in files:
             path_info = self._parse_file_path(file)
             fs = self._get_stage_fs(path_info)
@@ -271,11 +271,11 @@ class SFFileSystem(fsspec.AbstractFileSystem):
         project=_PROJECT,
         conn_attr_name="_conn",
     )
-    def info(self, path: str, **kwargs: Any) -> Dict[str, Any]:
+    def info(self, path: str, **kwargs: Any) -> dict[str, Any]:
         """Override fsspec `info` method. Give details of entry at path."""
         file_path = self._parse_file_path(path)
         stage_fs = self._get_stage_fs(file_path)
-        res: Dict[str, Any] = stage_fs.info(file_path.filepath, **kwargs)
+        res: dict[str, Any] = stage_fs.info(file_path.filepath, **kwargs)
         if res:
             res["name"] = self._stage_path_to_absolute_path(stage_fs, res["name"])
         return res
@@ -283,9 +283,9 @@ class SFFileSystem(fsspec.AbstractFileSystem):
     def _decorate_ls_res(
         self,
         stage_fs: stage_fs.SFStageFileSystem,
-        stage_path_list: List[Dict[str, Any]],
+        stage_path_list: list[dict[str, Any]],
         detail: bool,
-    ) -> Union[List[str], List[Dict[str, Any]]]:
+    ) -> Union[list[str], list[dict[str, Any]]]:
         """Add the stage location as the prefix of file names returned by ls() of stagefs"""
         for path in stage_path_list:
             path["name"] = self._stage_path_to_absolute_path(stage_fs, path["name"])

snowflake/ml/fileset/stage_fs.py CHANGED Viewed

@@ -2,7 +2,7 @@ import inspect
 import logging
 import time
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Optional, Union, cast
 import fsspec
 from fsspec.implementations import http as httpfs
@@ -44,7 +44,7 @@ class _PresignedUrl:
         return not self.expire_at or time.time() > self.expire_at - headroom_sec
-def _get_httpfs_kwargs(**kwargs: Any) -> Dict[str, Any]:
+def _get_httpfs_kwargs(**kwargs: Any) -> dict[str, Any]:
     """Extract kwargs that are meaningful to HTTPFileSystem."""
     httpfs_related_keys = [
         "block_size",
@@ -124,7 +124,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
         self._db = db
         self._schema = schema
         self._stage = stage
-        self._url_cache: Dict[str, _PresignedUrl] = {}
+        self._url_cache: dict[str, _PresignedUrl] = {}
         httpfs_kwargs = _get_httpfs_kwargs(**kwargs)
         self._fs = httpfs.HTTPFileSystem(**httpfs_kwargs)
@@ -145,7 +145,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
         project=_PROJECT,
         func_params_to_log=["detail"],
     )
-    def ls(self, path: str, detail: bool = False) -> Union[List[str], List[Dict[str, Any]]]:
+    def ls(self, path: str, detail: bool = False) -> Union[list[str], list[dict[str, Any]]]:
         """Override fsspec `ls` method. List single "directory" with or without details.
         Args:
@@ -169,7 +169,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
             loc = self.stage_name
             path = path.lstrip("/")
             async_job: snowpark.AsyncJob = self._session.sql(f"LIST '{loc}/{path}'").collect(block=False)
-            objects: List[snowpark.Row] = _resolve_async_job(async_job)
+            objects: list[snowpark.Row] = _resolve_async_job(async_job)
         except snowpark_exceptions.SnowparkSQLException as e:
             if e.sql_error_code == fileset_errors.ERRNO_DOMAIN_NOT_EXIST:
                 raise snowml_exceptions.SnowflakeMLException(
@@ -192,7 +192,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
     )
-    def optimize_read(self, files: Optional[List[str]] = None) -> None:
+    def optimize_read(self, files: Optional[list[str]] = None) -> None:
         """Prefetch and cache the presigned urls for all the given files to speed up the read performance.
         All the files introduced here will have their urls cached. Further open() on any of cached urls will lead to a
@@ -271,7 +271,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
                 original_exception=fileset_errors.StageFileNotFoundError(f"Stage file {path} doesn't exist."),
             )
-    def _open_with_snowpark(self, path: str, **kwargs: Dict[str, Any]) -> fsspec.spec.AbstractBufferedFile:
+    def _open_with_snowpark(self, path: str, **kwargs: dict[str, Any]) -> fsspec.spec.AbstractBufferedFile:
         """Open the a file for reading using snowflake.snowpark.file_operation
         Args:
@@ -299,7 +299,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
                     original_exception=e,
                 )
-    def _parse_list_result(self, list_result: List[snowpark.Row], search_path: str) -> List[Dict[str, Any]]:
+    def _parse_list_result(self, list_result: list[snowpark.Row], search_path: str) -> list[dict[str, Any]]:
         """Convert the result from LIST query to the expected format of fsspec ls() method.
         Note that Snowflake LIST query has different behavior with ls(). LIST query will return all the stage files
@@ -318,7 +318,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
         Returns:
             A list of dict, where each dict contains key-value pairs as the properties of a file.
         """
-        files: Dict[str, Dict[str, Any]] = {}
+        files: dict[str, dict[str, Any]] = {}
         search_path = search_path.strip("/")
         for row in list_result:
             name, size, md5, last_modified = row["name"], row["size"], row["md5"], row["last_modified"]
@@ -360,7 +360,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
     def _add_file_info_helper(
         self,
-        files: Dict[str, Dict[str, Any]],
+        files: dict[str, dict[str, Any]],
         object_path: str,
         file_size: int,
         file_type: str,
@@ -379,12 +379,12 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
         )
     def _fetch_presigned_urls(
-        self, files: List[str], url_lifetime: float = _PRESIGNED_URL_LIFETIME_SEC
-    ) -> List[Tuple[str, str]]:
+        self, files: list[str], url_lifetime: float = _PRESIGNED_URL_LIFETIME_SEC
+    ) -> list[tuple[str, str]]:
         """Fetch presigned urls for the given files."""
         file_df = self._session.create_dataframe(files).to_df("name")
         try:
-            presigned_urls: List[Tuple[str, str]] = file_df.select_expr(
+            presigned_urls: list[tuple[str, str]] = file_df.select_expr(
                 f"name, get_presigned_url('{self.stage_name}', name, {url_lifetime}) as url"
             ).collect(
                 statement_params=telemetry.get_function_usage_statement_params(
@@ -418,10 +418,10 @@ def _match_error_code(ex: snowpark_exceptions.SnowparkSQLException, error_code:
 @snowflake_plan.SnowflakePlan.Decorator.wrap_exception  # type: ignore[misc]
-def _resolve_async_job(async_job: snowpark.AsyncJob) -> List[snowpark.Row]:
+def _resolve_async_job(async_job: snowpark.AsyncJob) -> list[snowpark.Row]:
     # Make sure Snowpark exceptions are properly caught and converted by wrap_exception wrapper
     try:
-        query_result = cast(List[snowpark.Row], async_job.result("row"))
+        query_result = cast(list[snowpark.Row], async_job.result("row"))
         return query_result
     except snowpark_errors.DatabaseError as e:
         # HACK: Snowpark surfaces a generic exception if query doesn't complete immediately

snowflake/ml/jobs/_utils/constants.py CHANGED Viewed

@@ -13,7 +13,7 @@ STAGE_VOLUME_MOUNT_PATH = "/mnt/app"
 DEFAULT_IMAGE_REPO = "/snowflake/images/snowflake_images"
 DEFAULT_IMAGE_CPU = "st_plat/runtime/x86/runtime_image/snowbooks"
 DEFAULT_IMAGE_GPU = "st_plat/runtime/x86/generic_gpu/runtime_image/snowbooks"
-DEFAULT_IMAGE_TAG = "1.0.1"
+DEFAULT_IMAGE_TAG = "1.2.3"
 DEFAULT_ENTRYPOINT_PATH = "func.py"
 # Percent of container memory to allocate for /dev/shm volume

snowflake/ml/jobs/_utils/interop_utils.py CHANGED Viewed

@@ -10,7 +10,7 @@ import traceback
 from collections import namedtuple
 from dataclasses import dataclass
 from types import TracebackType
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union, cast
+from typing import Any, Callable, Optional, Union, cast
 from snowflake import snowpark
 from snowflake.snowpark import exceptions as sp_exceptions
@@ -33,7 +33,7 @@ class ExecutionResult:
     def success(self) -> bool:
         return self.exception is None
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """Return the serializable dictionary."""
         if isinstance(self.exception, BaseException):
             exc_type = type(self.exception)
@@ -50,7 +50,7 @@ class ExecutionResult:
         }
     @classmethod
-    def from_dict(cls, result_dict: Dict[str, Any]) -> "ExecutionResult":
+    def from_dict(cls, result_dict: dict[str, Any]) -> "ExecutionResult":
         if not isinstance(result_dict.get("success"), bool):
             raise ValueError("Invalid result dictionary")
@@ -242,11 +242,11 @@ def _install_sys_excepthook() -> None:
         original_excepthook = sys.excepthook
         def custom_excepthook(
-            exc_type: Type[BaseException],
+            exc_type: type[BaseException],
             exc_value: BaseException,
             exc_tb: Optional[TracebackType],
             *,
-            seen_exc_ids: Optional[Set[int]] = None,
+            seen_exc_ids: Optional[set[int]] = None,
         ) -> None:
             if seen_exc_ids is None:
                 seen_exc_ids = set()
@@ -331,7 +331,7 @@ def _install_ipython_hook() -> bool:
     except ImportError:
         return False
-    def parse_traceback_str(traceback_str: str) -> List[Tuple[str, int, str, str]]:
+    def parse_traceback_str(traceback_str: str) -> list[tuple[str, int, str, str]]:
         return [
             (m.group("filename"), int(m.group("lineno")), m.group("name"), m.group("line"))
             for m in re.finditer(_TRACEBACK_ENTRY_PATTERN, traceback_str)
@@ -342,13 +342,13 @@ def _install_ipython_hook() -> bool:
         def custom_format_exception_as_a_whole(
             self: VerboseTB,
-            etype: Type[BaseException],
+            etype: type[BaseException],
             evalue: Optional[BaseException],
             etb: Optional[TracebackType],
             number_of_lines_of_context: int,
             tb_offset: Optional[int],
             **kwargs: Any,
-        ) -> List[List[str]]:
+        ) -> list[list[str]]:
             if (remote_err := _retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteError):
                 # Implementation forked from IPython.core.ultratb.VerboseTB.format_exception_as_a_whole
                 head = self.prepare_header(remote_err.exc_type, long_version=False).replace(
@@ -388,7 +388,7 @@ def _install_ipython_hook() -> bool:
             etb: Optional[TracebackType],
             tb_offset: Optional[int] = None,
             **kwargs: Any,
-        ) -> List[str]:
+        ) -> list[str]:
             if (remote_err := _retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteError):
                 tb_list = [
                     (m.group("filename"), m.group("lineno"), m.group("name"), m.group("line"))
@@ -400,7 +400,7 @@ def _install_ipython_hook() -> bool:
                         "(most recent call last)",
                         "(from remote execution)",
                     )
-                return cast(List[str], out_list)
+                return cast(list[str], out_list)
             return original_structured_traceback(  # type: ignore[no-any-return]
                 self, etype, evalue, etb, tb_offset, **kwargs
             )

snowflake/ml/jobs/_utils/payload_utils.py CHANGED Viewed

@@ -6,19 +6,10 @@ import pickle
 import sys
 import textwrap
 from pathlib import Path, PurePath
-from typing import (
-    Any,
-    Callable,
-    List,
-    Optional,
-    Type,
-    Union,
-    cast,
-    get_args,
-    get_origin,
-)
+from typing import Any, Callable, Optional, Union, cast, get_args, get_origin
 import cloudpickle as cp
+from packaging import version
 from snowflake import snowpark
 from snowflake.ml.jobs._utils import constants, types
@@ -107,11 +98,18 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
         head_info=$(python3 get_instance_ip.py "$SNOWFLAKE_SERVICE_NAME" --head)
         if [ $? -eq 0 ]; then
             # Parse the output using read
-            read head_index head_ip <<< "$head_info"
+            read head_index head_ip head_status<<< "$head_info"
             # Use the parsed variables
             echo "Head Instance Index: $head_index"
             echo "Head Instance IP: $head_ip"
+            echo "Head Instance Status: $head_status"
+            # If the head status is not "READY" or "PENDING", exit early
+            if [ "$head_status" != "READY" ] && [ "$head_status" != "PENDING" ]; then
+                echo "Head instance status is not READY or PENDING. Exiting."
+                exit 0
+            fi
         else
             echo "Error: Failed to get head instance information."
@@ -277,7 +275,7 @@ class JobPayload:
         source: Union[str, Path, Callable[..., Any]],
         entrypoint: Optional[Union[str, Path]] = None,
         *,
-        pip_requirements: Optional[List[str]] = None,
+        pip_requirements: Optional[list[str]] = None,
     ) -> None:
         self.source = Path(source) if isinstance(source, str) else source
         self.entrypoint = Path(entrypoint) if isinstance(entrypoint, str) else entrypoint
@@ -288,17 +286,19 @@ class JobPayload:
         stage_path = PurePath(stage_path) if isinstance(stage_path, str) else stage_path
         source = resolve_source(self.source)
         entrypoint = resolve_entrypoint(source, self.entrypoint)
+        pip_requirements = self.pip_requirements or []
         # Create stage if necessary
         stage_name = stage_path.parts[0].lstrip("@")
         # Explicitly check if stage exists first since we may not have CREATE STAGE privilege
         try:
-            session.sql(f"describe stage {stage_name}").collect()
+            session.sql("describe stage identifier(?)", params=[stage_name]).collect()
         except sp_exceptions.SnowparkSQLException:
             session.sql(
-                f"create stage if not exists {stage_name}"
+                "create stage if not exists identifier(?)"
                 " encryption = ( type = 'SNOWFLAKE_SSE' )"
-                " comment = 'Created by snowflake.ml.jobs Python API'"
+                " comment = 'Created by snowflake.ml.jobs Python API'",
+                params=[stage_name],
             ).collect()
         # Upload payload to stage
@@ -311,6 +311,8 @@ class JobPayload:
                 overwrite=True,
             )
             source = Path(entrypoint.file_path.parent)
+            if not any(r.startswith("cloudpickle") for r in pip_requirements):
+                pip_requirements.append(f"cloudpickle~={version.parse(cp.__version__).major}.0")
         elif source.is_dir():
             # Manually traverse the directory and upload each file, since Snowflake PUT
             # can't handle directories. Reduce the number of PUT operations by using
@@ -335,10 +337,10 @@ class JobPayload:
         # Upload requirements
         # TODO: Check if payload includes both a requirements.txt file and pip_requirements
-        if self.pip_requirements:
+        if pip_requirements:
             # Upload requirements.txt to stage
             session.file.put_stream(
-                io.BytesIO("\n".join(self.pip_requirements).encode()),
+                io.BytesIO("\n".join(pip_requirements).encode()),
                 stage_location=stage_path.joinpath("requirements.txt").as_posix(),
                 auto_compress=False,
                 overwrite=True,
@@ -364,7 +366,7 @@ class JobPayload:
                     auto_compress=False,
                 )
-        python_entrypoint: List[Union[str, PurePath]] = [
+        python_entrypoint: list[Union[str, PurePath]] = [
             PurePath("mljob_launcher.py"),
             entrypoint.file_path.relative_to(source),
         ]
@@ -381,7 +383,7 @@ class JobPayload:
         )
-def _get_parameter_type(param: inspect.Parameter) -> Optional[Type[object]]:
+def _get_parameter_type(param: inspect.Parameter) -> Optional[type[object]]:
     # Unwrap Optional type annotations
     param_type = param.annotation
     if get_origin(param_type) is Union and len(get_args(param_type)) == 2 and type(None) in get_args(param_type):
@@ -390,10 +392,10 @@ def _get_parameter_type(param: inspect.Parameter) -> Optional[Type[object]]:
     # Return None for empty type annotations
     if param_type == inspect.Parameter.empty:
         return None
-    return cast(Type[object], param_type)
+    return cast(type[object], param_type)
-def _validate_parameter_type(param_type: Type[object], param_name: str) -> None:
+def _validate_parameter_type(param_type: type[object], param_name: str) -> None:
     # Validate param_type is a supported type
     if param_type not in _SUPPORTED_ARG_TYPES:
         raise ValueError(
@@ -505,13 +507,6 @@ def generate_python_code(func: Callable[..., Any], source_code_display: bool = F
     # https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/_internal/udf_utils.py
     source_code_comment = _generate_source_code_comment(func) if source_code_display else ""
-    func_code = f"""
-{source_code_comment}
-import pickle
-{_ENTRYPOINT_FUNC_NAME} = pickle.loads(bytes.fromhex('{_serialize_callable(func).hex()}'))
-"""
     arg_dict_name = "kwargs"
     if getattr(func, constants.IS_MLJOB_REMOTE_ATTR, None):
         param_code = f"{arg_dict_name} = {{}}"
@@ -519,25 +514,29 @@ import pickle
         param_code = _generate_param_handler_code(signature, arg_dict_name)
     return f"""
-### Version guard to check compatibility across Python versions ###
-import os
 import sys
-import warnings
-if sys.version_info.major != {sys.version_info.major} or sys.version_info.minor != {sys.version_info.minor}:
-    warnings.warn(
-        "Python version mismatch: job was created using"
-        " python{sys.version_info.major}.{sys.version_info.minor}"
-        f" but runtime environment uses python{{sys.version_info.major}}.{{sys.version_info.minor}}."
-        " Compatibility across Python versions is not guaranteed and may result in unexpected behavior."
-        " This will be fixed in a future release; for now, please use Python version"
-        f" {{sys.version_info.major}}.{{sys.version_info.minor}}.",
-        RuntimeWarning,
-        stacklevel=0,
-    )
-### End version guard ###
+import pickle
-{func_code.strip()}
+try:
+    {textwrap.indent(source_code_comment, '    ')}
+    {_ENTRYPOINT_FUNC_NAME} = pickle.loads(bytes.fromhex('{_serialize_callable(func).hex()}'))
+except (TypeError, pickle.PickleError):
+    if sys.version_info.major != {sys.version_info.major} or sys.version_info.minor != {sys.version_info.minor}:
+        raise RuntimeError(
+            "Failed to deserialize function due to Python version mismatch."
+            f" Runtime environment is Python {{sys.version_info.major}}.{{sys.version_info.minor}}"
+            " but function was serialized using Python {sys.version_info.major}.{sys.version_info.minor}."
+        ) from None
+    raise
+except AttributeError as e:
+    if 'cloudpickle' in str(e):
+        import cloudpickle as cp
+        raise RuntimeError(
+            "Failed to deserialize function due to cloudpickle version mismatch."
+            f" Runtime environment uses cloudpickle=={{cp.__version__}}"
+            " but job was serialized using cloudpickle=={cp.__version__}."
+        ) from e
+    raise
 if __name__ == '__main__':
 {textwrap.indent(param_code, '    ')}

snowflake/ml/jobs/_utils/scripts/get_instance_ip.py CHANGED Viewed

@@ -29,7 +29,7 @@ def get_self_ip() -> Optional[str]:
         return None
-def get_first_instance(service_name: str) -> Optional[tuple[str, str]]:
+def get_first_instance(service_name: str) -> Optional[tuple[str, str, str]]:
     """Get the first instance of a batch job based on start time and instance ID.
     Args:
@@ -42,7 +42,7 @@ def get_first_instance(service_name: str) -> Optional[tuple[str, str]]:
     session = session_utils.get_session()
     df = session.sql(f"show service instances in service {service_name}")
-    result = df.select('"instance_id"', '"ip_address"', '"start_time"').collect()
+    result = df.select('"instance_id"', '"ip_address"', '"start_time"', '"status"').collect()
     if not result:
         return None
@@ -57,7 +57,7 @@ def get_first_instance(service_name: str) -> Optional[tuple[str, str]]:
     ip_address = head_instance["ip_address"]
     try:
         socket.inet_aton(ip_address)  # Validate IPv4 address
-        return (head_instance["instance_id"], ip_address)
+        return (head_instance["instance_id"], ip_address, head_instance["status"])
     except OSError:
         logger.error(f"Error: Invalid IP address format: {ip_address}")
         return None
@@ -110,7 +110,7 @@ def main():
             head_info = get_first_instance(args.service_name)
             if head_info:
                 # Print to stdout to allow capture but don't use logger
-                sys.stdout.write(f"{head_info[0]} {head_info[1]}\n")
+                sys.stdout.write(" ".join(head_info) + "\n")
                 sys.exit(0)
             time.sleep(args.retry_interval)
         # If we get here, we've timed out

snowflake/ml/jobs/_utils/scripts/mljob_launcher.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import argparse
+import copy
 import importlib.util
 import json
 import os
@@ -7,7 +8,7 @@ import sys
 import traceback
 import warnings
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 import cloudpickle
@@ -27,7 +28,7 @@ except ImportError:
     from dataclasses import dataclass
     @dataclass(frozen=True)
-    class ExecutionResult:
+    class ExecutionResult:  # type: ignore[no-redef]
         result: Optional[Any] = None
         exception: Optional[BaseException] = None
@@ -35,7 +36,7 @@ except ImportError:
         def success(self) -> bool:
             return self.exception is None
-        def to_dict(self) -> Dict[str, Any]:
+        def to_dict(self) -> dict[str, Any]:
             """Return the serializable dictionary."""
             if isinstance(self.exception, BaseException):
                 exc_type = type(self.exception)
@@ -58,7 +59,7 @@ class SimpleJSONEncoder(json.JSONEncoder):
         try:
             return super().default(obj)
         except TypeError:
-            return str(obj)
+            return f"Unserializable object: {repr(obj)}"
 def run_script(script_path: str, *script_args: Any, main_func: Optional[str] = None) -> Any:
@@ -136,7 +137,9 @@ def main(script_path: str, *script_args: Any, script_main_func: Optional[str] =
         while tb and tb.tb_frame.f_code.co_filename in skip_files:
             # Skip any frames preceding user script execution
             tb = tb.tb_next
-        result_obj = ExecutionResult(exception=e.with_traceback(tb))
+        cleaned_ex = copy.copy(e)  # Need to create a mutable copy of exception to set __traceback__
+        cleaned_ex = cleaned_ex.with_traceback(tb)
+        result_obj = ExecutionResult(exception=cleaned_ex)
         raise
     finally:
         result_dict = result_obj.to_dict()

snowflake/ml/jobs/_utils/scripts/signal_workers.py CHANGED Viewed

@@ -9,7 +9,7 @@ import logging
 import socket
 import sys
 import time
-from typing import Any, Dict, List, Set
+from typing import Any
 import ray
 from constants import (
@@ -33,34 +33,34 @@ class ShutdownSignal:
         self.acknowledged_workers = set()
         logging.info(f"ShutdownSignal actor created on {self.hostname}")
-    def request_shutdown(self) -> Dict[str, Any]:
+    def request_shutdown(self) -> dict[str, Any]:
         """Signal workers to shut down"""
         self.shutdown_requested = True
         self.timestamp = time.time()
         logging.info(f"Shutdown requested by head node at {self.timestamp}")
         return {"status": "shutdown_requested", "timestamp": self.timestamp, "host": self.hostname}
-    def should_shutdown(self) -> Dict[str, Any]:
+    def should_shutdown(self) -> dict[str, Any]:
         """Check if shutdown has been requested"""
         return {"shutdown": self.shutdown_requested, "timestamp": self.timestamp, "host": self.hostname}
-    def ping(self) -> Dict[str, Any]:
+    def ping(self) -> dict[str, Any]:
         """Simple method to test connectivity"""
         return {"status": "alive", "host": self.hostname}
-    def acknowledge_shutdown(self, worker_id: str) -> Dict[str, Any]:
+    def acknowledge_shutdown(self, worker_id: str) -> dict[str, Any]:
         """Worker acknowledges it has received the shutdown signal and is terminating"""
         self.acknowledged_workers.add(worker_id)
         logging.info(f"Worker {worker_id} acknowledged shutdown. Total acknowledged: {len(self.acknowledged_workers)}")
         return {"status": "acknowledged", "worker_id": worker_id, "acknowledged_count": len(self.acknowledged_workers)}
-    def get_acknowledgment_workers(self) -> Set[str]:
+    def get_acknowledgment_workers(self) -> set[str]:
         """Get the set of workers who have acknowledged shutdown"""
         return self.acknowledged_workers
-def get_worker_node_ids() -> List[str]:
+def get_worker_node_ids() -> list[str]:
     """Get the IDs of all active worker nodes.
     Returns:
@@ -127,7 +127,7 @@ def verify_shutdown(shutdown_signal: ActorHandle) -> None:
     logging.debug(f"Shutdown status check: {check}")
-def wait_for_acknowledgments(shutdown_signal: ActorHandle, worker_node_ids: List[str], wait_time: int) -> None:
+def wait_for_acknowledgments(shutdown_signal: ActorHandle, worker_node_ids: list[str], wait_time: int) -> None:
     """Wait for workers to acknowledge shutdown.
     Args:

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl