snowflake-ml-python 1.8.0__py3-none-any.whl → 1.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +44 -10
- snowflake/ml/_internal/platform_capabilities.py +39 -3
- snowflake/ml/data/data_connector.py +25 -0
- snowflake/ml/dataset/dataset_reader.py +5 -1
- snowflake/ml/jobs/_utils/constants.py +3 -5
- snowflake/ml/jobs/_utils/interop_utils.py +442 -0
- snowflake/ml/jobs/_utils/payload_utils.py +81 -47
- snowflake/ml/jobs/_utils/scripts/constants.py +4 -0
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +136 -0
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +178 -0
- snowflake/ml/jobs/_utils/scripts/signal_workers.py +203 -0
- snowflake/ml/jobs/_utils/scripts/worker_shutdown_listener.py +242 -0
- snowflake/ml/jobs/_utils/spec_utils.py +27 -8
- snowflake/ml/jobs/_utils/types.py +6 -0
- snowflake/ml/jobs/decorators.py +10 -6
- snowflake/ml/jobs/job.py +145 -23
- snowflake/ml/jobs/manager.py +79 -12
- snowflake/ml/model/_client/ops/model_ops.py +6 -3
- snowflake/ml/model/_client/ops/service_ops.py +57 -39
- snowflake/ml/model/_client/service/model_deployment_spec.py +7 -4
- snowflake/ml/model/_client/sql/service.py +11 -5
- snowflake/ml/model/_model_composer/model_composer.py +29 -11
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -2
- snowflake/ml/model/_packager/model_env/model_env.py +8 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +1 -4
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +6 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -0
- snowflake/ml/model/_packager/model_packager.py +2 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/type_hints.py +2 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +5 -1
- snowflake/ml/registry/_manager/model_manager.py +20 -1
- snowflake/ml/registry/registry.py +46 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.8.0.dist-info → snowflake_ml_python-1.8.2.dist-info}/METADATA +55 -4
- {snowflake_ml_python-1.8.0.dist-info → snowflake_ml_python-1.8.2.dist-info}/RECORD +40 -34
- {snowflake_ml_python-1.8.0.dist-info → snowflake_ml_python-1.8.2.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.8.0.dist-info → snowflake_ml_python-1.8.2.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.8.0.dist-info → snowflake_ml_python-1.8.2.dist-info}/top_level.txt +0 -0
snowflake/cortex/_complete.py
CHANGED
@@ -1,11 +1,13 @@
 import json
 import logging
 import time
+import typing
 from io import BytesIO
 from typing import Any, Callable, Dict, Iterator, List, Optional, TypedDict, Union, cast
 from urllib.parse import urlunparse

 import requests
+from snowflake.core.rest import RESTResponse
 from typing_extensions import NotRequired, deprecated

 from snowflake import snowpark
@@ -72,6 +74,27 @@ class ResponseParseException(Exception):
     pass


+class MidStreamException(Exception):
+    """The SSE (Server-sent Event) stream can contain error messages in the middle of the stream,
+    using the "error" event type. This exception is raised when there is such a mid-stream error."""
+
+    def __init__(
+        self,
+        reason: typing.Optional[str] = None,
+        http_resp: typing.Optional["RESTResponse"] = None,
+        request_id: typing.Optional[str] = None,
+    ) -> None:
+        message = ""
+        if reason is not None:
+            message = reason
+        if http_resp:
+            message = f"Error in stream (HTTP Response: {http_resp.status}) - {http_resp.reason}"
+        if request_id != "":
+            # add request_id to error message
+            message += f" (Request ID: {request_id})"
+        super().__init__(message)
+
+
 class GuardrailsOptions(TypedDict):
     enabled: bool
     """A boolean value that controls whether Cortex Guard filters unsafe or harmful responses
@@ -120,6 +143,18 @@ def _make_common_request_headers() -> Dict[str, str]:
     return headers


+def _get_request_id(resp: Dict[str, Any]) -> Optional[Any]:
+    request_id = None
+    if "headers" in resp:
+        for key, value in resp["headers"].items():
+            # Note: There is some whitespace in the headers making it not possible
+            # to directly index the header reliably.
+            if key.strip().lower() == "x-snowflake-request-id":
+                request_id = value
+                break
+    return request_id
+
+
 def _validate_response_format_object(options: CompleteOptions) -> None:
     """Validate the response format object for structured-output mode.

@@ -188,13 +223,7 @@ def _xp_dict_to_response(raw_resp: Dict[str, Any]) -> requests.Response:
     response.status_code = int(raw_resp["status"])
     response.headers = raw_resp["headers"]

-    request_id = None
-    for key, value in raw_resp["headers"].items():
-        # Note: there is some whitespace in the headers making it not possible
-        # to directly index the header reliably.
-        if key.strip().lower() == "x-snowflake-request-id":
-            request_id = value
-            break
+    request_id = _get_request_id(raw_resp)

     data = raw_resp["content"]
     try:
@@ -276,7 +305,12 @@ def _call_complete_rest(
     )


-def _return_stream_response(response: requests.Response, deadline: Optional[float]) -> Iterator[str]:
+def _return_stream_response(
+    response: requests.Response,
+    deadline: Optional[float],
+    session: Optional[snowpark.Session] = None,
+) -> Iterator[str]:
+    request_id = _get_request_id(dict(response.headers))
     client = SSEClient(response)
     for event in client.events():
         if deadline is not None and time.time() > deadline:
@@ -294,7 +328,7 @@ def _return_stream_response(response: requests.Response, deadline: Optional[float]) -> Iterator[str]:
         # This is the case of midstream errors which were introduced specifically for structured output.
         # TODO: discuss during code review
         if parsed_resp.get("error"):
-
+            raise MidStreamException(reason=response.text, request_id=request_id)
         else:
             pass

@@ -375,7 +409,7 @@ def _complete_rest(
     else:
         response = _call_complete_rest(model=model, prompt=prompt, options=options, session=session, deadline=deadline)
     assert response.status_code >= 200 and response.status_code < 300
-    return _return_stream_response(response, deadline)
+    return _return_stream_response(response, deadline, session)


 def _complete_impl(
snowflake/ml/_internal/platform_capabilities.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+from contextlib import contextmanager
 from typing import Any, Dict, Optional

 from absl import logging
@@ -27,16 +28,45 @@ class PlatformCapabilities:
     """

     _instance: Optional["PlatformCapabilities"] = None
+    # Used for unittesting only. This is to avoid the need to mock the session object or reaching out to Snowflake
+    _mock_features: Optional[Dict[str, Any]] = None

     @classmethod
     def get_instance(cls, session: Optional[snowpark_session.Session] = None) -> "PlatformCapabilities":
+        # Used for unittesting only. In this situation, _instance is not initialized.
+        if cls._mock_features is not None:
+            return cls(features=cls._mock_features)
         if not cls._instance:
-            cls._instance = cls(session)
+            cls._instance = cls(session=session)
         return cls._instance

+    @classmethod
+    def set_mock_features(cls, features: Optional[Dict[str, Any]] = None) -> None:
+        cls._mock_features = features
+
+    @classmethod
+    def clear_mock_features(cls) -> None:
+        cls._mock_features = None
+
+    # For contextmanager, we need to have return type Iterator[Never]. However, Never type is introduced only in
+    # Python 3.11. So, we are ignoring the type for this method.
+    @classmethod  # type: ignore[arg-type]
+    @contextmanager
+    def mock_features(cls, features: Dict[str, Any]) -> None:  # type: ignore[misc]
+        logging.debug(f"Setting mock features: {features}")
+        cls.set_mock_features(features)
+        try:
+            yield
+        finally:
+            logging.debug(f"Clearing mock features: {features}")
+            cls.clear_mock_features()
+
     def is_nested_function_enabled(self) -> bool:
         return self._get_bool_feature("SPCS_MODEL_ENABLE_EMBEDDED_SERVICE_FUNCTIONS", False)

+    def is_inlined_deployment_spec_enabled(self) -> bool:
+        return self._get_bool_feature("ENABLE_INLINE_DEPLOYMENT_SPEC", False)
+
     def is_live_commit_enabled(self) -> bool:
         return self._get_bool_feature("ENABLE_BUNDLE_MODULE_CHECKOUT", False)

@@ -68,11 +98,17 @@ class PlatformCapabilities:
         # This can happen is server side is older than 9.2. That is fine.
         return {}

-    def __init__(
+    def __init__(
+        self, *, session: Optional[snowpark_session.Session] = None, features: Optional[Dict[str, Any]] = None
+    ) -> None:
+        # This is for testing purposes only.
+        if features:
+            self.features = features
+            return
         if not session:
             session = next(iter(snowpark_session._get_active_sessions()))
         assert session, "Missing active session object"
-        self.features
+        self.features = PlatformCapabilities._get_features(session)

     def _get_bool_feature(self, feature_name: str, default_value: bool) -> bool:
         value = self.features.get(feature_name, default_value)
snowflake/ml/data/data_connector.py
CHANGED
@@ -27,6 +27,7 @@ from snowflake.snowpark import context as sf_context

 if TYPE_CHECKING:
     import pandas as pd
+    import ray
     import tensorflow as tf
     from torch.utils import data as torch_data

@@ -241,6 +242,30 @@ class DataConnector:
         """
         return self._ingestor.to_pandas(limit)

+    @telemetry.send_api_usage_telemetry(
+        project=_PROJECT,
+        subproject_extractor=lambda self: type(self).__name__,
+        func_params_to_log=["limit"],
+    )
+    def to_ray_dataset(self) -> "ray.data.Dataset":
+        """Retrieve the Snowflake data as a Ray Dataset.
+
+        Returns:
+            A Ray Dataset.
+
+        Raises:
+            ImportError: If Ray is not installed in the local environment.
+        """
+        if hasattr(self._ingestor, "to_ray_dataset"):
+            return self._ingestor.to_ray_dataset()
+
+        try:
+            import ray
+
+            return ray.data.from_pandas(self._ingestor.to_pandas())
+        except ImportError as e:
+            raise ImportError("Ray is not installed, please install ray in your local environment.") from e
+

 # Switch to use Runtime's Data Ingester if running in ML runtime
 # Fail silently if the data ingester is not found
snowflake/ml/dataset/dataset_reader.py
CHANGED
@@ -5,6 +5,7 @@ from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.lineage import lineage_utils
 from snowflake.ml.data import data_connector, data_ingestor, data_source, ingestor_utils
 from snowflake.ml.fileset import snowfs
+from snowflake.snowpark._internal import utils as snowpark_utils

 _PROJECT = "Dataset"
 _SUBPROJECT = "DatasetReader"
@@ -94,7 +95,10 @@ class DatasetReader(data_connector.DataConnector):
         dfs: List[snowpark.DataFrame] = []
         for source in self.data_sources:
             assert isinstance(source, data_source.DatasetInfo) and source.url is not None
-            df = self._session.read.option("pattern", file_path_pattern).parquet(source.url)
+            stage_reader = self._session.read.option("pattern", file_path_pattern)
+            if "INFER_SCHEMA_OPTIONS" in snowpark_utils.NON_FORMAT_TYPE_OPTIONS:
+                stage_reader = stage_reader.option("INFER_SCHEMA_OPTIONS", {"MAX_FILE_COUNT": 1})
+            df = stage_reader.parquet(source.url)
             if only_feature_cols and source.exclude_cols:
                 df = df.drop(source.exclude_cols)
             dfs.append(df)
snowflake/ml/jobs/_utils/constants.py
CHANGED
@@ -4,6 +4,7 @@ from snowflake.ml.jobs._utils.types import ComputeResources
 # SPCS specification constants
 DEFAULT_CONTAINER_NAME = "main"
 PAYLOAD_DIR_ENV_VAR = "MLRS_PAYLOAD_DIR"
+RESULT_PATH_ENV_VAR = "MLRS_RESULT_PATH"
 MEMORY_VOLUME_NAME = "dshm"
 STAGE_VOLUME_NAME = "stage-volume"
 STAGE_VOLUME_MOUNT_PATH = "/mnt/app"
@@ -12,16 +13,12 @@ STAGE_VOLUME_MOUNT_PATH = "/mnt/app"
 DEFAULT_IMAGE_REPO = "/snowflake/images/snowflake_images"
 DEFAULT_IMAGE_CPU = "st_plat/runtime/x86/runtime_image/snowbooks"
 DEFAULT_IMAGE_GPU = "st_plat/runtime/x86/generic_gpu/runtime_image/snowbooks"
-DEFAULT_IMAGE_TAG = "0.
+DEFAULT_IMAGE_TAG = "1.0.1"
 DEFAULT_ENTRYPOINT_PATH = "func.py"

 # Percent of container memory to allocate for /dev/shm volume
 MEMORY_VOLUME_SIZE = 0.3

-# Multi Node Headless prototype constants
-# TODO: Replace this placeholder with the actual container runtime image tag.
-MULTINODE_HEADLESS_IMAGE_TAG = "latest"
-
 # Ray port configuration
 RAY_PORTS = {
     "HEAD_CLIENT_SERVER_PORT": "10001",
@@ -48,6 +45,7 @@ JOB_POLL_MAX_DELAY_SECONDS = 1

 # Magic attributes
 IS_MLJOB_REMOTE_ATTR = "_is_mljob_remote_callable"
+RESULT_PATH_DEFAULT_VALUE = "mljob_result.pkl"

 # Compute pool resource information
 # TODO: Query Snowflake for resource information instead of relying on this hardcoded
|