snowflake-ml-python 1.7.5__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +58 -3
- snowflake/ml/_internal/file_utils.py +18 -4
- snowflake/ml/_internal/platform_capabilities.py +3 -0
- snowflake/ml/_internal/telemetry.py +4 -0
- snowflake/ml/fileset/fileset.py +0 -1
- snowflake/ml/jobs/_utils/constants.py +24 -0
- snowflake/ml/jobs/_utils/payload_utils.py +94 -20
- snowflake/ml/jobs/_utils/spec_utils.py +73 -31
- snowflake/ml/jobs/decorators.py +3 -0
- snowflake/ml/jobs/manager.py +5 -0
- snowflake/ml/model/_client/model/model_version_impl.py +1 -1
- snowflake/ml/model/_client/ops/model_ops.py +107 -14
- snowflake/ml/model/_client/ops/service_ops.py +1 -1
- snowflake/ml/model/_client/service/model_deployment_spec.py +7 -3
- snowflake/ml/model/_client/sql/model_version.py +58 -0
- snowflake/ml/model/_client/sql/service.py +8 -2
- snowflake/ml/model/_model_composer/model_composer.py +50 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +0 -1
- snowflake/ml/model/_packager/model_env/model_env.py +4 -1
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +28 -24
- snowflake/ml/model/_packager/model_handlers/keras.py +1 -5
- snowflake/ml/model/_packager/model_handlers/pytorch.py +50 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +1 -4
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +1 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +46 -26
- snowflake/ml/model/_packager/model_handlers/torchscript.py +49 -20
- snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
- snowflake/ml/model/_packager/model_handlers_migrator/pytorch_migrator_2023_12_01.py +20 -0
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2025_01_01.py +19 -0
- snowflake/ml/model/_packager/model_handlers_migrator/torchscript_migrator_2023_12_01.py +20 -0
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +0 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +5 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +13 -0
- snowflake/ml/model/_packager/model_packager.py +3 -5
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -2
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -0
- snowflake/ml/model/_signatures/builtins_handler.py +20 -9
- snowflake/ml/model/_signatures/core.py +52 -31
- snowflake/ml/model/_signatures/dmatrix_handler.py +98 -0
- snowflake/ml/model/_signatures/numpy_handler.py +9 -17
- snowflake/ml/model/_signatures/pandas_handler.py +19 -30
- snowflake/ml/model/_signatures/pytorch_handler.py +57 -41
- snowflake/ml/model/_signatures/snowpark_handler.py +0 -12
- snowflake/ml/model/_signatures/tensorflow_handler.py +61 -67
- snowflake/ml/model/_signatures/utils.py +120 -8
- snowflake/ml/model/custom_model.py +13 -4
- snowflake/ml/model/model_signature.py +31 -13
- snowflake/ml/model/type_hints.py +13 -2
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -1
- snowflake/ml/registry/_manager/model_manager.py +55 -7
- snowflake/ml/registry/registry.py +18 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.0.dist-info}/METADATA +287 -11
- {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.0.dist-info}/RECORD +61 -57
- {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.0.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.0.dist-info/licenses}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.0.dist-info}/top_level.txt +0 -0
snowflake/cortex/_complete.py
CHANGED
@@ -23,6 +23,15 @@ logger = logging.getLogger(__name__)
 _REST_COMPLETE_URL = "/api/v2/cortex/inference:complete"
 
 
+class ResponseFormat(TypedDict):
+    """Represents an object describing response format config for structured-output mode"""
+
+    type: str
+    """The response format type (e.g. "json")"""
+    schema: Dict[str, Any]
+    """The schema defining the structure of the response. For json it should be a valid json schema object"""
+
+
 class ConversationMessage(TypedDict):
     """Represents an conversation interaction."""
 
@@ -53,6 +62,9 @@ class CompleteOptions(TypedDict):
     """ A boolean value that controls whether Cortex Guard filters unsafe or harmful responses
     from the language model. """
 
+    response_format: NotRequired[ResponseFormat]
+    """ An object describing response format config for structured-output mode """
+
 
 class ResponseParseException(Exception):
     """This exception is raised when the server response cannot be parsed."""
@@ -108,6 +120,32 @@ def _make_common_request_headers() -> Dict[str, str]:
     return headers
 
 
+def _validate_response_format_object(options: CompleteOptions) -> None:
+    """Validate the response format object for structured-output mode.
+
+    More details can be found in:
+    docs.snowflake.com/en/user-guide/snowflake-cortex/complete-structured-outputs#using-complete-structured-outputs
+
+    Args:
+        options: The complete options object.
+
+    Raises:
+        ValueError: If the response format object is invalid or missing required fields.
+    """
+    if options is not None and options.get("response_format") is not None:
+        options_obj = options.get("response_format")
+        if not isinstance(options_obj, dict):
+            raise ValueError("'response_format' should be an object")
+        if options_obj.get("type") is None:
+            raise ValueError("'type' cannot be empty for 'response_format' object")
+        if not isinstance(options_obj.get("type"), str):
+            raise ValueError("'type' needs to be a str for 'response_format' object")
+        if options_obj.get("schema") is None:
+            raise ValueError("'schema' cannot be empty for 'response_format' object")
+        if not isinstance(options_obj.get("schema"), dict):
+            raise ValueError("'schema' needs to be a dict for 'response_format' object")
+
+
 def _make_request_body(
     model: str,
     prompt: Union[str, List[ConversationMessage]],
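For illustration, an options payload that passes the new `_validate_response_format_object` check pairs a `type` string with a JSON-schema dict; anything else raises `ValueError`. A minimal sketch (the schema contents below are hypothetical, not shipped with the package):

# Sketch: a structured-output options payload accepted by the new validator.
# The schema here is a hypothetical example.
options = {
    "response_format": {
        "type": "json",  # must be a str, or ValueError is raised
        "schema": {      # must be a dict (a JSON schema object for type "json")
            "type": "object",
            "properties": {"sentiment": {"type": "string"}},
            "required": ["sentiment"],
        },
    }
}

# Omitting "schema" (or passing a non-dict) fails validation:
# _validate_response_format_object({"response_format": {"type": "json"}})
# -> ValueError: 'schema' cannot be empty for 'response_format' object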
@@ -136,12 +174,16 @@ def _make_request_body(
             "response_when_unsafe": "Response filtered by Cortex Guard",
         }
         data["guardrails"] = guardrails_options
+    if "response_format" in options:
+        data["response_format"] = options["response_format"]
+
     return data
 
 
 # XP endpoint returns a dict response which needs to be converted to a format which can
 # be consumed by the SSEClient. This method does that.
 def _xp_dict_to_response(raw_resp: Dict[str, Any]) -> requests.Response:
+
     response = requests.Response()
     response.status_code = int(raw_resp["status"])
     response.headers = raw_resp["headers"]
@@ -159,7 +201,6 @@ def _xp_dict_to_response(raw_resp: Dict[str, Any]) -> requests.Response:
             data = json.loads(data)
         except json.JSONDecodeError:
             raise ValueError(f"Request failed (request id: {request_id})")
-
     if response.status_code < 200 or response.status_code >= 300:
         if "message" not in data:
             raise ValueError(f"Request failed (request id: {request_id})")
@@ -241,11 +282,21 @@ def _return_stream_response(response: requests.Response, deadline: Optional[float]
         if deadline is not None and time.time() > deadline:
             raise TimeoutError()
         try:
-            yield json.loads(event.data)["choices"][0]["delta"]["content"]
+            parsed_resp = json.loads(event.data)
+        except json.JSONDecodeError:
+            raise ResponseParseException("Server response cannot be parsed")
+        try:
+            yield parsed_resp["choices"][0]["delta"]["content"]
         except (json.JSONDecodeError, KeyError, IndexError):
             # For the sake of evolution of the output format,
             # ignore stream messages that don't match the expected format.
-            pass
+
+            # This is the case of midstream errors which were introduced specifically for structured output.
+            # TODO: discuss during code review
+            if parsed_resp.get("error"):
+                yield json.dumps(parsed_resp)
+            else:
+                pass
 
 
 def _complete_call_sql_function_snowpark(
@@ -291,6 +342,8 @@ def _complete_non_streaming_impl(
         raise ValueError("'model' cannot be a snowpark.Column when 'prompt' is a string.")
     if isinstance(options, snowpark.Column):
         raise ValueError("'options' cannot be a snowpark.Column when 'prompt' is a string.")
+    if options and not isinstance(options, snowpark.Column):
+        _validate_response_format_object(options)
     return _complete_non_streaming_immediate(
         snow_api_xp_request_handler=snow_api_xp_request_handler,
         model=model,
@@ -309,6 +362,8 @@ def _complete_rest(
     session: Optional[snowpark.Session] = None,
     deadline: Optional[float] = None,
 ) -> Iterator[str]:
+    if options:
+        _validate_response_format_object(options)
    if snow_api_xp_request_handler is not None:
        response = _call_complete_xp(
            snow_api_xp_request_handler=snow_api_xp_request_handler,
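Taken together, these hunks let callers request structured output through the public `Complete` entry point, with the options dict validated before the request body is built. A usage sketch (model name and schema are illustrative placeholders, and an active Snowpark session is assumed):

from snowflake.cortex import Complete

# Illustrative only: model name and schema are placeholders, not package defaults.
response = Complete(
    model="mistral-large2",
    prompt="Classify the sentiment of this review: 'Great product!'",
    options={
        "response_format": {
            "type": "json",
            "schema": {"type": "object", "properties": {"label": {"type": "string"}}},
        }
    },
)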
snowflake/ml/_internal/file_utils.py
CHANGED
@@ -23,6 +23,7 @@ from typing import (
     Tuple,
     Union,
 )
+from urllib import parse
 
 import cloudpickle
 
@@ -294,7 +295,7 @@ def _retry_on_sql_error(exception: Exception) -> bool:
 def upload_directory_to_stage(
     session: snowpark.Session,
     local_path: pathlib.Path,
-    stage_path: pathlib.PurePosixPath,
+    stage_path: Union[pathlib.PurePosixPath, parse.ParseResult],
     *,
     statement_params: Optional[Dict[str, Any]] = None,
 ) -> None:
@@ -314,9 +315,22 @@ def upload_directory_to_stage(
         root_path = pathlib.Path(root)
         for filename in filenames:
             local_file_path = root_path / filename
-
-
-            )
+            relative_path = pathlib.PurePosixPath(local_file_path.relative_to(local_path).as_posix())
+
+            if isinstance(stage_path, parse.ParseResult):
+                relative_stage_path = (pathlib.PosixPath(stage_path.path) / relative_path).parent
+                new_url = parse.ParseResult(
+                    scheme=stage_path.scheme,
+                    netloc=stage_path.netloc,
+                    path=str(relative_stage_path),
+                    params=stage_path.params,
+                    query=stage_path.query,
+                    fragment=stage_path.fragment,
+                )
+                stage_dir_path = parse.urlunparse(new_url)
+            else:
+                stage_dir_path = str((stage_path / relative_path).parent)
+
             retrying.retry(
                 retry_on_exception=_retry_on_sql_error,
                 stop_max_attempt_number=5,
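The widened `stage_path` type means callers can now pass a pre-parsed stage URL instead of a bare POSIX path. A sketch of both call shapes (the session object and all stage locations are placeholders):

import pathlib
from urllib import parse

from snowflake.ml._internal import file_utils

local_dir = pathlib.Path("/tmp/model_files")  # placeholder directory

# 1) Plain stage path, as before:
file_utils.upload_directory_to_stage(
    session, local_dir, pathlib.PurePosixPath("@MY_DB.MY_SCHEMA.MY_STAGE/models")
)

# 2) Pre-parsed URL: scheme and netloc are preserved, and only the path
#    component is extended with each uploaded file's relative directory.
file_utils.upload_directory_to_stage(
    session, local_dir, parse.urlparse("snow://model/MY_MODEL/versions/V1")
)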
snowflake/ml/_internal/platform_capabilities.py
CHANGED
@@ -37,6 +37,9 @@ class PlatformCapabilities:
     def is_nested_function_enabled(self) -> bool:
         return self._get_bool_feature("SPCS_MODEL_ENABLE_EMBEDDED_SERVICE_FUNCTIONS", False)
 
+    def is_live_commit_enabled(self) -> bool:
+        return self._get_bool_feature("ENABLE_BUNDLE_MODULE_CHECKOUT", False)
+
     @staticmethod
     def _get_features(session: snowpark_session.Session) -> Dict[str, Any]:
         try:
snowflake/ml/_internal/telemetry.py
CHANGED
@@ -353,6 +353,10 @@ def get_function_usage_statement_params(
         statement_params[TelemetryField.KEY_API_CALLS.value].append({TelemetryField.NAME.value: api_call})
     if custom_tags:
         statement_params[TelemetryField.KEY_CUSTOM_TAGS.value] = custom_tags
+    # Snowpark doesn't support None value in statement_params from version 1.29
+    for k in statement_params:
+        if statement_params[k] is None:
+            statement_params[k] = ""
     return statement_params
 
 
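The loop above normalizes `None` values before the dict reaches Snowpark. In isolation the behavior looks like this (a standalone sketch, not package code; key names are illustrative):

statement_params = {"project": "MLOps", "subproject": None, "function_category": "USAGE"}

for k in statement_params:
    if statement_params[k] is None:
        statement_params[k] = ""  # Snowpark >= 1.29 rejects None values

assert statement_params["subproject"] == ""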
snowflake/ml/fileset/fileset.py
CHANGED
@@ -257,7 +257,6 @@ class FileSet:
                 function_name=telemetry.get_statement_params_full_func_name(
                     inspect.currentframe(), cls.__class__.__name__
                 ),
-                api_calls=[snowpark.DataFrameWriter.copy_into_location],
             ),
         )
     except snowpark_exceptions.SnowparkSQLException as e:
snowflake/ml/jobs/_utils/constants.py
CHANGED
@@ -18,6 +18,30 @@ DEFAULT_ENTRYPOINT_PATH = "func.py"
 # Percent of container memory to allocate for /dev/shm volume
 MEMORY_VOLUME_SIZE = 0.3
 
+# Multi Node Headless prototype constants
+# TODO: Replace this placeholder with the actual container runtime image tag.
+MULTINODE_HEADLESS_IMAGE_TAG = "latest"
+
+# Ray port configuration
+RAY_PORTS = {
+    "HEAD_CLIENT_SERVER_PORT": "10001",
+    "HEAD_GCS_PORT": "12001",
+    "HEAD_DASHBOARD_GRPC_PORT": "12002",
+    "HEAD_DASHBOARD_PORT": "12003",
+    "OBJECT_MANAGER_PORT": "12011",
+    "NODE_MANAGER_PORT": "12012",
+    "RUNTIME_ENV_AGENT_PORT": "12013",
+    "DASHBOARD_AGENT_GRPC_PORT": "12014",
+    "DASHBOARD_AGENT_LISTEN_PORT": "12015",
+    "MIN_WORKER_PORT": "12031",
+    "MAX_WORKER_PORT": "13000",
+}
+
+# Node health check configuration
+# TODO(SNOW-1937020): Revisit the health check configuration
+ML_RUNTIME_HEALTH_CHECK_PORT = "5001"
+ENABLE_HEALTH_CHECKS = "false"
+
 # Job status polling constants
 JOB_POLL_INITIAL_DELAY_SECONDS = 0.1
 JOB_POLL_MAX_DELAY_SECONDS = 1
snowflake/ml/jobs/_utils/payload_utils.py
CHANGED
@@ -73,8 +73,17 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
         ##### Ray configuration #####
         shm_size=$(df --output=size --block-size=1 /dev/shm | tail -n 1)
 
+        # Check if the instance ip retrieval module exists, which is a prerequisite for multi node jobs
+        HELPER_EXISTS=$(
+            python3 -c "import snowflake.runtime.utils.get_instance_ip" 2>/dev/null && echo "true" || echo "false"
+        )
+
         # Configure IP address and logging directory
-        eth0Ip=$(ifconfig eth0 2>/dev/null | sed -En -e 's/.*inet ([0-9.]+).*/\1/p')
+        if [ "$HELPER_EXISTS" = "true" ]; then
+            eth0Ip=$(python3 -m snowflake.runtime.utils.get_instance_ip "$SNOWFLAKE_SERVICE_NAME" --instance-index=-1)
+        else
+            eth0Ip=$(ifconfig eth0 2>/dev/null | sed -En -e 's/.*inet ([0-9.]+).*/\1/p')
+        fi
         log_dir="/tmp/ray"
 
         # Check if eth0Ip is a valid IP address and fall back to default if necessary
@@ -82,6 +91,38 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
             eth0Ip="127.0.0.1"
         fi
 
+        # Get the environment values of SNOWFLAKE_JOBS_COUNT and SNOWFLAKE_JOB_INDEX for batch jobs
+        # These variables don't exist for non-batch jobs, so set defaults
+        if [ -z "$SNOWFLAKE_JOBS_COUNT" ]; then
+            SNOWFLAKE_JOBS_COUNT=1
+        fi
+
+        if [ -z "$SNOWFLAKE_JOB_INDEX" ]; then
+            SNOWFLAKE_JOB_INDEX=0
+        fi
+
+        # Determine if it should be a worker or a head node for batch jobs
+        if [[ "$SNOWFLAKE_JOBS_COUNT" -gt 1 && "$HELPER_EXISTS" = "true" ]]; then
+            head_info=$(python3 -m snowflake.runtime.utils.get_instance_ip "$SNOWFLAKE_SERVICE_NAME" --head)
+            if [ $? -eq 0 ]; then
+                # Parse the output using read
+                read head_index head_ip <<< "$head_info"
+
+                # Use the parsed variables
+                echo "Head Instance Index: $head_index"
+                echo "Head Instance IP: $head_ip"
+
+            else
+                echo "Error: Failed to get head instance information."
+                echo "$head_info"  # Print the error message
+                exit 1
+            fi
+
+            if [ "$SNOWFLAKE_JOB_INDEX" -ne "$head_index" ]; then
+                NODE_TYPE="worker"
+            fi
+        fi
+
         # Common parameters for both head and worker nodes
         common_params=(
             "--node-ip-address=$eth0Ip"
@@ -97,29 +138,62 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
             "--disable-usage-stats"
         )
 
-
-
-            "
-
-
-
-
-
-
-
+        if [ "$NODE_TYPE" = "worker" ]; then
+            # Use head_ip as head address if it exists
+            if [ ! -z "$head_ip" ]; then
+                RAY_HEAD_ADDRESS="$head_ip"
+            fi
+
+            # If RAY_HEAD_ADDRESS is still empty, exit with an error
+            if [ -z "$RAY_HEAD_ADDRESS" ]; then
+                echo "Error: Failed to determine head node address using default instance-index=0"
+                exit 1
+            fi
+
+            if [ -z "$SERVICE_NAME" ]; then
+                SERVICE_NAME="$SNOWFLAKE_SERVICE_NAME"
+            fi
+
+            if [ -z "$RAY_HEAD_ADDRESS" ] || [ -z "$SERVICE_NAME" ]; then
+                echo "Error: RAY_HEAD_ADDRESS and SERVICE_NAME must be set."
+                exit 1
+            fi
+
+            # Additional worker-specific parameters
+            worker_params=(
+                "--address=${{RAY_HEAD_ADDRESS}}:12001"  # Connect to head node
+                "--resources={{\\"${{SERVICE_NAME}}\\":1, \\"node_tag:worker\\":1}}"  # Tag for node identification
+                "--object-store-memory=${{shm_size}}"
+            )
 
-
-
+            # Start Ray on a worker node
+            ray start "${{common_params[@]}}" "${{worker_params[@]}}" -v --block
+        else
+
+            # Additional head-specific parameters
+            head_params=(
+                "--head"
+                "--port=${{RAY_HEAD_GCS_PORT:-12001}}"  # Port of Ray (GCS server)
+                "--ray-client-server-port=${{RAY_HEAD_CLIENT_SERVER_PORT:-10001}}"  # Rort for Ray Client Server
+                "--dashboard-host=${{NODE_IP_ADDRESS}}"  # Host to bind the dashboard server
+                "--dashboard-grpc-port=${{RAY_HEAD_DASHBOARD_GRPC_PORT:-12002}}"  # Dashboard head to listen for grpc
+                "--dashboard-port=${{DASHBOARD_PORT}}"  # Port to bind the dashboard server for debugging
+                "--resources={{\\"node_tag:head\\":1}}"  # Resource tag for selecting head as coordinator
+            )
+
+            # Start Ray on the head node
+            ray start "${{common_params[@]}}" "${{head_params[@]}}" -v
+            ##### End Ray configuration #####
 
-
+            # TODO: Monitor MLRS and handle process crashes
+            python -m web.ml_runtime_grpc_server &
 
-
+            # TODO: Launch worker service(s) using SQL if Ray and MLRS successfully started
 
-
-
-
+            # Run user's Python entrypoint
+            echo Running command: python "$@"
+            python "$@"
+        fi
     """
 ).strip()
 
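The head/worker election in the startup script reduces to: the node whose job index matches the head index reported by the helper becomes the Ray head, and every other node becomes a worker. A Python paraphrase of that shell logic (the helper's "<head_index> <head_ip>" output format is taken from the script; everything else is illustrative):

import os

def elect_node_type(head_info: str) -> str:
    """Paraphrase of the startup script's election logic, for illustration only."""
    jobs_count = int(os.environ.get("SNOWFLAKE_JOBS_COUNT", "1"))
    job_index = int(os.environ.get("SNOWFLAKE_JOB_INDEX", "0"))
    if jobs_count <= 1:
        return "head"  # single-node jobs never start workers
    head_index, _head_ip = head_info.split()
    return "worker" if job_index != int(head_index) else "head"

# e.g. with SNOWFLAKE_JOBS_COUNT=3, SNOWFLAKE_JOB_INDEX=2 and helper output "0 10.0.0.5":
# elect_node_type("0 10.0.0.5") -> "worker"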
snowflake/ml/jobs/_utils/spec_utils.py
CHANGED
@@ -26,19 +26,22 @@ def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.C
     )
 
 
-def _get_image_spec(session: snowpark.Session, compute_pool: str) -> types.ImageSpec:
+def _get_image_spec(session: snowpark.Session, compute_pool: str, image_tag: Optional[str] = None) -> types.ImageSpec:
     # Retrieve compute pool node resources
     resources = _get_node_resources(session, compute_pool=compute_pool)
 
     # Use MLRuntime image
     image_repo = constants.DEFAULT_IMAGE_REPO
     image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
-    image_tag = constants.DEFAULT_IMAGE_TAG
 
     # Try to pull latest image tag from server side if possible
-
-
-
+    if not image_tag:
+        query_result = session.sql("SHOW PARAMETERS LIKE 'constants.RUNTIME_BASE_IMAGE_TAG' IN ACCOUNT").collect()
+        if query_result:
+            image_tag = query_result[0]["value"]
+
+    if image_tag is None:
+        image_tag = constants.DEFAULT_IMAGE_TAG
 
     # TODO: Should each instance consume the entire pod?
     return types.ImageSpec(
@@ -93,6 +96,7 @@ def generate_service_spec(
     compute_pool: str,
     payload: types.UploadedPayload,
     args: Optional[List[str]] = None,
+    num_instances: Optional[int] = None,
 ) -> Dict[str, Any]:
     """
     Generate a service specification for a job.
@@ -102,12 +106,21 @@ def generate_service_spec(
         compute_pool: Compute pool for job execution
         payload: Uploaded job payload
         args: Arguments to pass to entrypoint script
+        num_instances: Number of instances for multi-node job
 
     Returns:
         Job service specification
     """
+    is_multi_node = num_instances is not None and num_instances > 1
+
     # Set resource requests/limits, including nvidia.com/gpu quantity if applicable
-    image_spec = _get_image_spec(session, compute_pool)
+    if is_multi_node:
+        # If the job is of multi-node, we will need a different image which contains
+        # module snowflake.runtime.utils.get_instance_ip
+        # TODO(SNOW-1961849): Remove the hard-coded image name
+        image_spec = _get_image_spec(session, compute_pool, constants.MULTINODE_HEADLESS_IMAGE_TAG)
+    else:
+        image_spec = _get_image_spec(session, compute_pool)
     resource_requests: Dict[str, Union[str, int]] = {
         "cpu": f"{int(image_spec.resource_requests.cpu * 1000)}m",
         "memory": f"{image_spec.resource_limits.memory}Gi",
@@ -176,31 +189,53 @@ def generate_service_spec(
 
     # TODO: Add hooks for endpoints for integration with TensorBoard etc
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    env_vars = {constants.PAYLOAD_DIR_ENV_VAR: stage_mount.as_posix()}
+    endpoints = []
+
+    if is_multi_node:
+        # Update environment variables for multi-node job
+        env_vars.update(constants.RAY_PORTS)
+        env_vars["ENABLE_HEALTH_CHECKS"] = constants.ENABLE_HEALTH_CHECKS
+
+        # Define Ray endpoints for intra-service instance communication
+        ray_endpoints = [
+            {"name": "ray-client-server-endpoint", "port": 10001, "protocol": "TCP"},
+            {"name": "ray-gcs-endpoint", "port": 12001, "protocol": "TCP"},
+            {"name": "ray-dashboard-grpc-endpoint", "port": 12002, "protocol": "TCP"},
+            {"name": "ray-object-manager-endpoint", "port": 12011, "protocol": "TCP"},
+            {"name": "ray-node-manager-endpoint", "port": 12012, "protocol": "TCP"},
+            {"name": "ray-runtime-agent-endpoint", "port": 12013, "protocol": "TCP"},
+            {"name": "ray-dashboard-agent-grpc-endpoint", "port": 12014, "protocol": "TCP"},
+            {"name": "ephemeral-port-range", "portRange": "32768-60999", "protocol": "TCP"},
+            {"name": "ray-worker-port-range", "portRange": "12031-13000", "protocol": "TCP"},
+        ]
+        endpoints.extend(ray_endpoints)
+
+    spec_dict = {
+        "containers": [
+            {
+                "name": constants.DEFAULT_CONTAINER_NAME,
+                "image": image_spec.full_name,
+                "command": ["/usr/local/bin/_entrypoint.sh"],
+                "args": [
+                    (stage_mount.joinpath(v).as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint
+                ]
+                + (args or []),
+                "env": env_vars,
+                "volumeMounts": volume_mounts,
+                "resources": {
+                    "requests": resource_requests,
+                    "limits": resource_limits,
                 },
-
-
-
+            },
+        ],
+        "volumes": volumes,
     }
+    if endpoints:
+        spec_dict["endpoints"] = endpoints
+
+    # Assemble into service specification dict
+    spec = {"spec": spec_dict}
 
 
     return spec
@@ -248,7 +283,10 @@ def merge_patch(base: Any, patch: Any, display_name: str = "") -> Any:
 
 
 def _merge_lists_of_dicts(
-    base: List[Dict[str, Any]], patch: List[Dict[str, Any]], merge_key: str = "name", display_name: str = ""
+    base: List[Dict[str, Any]],
+    patch: List[Dict[str, Any]],
+    merge_key: str = "name",
+    display_name: str = "",
 ) -> List[Dict[str, Any]]:
     """
     Attempts to merge lists of dicts by matching on a merge key (default "name").
@@ -288,7 +326,11 @@ def _merge_lists_of_dicts(
 
         # Apply patch
         if key in result:
-            d = merge_patch(result[key], d, display_name=f"{display_name}[{merge_key}={d[merge_key]}]")
+            d = merge_patch(
+                result[key],
+                d,
+                display_name=f"{display_name}[{merge_key}={d[merge_key]}]",
+            )
             # TODO: Should we drop the item if the patch result is empty save for the merge key?
             # Can check `d.keys() <= {merge_key}`
             result[key] = d
snowflake/ml/jobs/decorators.py
CHANGED
@@ -25,6 +25,7 @@ def remote(
     query_warehouse: Optional[str] = None,
     env_vars: Optional[Dict[str, str]] = None,
     session: Optional[snowpark.Session] = None,
+    num_instances: Optional[int] = None,
 ) -> Callable[[Callable[_Args, _ReturnValue]], Callable[_Args, jb.MLJob]]:
     """
     Submit a job to the compute pool.
@@ -37,6 +38,7 @@ def remote(
         query_warehouse: The query warehouse to use. Defaults to session warehouse.
         env_vars: Environment variables to set in container
         session: The Snowpark session to use. If none specified, uses active session.
+        num_instances: The number of nodes in the job. If none specified, create a single node job.
 
     Returns:
         Decorator that dispatches invocations of the decorated function as remote jobs.
@@ -62,6 +64,7 @@ def remote(
             query_warehouse=query_warehouse,
             env_vars=env_vars,
             session=session,
+            num_instances=num_instances,
        )
        assert isinstance(job, jb.MLJob), f"Unexpected job type: {type(job)}"
        return job
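With `num_instances` plumbed through, a decorated function can request a multi-node job directly. A sketch, assuming the decorator's leading arguments are the compute pool and payload stage (both names are placeholders):

from snowflake.ml.jobs import remote

# "MY_POOL" and "payload_stage" are placeholders; num_instances > 1 requests a multi-node job.
@remote("MY_POOL", stage_name="payload_stage", num_instances=2)
def train(data_path: str) -> None:
    print(f"training on {data_path}")

job = train("@MY_STAGE/data")  # returns an MLJob handle instead of running locally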
snowflake/ml/jobs/manager.py
CHANGED
@@ -213,6 +213,7 @@ def _submit_job(
     query_warehouse: Optional[str] = None,
     spec_overrides: Optional[Dict[str, Any]] = None,
     session: Optional[snowpark.Session] = None,
+    num_instances: Optional[int] = None,
 ) -> jb.MLJob:
     """
     Submit a job to the compute pool.
@@ -229,6 +230,7 @@ def _submit_job(
         query_warehouse: The query warehouse to use. Defaults to session warehouse.
         spec_overrides: Custom service specification overrides to apply.
         session: The Snowpark session to use. If none specified, uses active session.
+        num_instances: The number of instances to use for the job. If none specified, single node job is created.
 
     Returns:
         An object representing the submitted job.
@@ -254,6 +256,7 @@ def _submit_job(
         compute_pool=compute_pool,
         payload=uploaded_payload,
         args=args,
+        num_instances=num_instances,
     )
     spec_overrides = spec_utils.generate_spec_overrides(
         environment_vars=env_vars,
@@ -281,6 +284,8 @@ def _submit_job(
     query_warehouse = query_warehouse or session.get_current_warehouse()
     if query_warehouse:
         query.append(f"QUERY_WAREHOUSE = {query_warehouse}")
+    if num_instances:
+        query.append(f"REPLICAS = {num_instances}")
 
     # Submit job
     query_text = "\n".join(line for line in query if line)
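The new branch appends a REPLICAS clause to the job-service DDL when multiple instances are requested. A simplified sketch of the clause assembly (only the two appends shown in the hunk above are taken from the source; the leading statement lines are abridged placeholders):

# Abridged illustration of the query assembly in _submit_job.
query = ["EXECUTE JOB SERVICE", "IN COMPUTE POOL MY_POOL"]  # placeholders
query_warehouse = "MY_WH"  # placeholder
num_instances = 2

if query_warehouse:
    query.append(f"QUERY_WAREHOUSE = {query_warehouse}")
if num_instances:
    query.append(f"REPLICAS = {num_instances}")

query_text = "\n".join(line for line in query if line)
print(query_text)
# EXECUTE JOB SERVICE
# IN COMPUTE POOL MY_POOL
# QUERY_WAREHOUSE = MY_WH
# REPLICAS = 2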
snowflake/ml/model/_client/model/model_version_impl.py
CHANGED
@@ -746,7 +746,7 @@ class ModelVersion(lineage_node.LineageNode):
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
-        gpu_requests: Optional[str] = None,
+        gpu_requests: Optional[Union[str, int]] = None,
         num_workers: Optional[int] = None,
         max_batch_rows: Optional[int] = None,
         force_rebuild: bool = False,
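Since `gpu_requests` now accepts an int as well as a string, both spellings below are valid. A sketch of the call site (service, pool, and repo names are placeholders; `mv` is a ModelVersion handle):

# Placeholders throughout; mv is a snowflake.ml ModelVersion handle.
mv.create_service(
    service_name="MY_SERVICE",
    service_compute_pool="MY_GPU_POOL",
    image_repo="MY_DB.MY_SCHEMA.MY_REPO",
    gpu_requests=1,  # now accepted as an int...
    max_instances=1,
)
# ...equivalent to the previous string-only form:
# mv.create_service(..., gpu_requests="1")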
|