snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. snowflake/cortex/__init__.py +4 -0
  2. snowflake/cortex/_complete.py +107 -64
  3. snowflake/cortex/_finetune.py +273 -0
  4. snowflake/cortex/_sse_client.py +91 -28
  5. snowflake/cortex/_util.py +30 -1
  6. snowflake/ml/_internal/telemetry.py +4 -2
  7. snowflake/ml/_internal/type_utils.py +3 -3
  8. snowflake/ml/_internal/utils/import_utils.py +31 -0
  9. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +13 -0
  10. snowflake/ml/data/__init__.py +5 -0
  11. snowflake/ml/data/_internal/arrow_ingestor.py +8 -0
  12. snowflake/ml/data/data_connector.py +1 -1
  13. snowflake/ml/data/torch_utils.py +33 -14
  14. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +5 -3
  15. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +7 -5
  16. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +4 -2
  17. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +3 -1
  18. snowflake/ml/feature_store/examples/example_helper.py +6 -3
  19. snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +4 -2
  20. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +4 -2
  21. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +3 -1
  22. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +3 -1
  23. snowflake/ml/feature_store/feature_store.py +1 -2
  24. snowflake/ml/feature_store/feature_view.py +5 -1
  25. snowflake/ml/model/_client/model/model_version_impl.py +145 -11
  26. snowflake/ml/model/_client/ops/model_ops.py +56 -16
  27. snowflake/ml/model/_client/ops/service_ops.py +46 -30
  28. snowflake/ml/model/_client/service/model_deployment_spec.py +19 -8
  29. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
  30. snowflake/ml/model/_client/sql/service.py +25 -1
  31. snowflake/ml/model/_model_composer/model_composer.py +2 -0
  32. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
  33. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  34. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
  35. snowflake/ml/model/_model_composer/model_method/model_method.py +1 -1
  36. snowflake/ml/model/_packager/model_env/model_env.py +12 -0
  37. snowflake/ml/model/_packager/model_handlers/_utils.py +6 -2
  38. snowflake/ml/model/_packager/model_handlers/catboost.py +4 -7
  39. snowflake/ml/model/_packager/model_handlers/custom.py +5 -1
  40. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +10 -1
  41. snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -7
  42. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -1
  43. snowflake/ml/model/_packager/model_handlers/sklearn.py +51 -7
  44. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +8 -66
  45. snowflake/ml/model/_packager/model_handlers/tensorflow.py +23 -6
  46. snowflake/ml/model/_packager/model_handlers/torchscript.py +14 -14
  47. snowflake/ml/model/_packager/model_handlers/xgboost.py +10 -40
  48. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +2 -3
  49. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +5 -0
  50. snowflake/ml/model/_packager/model_packager.py +0 -11
  51. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -10
  52. snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -9
  53. snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} +14 -26
  54. snowflake/ml/model/_signatures/core.py +63 -16
  55. snowflake/ml/model/_signatures/pandas_handler.py +87 -27
  56. snowflake/ml/model/_signatures/pytorch_handler.py +2 -2
  57. snowflake/ml/model/_signatures/snowpark_handler.py +2 -1
  58. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -2
  59. snowflake/ml/model/_signatures/utils.py +4 -0
  60. snowflake/ml/model/custom_model.py +47 -7
  61. snowflake/ml/model/model_signature.py +40 -9
  62. snowflake/ml/model/type_hints.py +9 -1
  63. snowflake/ml/modeling/_internal/estimator_utils.py +13 -0
  64. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +7 -2
  65. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +16 -5
  66. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -2
  67. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -3
  68. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -8
  69. snowflake/ml/modeling/cluster/agglomerative_clustering.py +17 -19
  70. snowflake/ml/modeling/cluster/dbscan.py +5 -2
  71. snowflake/ml/modeling/cluster/feature_agglomeration.py +7 -19
  72. snowflake/ml/modeling/cluster/k_means.py +14 -19
  73. snowflake/ml/modeling/cluster/mini_batch_k_means.py +3 -3
  74. snowflake/ml/modeling/cluster/optics.py +6 -6
  75. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -3
  76. snowflake/ml/modeling/compose/column_transformer.py +15 -5
  77. snowflake/ml/modeling/compose/transformed_target_regressor.py +7 -6
  78. snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
  79. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
  80. snowflake/ml/modeling/covariance/min_cov_det.py +2 -2
  81. snowflake/ml/modeling/covariance/oas.py +1 -1
  82. snowflake/ml/modeling/decomposition/kernel_pca.py +2 -2
  83. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -12
  84. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -12
  85. snowflake/ml/modeling/decomposition/pca.py +28 -15
  86. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -0
  87. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -12
  88. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -11
  89. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -8
  90. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -8
  91. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +21 -2
  92. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +18 -2
  93. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +2 -0
  94. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +2 -0
  95. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +21 -8
  96. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +21 -11
  97. snowflake/ml/modeling/ensemble/random_forest_classifier.py +21 -2
  98. snowflake/ml/modeling/ensemble/random_forest_regressor.py +18 -2
  99. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +2 -1
  100. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  101. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +2 -2
  102. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +2 -4
  103. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +2 -4
  104. snowflake/ml/modeling/linear_model/ard_regression.py +5 -10
  105. snowflake/ml/modeling/linear_model/bayesian_ridge.py +5 -11
  106. snowflake/ml/modeling/linear_model/elastic_net.py +3 -0
  107. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  108. snowflake/ml/modeling/linear_model/lars.py +0 -10
  109. snowflake/ml/modeling/linear_model/lars_cv.py +1 -11
  110. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  111. snowflake/ml/modeling/linear_model/lasso_lars.py +0 -10
  112. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -11
  113. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +0 -10
  114. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -22
  115. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +30 -24
  116. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  117. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  118. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +4 -13
  119. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +4 -4
  120. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  121. snowflake/ml/modeling/linear_model/perceptron.py +3 -3
  122. snowflake/ml/modeling/linear_model/ransac_regressor.py +3 -2
  123. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +14 -6
  124. snowflake/ml/modeling/linear_model/ridge_cv.py +17 -11
  125. snowflake/ml/modeling/linear_model/sgd_classifier.py +2 -2
  126. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -1
  127. snowflake/ml/modeling/linear_model/sgd_regressor.py +12 -3
  128. snowflake/ml/modeling/manifold/isomap.py +1 -1
  129. snowflake/ml/modeling/manifold/mds.py +3 -3
  130. snowflake/ml/modeling/manifold/tsne.py +10 -4
  131. snowflake/ml/modeling/metrics/classification.py +12 -16
  132. snowflake/ml/modeling/metrics/ranking.py +3 -3
  133. snowflake/ml/modeling/metrics/regression.py +3 -3
  134. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +3 -3
  135. snowflake/ml/modeling/naive_bayes/categorical_nb.py +3 -3
  136. snowflake/ml/modeling/naive_bayes/complement_nb.py +3 -3
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +3 -3
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +10 -4
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +5 -2
  140. snowflake/ml/modeling/neighbors/local_outlier_factor.py +2 -2
  141. snowflake/ml/modeling/neighbors/nearest_centroid.py +7 -14
  142. snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
  143. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -1
  144. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  145. snowflake/ml/modeling/neural_network/mlp_classifier.py +7 -1
  146. snowflake/ml/modeling/neural_network/mlp_regressor.py +3 -0
  147. snowflake/ml/modeling/pipeline/pipeline.py +16 -14
  148. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +8 -4
  149. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -7
  150. snowflake/ml/modeling/svm/linear_svc.py +25 -16
  151. snowflake/ml/modeling/svm/linear_svr.py +23 -17
  152. snowflake/ml/modeling/svm/nu_svc.py +5 -3
  153. snowflake/ml/modeling/svm/nu_svr.py +3 -1
  154. snowflake/ml/modeling/svm/svc.py +9 -5
  155. snowflake/ml/modeling/svm/svr.py +3 -1
  156. snowflake/ml/modeling/tree/decision_tree_classifier.py +21 -2
  157. snowflake/ml/modeling/tree/decision_tree_regressor.py +18 -2
  158. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -9
  159. snowflake/ml/modeling/tree/extra_tree_regressor.py +18 -2
  160. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +448 -0
  161. snowflake/ml/monitoring/_manager/model_monitor_manager.py +238 -0
  162. snowflake/ml/monitoring/entities/model_monitor_config.py +10 -10
  163. snowflake/ml/monitoring/model_monitor.py +37 -0
  164. snowflake/ml/registry/_manager/model_manager.py +15 -1
  165. snowflake/ml/registry/registry.py +32 -37
  166. snowflake/ml/version.py +1 -1
  167. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/METADATA +104 -12
  168. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/RECORD +172 -171
  169. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/WHEEL +1 -1
  170. snowflake/ml/monitoring/_client/model_monitor.py +0 -126
  171. snowflake/ml/monitoring/_client/model_monitor_manager.py +0 -361
  172. snowflake/ml/monitoring/_client/monitor_sql_client.py +0 -1335
  173. snowflake/ml/monitoring/entities/model_monitor_interval.py +0 -46
  174. /snowflake/ml/monitoring/{_client/model_monitor_version.py → model_monitor_version.py} +0 -0
  175. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/LICENSE.txt +0 -0
  176. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_client/model/model_version_impl.py

@@ -614,6 +614,102 @@ class ModelVersion(lineage_node.LineageNode):
  version_name=sql_identifier.SqlIdentifier(version),
  )

+ @overload
+ def create_service(
+ self,
+ *,
+ service_name: str,
+ image_build_compute_pool: Optional[str] = None,
+ service_compute_pool: str,
+ image_repo: str,
+ ingress_enabled: bool = False,
+ max_instances: int = 1,
+ cpu_requests: Optional[str] = None,
+ memory_requests: Optional[str] = None,
+ gpu_requests: Optional[str] = None,
+ num_workers: Optional[int] = None,
+ max_batch_rows: Optional[int] = None,
+ force_rebuild: bool = False,
+ build_external_access_integration: Optional[str] = None,
+ ) -> str:
+ """Create an inference service with the given spec.
+
+ Args:
+ service_name: The name of the service, can be fully qualified. If not fully qualified, the database or
+ schema of the model will be used.
+ image_build_compute_pool: The name of the compute pool used to build the model inference image. It uses
+ the service compute pool if None.
+ service_compute_pool: The name of the compute pool used to run the inference service.
+ image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
+ or schema of the model will be used.
+ ingress_enabled: If true, creates an service endpoint associated with the service. User must have
+ BIND SERVICE ENDPOINT privilege on the account.
+ max_instances: The maximum number of inference service instances to run. The same value it set to
+ MIN_INSTANCES property of the service.
+ cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
+ None, we attempt to utilize all the vCPU of the node.
+ memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
+ requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
+ gpu_requests: The gpu limit for GPU based inference. Can be integer, fractional or string values. Use CPU
+ if None.
+ num_workers: The number of workers to run the inference service for handling requests in parallel within an
+ instance of the service. By default, it is set to 2*vCPU+1 of the node for CPU based inference and 1 for
+ GPU based inference. For GPU based inference, please see best practices before playing with this value.
+ max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
+ force_rebuild: Whether to force a model inference image rebuild.
+ build_external_access_integration: (Deprecated) The external access integration for image build. This is
+ usually permitting access to conda & PyPI repositories.
+ """
+ ...
+
+ @overload
+ def create_service(
+ self,
+ *,
+ service_name: str,
+ image_build_compute_pool: Optional[str] = None,
+ service_compute_pool: str,
+ image_repo: str,
+ ingress_enabled: bool = False,
+ max_instances: int = 1,
+ cpu_requests: Optional[str] = None,
+ memory_requests: Optional[str] = None,
+ gpu_requests: Optional[str] = None,
+ num_workers: Optional[int] = None,
+ max_batch_rows: Optional[int] = None,
+ force_rebuild: bool = False,
+ build_external_access_integrations: Optional[List[str]] = None,
+ ) -> str:
+ """Create an inference service with the given spec.
+
+ Args:
+ service_name: The name of the service, can be fully qualified. If not fully qualified, the database or
+ schema of the model will be used.
+ image_build_compute_pool: The name of the compute pool used to build the model inference image. It uses
+ the service compute pool if None.
+ service_compute_pool: The name of the compute pool used to run the inference service.
+ image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
+ or schema of the model will be used.
+ ingress_enabled: If true, creates an service endpoint associated with the service. User must have
+ BIND SERVICE ENDPOINT privilege on the account.
+ max_instances: The maximum number of inference service instances to run. The same value it set to
+ MIN_INSTANCES property of the service.
+ cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
+ None, we attempt to utilize all the vCPU of the node.
+ memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
+ requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
+ gpu_requests: The gpu limit for GPU based inference. Can be integer, fractional or string values. Use CPU
+ if None.
+ num_workers: The number of workers to run the inference service for handling requests in parallel within an
+ instance of the service. By default, it is set to 2*vCPU+1 of the node for CPU based inference and 1 for
+ GPU based inference. For GPU based inference, please see best practices before playing with this value.
+ max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
+ force_rebuild: Whether to force a model inference image rebuild.
+ build_external_access_integrations: The external access integrations for image build. This is usually
+ permitting access to conda & PyPI repositories.
+ """
+ ...
+
  @telemetry.send_api_usage_telemetry(
  project=_TELEMETRY_PROJECT,
  subproject=_TELEMETRY_SUBPROJECT,
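Taken together, the two overloads let a caller either keep the deprecated singular `build_external_access_integration` keyword or switch to the plural form, and they expose the new `cpu_requests`/`memory_requests` knobs for CPU inference. A minimal usage sketch, assuming a `Registry` handle `reg` and a logged model version; every identifier below (database, compute pool, image repo, integration name) is a placeholder:

    # Hypothetical names throughout; adjust to your own account objects.
    mv = reg.get_model("MY_MODEL").version("V1")
    mv.create_service(
        service_name="MY_DB.MY_SCHEMA.MY_INFERENCE_SERVICE",
        service_compute_pool="MY_CPU_POOL",
        image_repo="MY_DB.MY_SCHEMA.MY_IMAGE_REPO",
        ingress_enabled=True,
        max_instances=1,
        cpu_requests="2",        # new: explicit CPU request for CPU based inference
        memory_requests="8GiB",  # new: explicit memory request, unit required
        build_external_access_integrations=["MY_PYPI_ACCESS_INTEGRATION"],  # plural form
    )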
@@ -638,11 +734,14 @@ class ModelVersion(lineage_node.LineageNode):
  image_repo: str,
  ingress_enabled: bool = False,
  max_instances: int = 1,
+ cpu_requests: Optional[str] = None,
+ memory_requests: Optional[str] = None,
  gpu_requests: Optional[str] = None,
  num_workers: Optional[int] = None,
  max_batch_rows: Optional[int] = None,
  force_rebuild: bool = False,
- build_external_access_integration: str,
+ build_external_access_integration: Optional[str] = None,
+ build_external_access_integrations: Optional[List[str]] = None,
  ) -> str:
  """Create an inference service with the given spec.

@@ -658,6 +757,10 @@ class ModelVersion(lineage_node.LineageNode):
  BIND SERVICE ENDPOINT privilege on the account.
  max_instances: The maximum number of inference service instances to run. The same value it set to
  MIN_INSTANCES property of the service.
+ cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
+ None, we attempt to utilize all the vCPU of the node.
+ memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
+ requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
  gpu_requests: The gpu limit for GPU based inference. Can be integer, fractional or string values. Use CPU
  if None.
  num_workers: The number of workers to run the inference service for handling requests in parallel within an
@@ -665,9 +768,14 @@ class ModelVersion(lineage_node.LineageNode):
  GPU based inference. For GPU based inference, please see best practices before playing with this value.
  max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
  force_rebuild: Whether to force a model inference image rebuild.
- build_external_access_integration: The external access integration for image build. This is usually
+ build_external_access_integration: (Deprecated) The external access integration for image build. This is
+ usually permitting access to conda & PyPI repositories.
+ build_external_access_integrations: The external access integrations for image build. This is usually
  permitting access to conda & PyPI repositories.

+ Raises:
+ ValueError: Illegal external access integration arguments.
+
  Returns:
  Result information about service creation from server.
  """
@@ -675,6 +783,20 @@ class ModelVersion(lineage_node.LineageNode):
  project=_TELEMETRY_PROJECT,
  subproject=_TELEMETRY_SUBPROJECT,
  )
+ if build_external_access_integration is not None:
+ msg = (
+ "`build_external_access_integration` is deprecated. "
+ "Please use `build_external_access_integrations` instead."
+ )
+ warnings.warn(msg, DeprecationWarning, stacklevel=2)
+ if build_external_access_integrations is not None:
+ msg = (
+ "`build_external_access_integration` and `build_external_access_integrations` cannot be set at the"
+ "same time. Please use `build_external_access_integrations` only."
+ )
+ raise ValueError(msg)
+ build_external_access_integrations = [build_external_access_integration]
+
  service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
  image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
  return self._service_ops.create_service(
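The guard above keeps the old keyword working: a lone `build_external_access_integration` is wrapped into a one-element list after a `DeprecationWarning`, while supplying both spellings is rejected. A hedged sketch of what a caller observes, reusing the illustrative `mv` handle from the earlier example:

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        mv.create_service(
            service_name="MY_INFERENCE_SERVICE",
            service_compute_pool="MY_CPU_POOL",
            image_repo="MY_IMAGE_REPO",
            build_external_access_integration="MY_PYPI_ACCESS_INTEGRATION",  # deprecated spelling
        )
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # Passing both build_external_access_integration and
    # build_external_access_integrations in the same call raises ValueError.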
@@ -696,11 +818,17 @@ class ModelVersion(lineage_node.LineageNode):
  image_repo_name=image_repo_id,
  ingress_enabled=ingress_enabled,
  max_instances=max_instances,
+ cpu_requests=cpu_requests,
+ memory_requests=memory_requests,
  gpu_requests=gpu_requests,
  num_workers=num_workers,
  max_batch_rows=max_batch_rows,
  force_rebuild=force_rebuild,
- build_external_access_integration=sql_identifier.SqlIdentifier(build_external_access_integration),
+ build_external_access_integrations=(
+ None
+ if build_external_access_integrations is None
+ else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
+ ),
  statement_params=statement_params,
  )

@@ -710,7 +838,7 @@ class ModelVersion(lineage_node.LineageNode):
  )
  def list_services(
  self,
- ) -> List[str]:
+ ) -> pd.DataFrame:
  """List all the service names using this model version.

  Returns:
@@ -722,12 +850,14 @@ class ModelVersion(lineage_node.LineageNode):
  subproject=_TELEMETRY_SUBPROJECT,
  )

- return self._model_ops.list_inference_services(
- database_name=None,
- schema_name=None,
- model_name=self._model_name,
- version_name=self._version_name,
- statement_params=statement_params,
+ return pd.DataFrame(
+ self._model_ops.show_services(
+ database_name=None,
+ schema_name=None,
+ model_name=self._model_name,
+ version_name=self._version_name,
+ statement_params=statement_params,
+ )
  )

  @telemetry.send_api_usage_telemetry(
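Since `list_services()` now wraps the `ServiceInfo` records (defined further down in `model_ops.py`) in a `pandas.DataFrame`, callers can filter the result like any other frame instead of iterating a list of strings. An illustrative sketch, with invented names:

    services_df = mv.list_services()
    print(services_df.columns.tolist())  # expected: ['name', 'inference_endpoint']
    # Keep only services that expose a public ingress endpoint.
    public_services = services_df[services_df["inference_endpoint"].notna()]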
@@ -755,12 +885,16 @@ class ModelVersion(lineage_node.LineageNode):
  project=_TELEMETRY_PROJECT,
  subproject=_TELEMETRY_SUBPROJECT,
  )
+
+ database_name_id, schema_name_id, service_name_id = sql_identifier.parse_fully_qualified_name(service_name)
  self._model_ops.delete_service(
  database_name=None,
  schema_name=None,
  model_name=self._model_name,
  version_name=self._version_name,
- service_name=service_name,
+ service_database_name=database_name_id,
+ service_schema_name=schema_name_id,
+ service_name=service_name_id,
  statement_params=statement_params,
  )
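Because the name is parsed with `parse_fully_qualified_name` before the call into `ModelOperator.delete_service`, both qualified and bare service names are accepted; the database and schema parts of a bare name are resolved downstream from the model's own location. A short sketch with placeholder identifiers:

    # Fully qualified: targets exactly this service.
    mv.delete_service("MY_DB.MY_SCHEMA.MY_INFERENCE_SERVICE")

    # Bare name: database and schema fall back to the model's location
    # (which in turn defaults to the registry's database and schema).
    mv.delete_service("MY_INFERENCE_SERVICE")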
snowflake/ml/model/_client/ops/model_ops.py

@@ -3,7 +3,7 @@ import os
  import pathlib
  import tempfile
  import warnings
- from typing import Any, Dict, List, Literal, Optional, Union, cast, overload
+ from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast, overload

  import yaml

@@ -31,7 +31,15 @@ from snowflake.snowpark import dataframe, row, session
  from snowflake.snowpark._internal import utils as snowpark_utils


+ class ServiceInfo(TypedDict):
+ name: str
+ inference_endpoint: Optional[str]
+
+
  class ModelOperator:
+ INFERENCE_SERVICE_ENDPOINT_NAME = "inference"
+ INGRESS_ENDPOINT_URL_SUFFIX = "snowflakecomputing.app"
+
  def __init__(
  self,
  session: session.Session,
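`ServiceInfo` is a plain `TypedDict`, so at runtime each entry is an ordinary dict with exactly these two keys; a minimal sketch with invented values:

    info: ServiceInfo = {
        "name": "MY_DB.MY_SCHEMA.MY_INFERENCE_SERVICE",
        "inference_endpoint": None,  # filled in only when a public ingress endpoint exists
    }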
@@ -514,7 +522,7 @@ class ModelOperator:
  statement_params=statement_params,
  )

- def list_inference_services(
+ def show_services(
  self,
  *,
  database_name: Optional[sql_identifier.SqlIdentifier],
@@ -522,7 +530,7 @@
  model_name: sql_identifier.SqlIdentifier,
  version_name: sql_identifier.SqlIdentifier,
  statement_params: Optional[Dict[str, Any]] = None,
- ) -> List[str]:
+ ) -> List[ServiceInfo]:
  res = self._model_client.show_versions(
  database_name=database_name,
  schema_name=schema_name,
@@ -530,8 +538,8 @@
  version_name=version_name,
  statement_params=statement_params,
  )
- col_name = self._model_client.MODEL_VERSION_INFERENCE_SERVICES_COL_NAME
- if col_name not in res[0]:
+ service_col_name = self._model_client.MODEL_VERSION_INFERENCE_SERVICES_COL_NAME
+ if service_col_name not in res[0]:
  # User need to opt into BCR 2024_08
  raise exceptions.SnowflakeMLException(
  error_code=error_codes.OPT_IN_REQUIRED,
@@ -540,9 +548,31 @@
  "https://docs.snowflake.com/en/release-notes/bcr-bundles/2024_08_bundle)."
  ),
  )
- json_array = json.loads(res[0][col_name])
+
+ json_array = json.loads(res[0][service_col_name])
  # TODO(sdas): Figure out a better way to filter out MODEL_BUILD_ services server side.
- return [str(service) for service in json_array if "MODEL_BUILD_" not in service]
+ fully_qualified_service_names = [str(service) for service in json_array if "MODEL_BUILD_" not in service]
+
+ result = []
+ ingress_url: Optional[str] = None
+ for fully_qualified_service_name in fully_qualified_service_names:
+ db, schema, service_name = sql_identifier.parse_fully_qualified_name(fully_qualified_service_name)
+ for res_row in self._service_client.show_endpoints(
+ database_name=db, schema_name=schema, service_name=service_name, statement_params=statement_params
+ ):
+ if (
+ res_row[self._service_client.MODEL_INFERENCE_SERVICE_ENDPOINT_NAME_COL_NAME]
+ == self.INFERENCE_SERVICE_ENDPOINT_NAME
+ and res_row[self._service_client.MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME] is not None
+ ):
+ ingress_url = str(
+ res_row[self._service_client.MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME]
+ )
+ if not ingress_url.endswith(ModelOperator.INGRESS_ENDPOINT_URL_SUFFIX):
+ ingress_url = None
+ result.append(ServiceInfo(name=fully_qualified_service_name, inference_endpoint=ingress_url))
+
+ return result

  def delete_service(
  self,
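The rewritten helper returns one `ServiceInfo` per user-facing service and only records an `inference_endpoint` when the service has an endpoint named `inference` whose ingress URL ends with `snowflakecomputing.app`. An illustrative return value (names and URL are placeholders):

    [
        {
            "name": "MY_DB.MY_SCHEMA.MY_INFERENCE_SERVICE",
            "inference_endpoint": "randomstr-myorg-myaccount.snowflakecomputing.app",
        },
        {"name": "MY_DB.MY_SCHEMA.PRIVATE_SERVICE", "inference_endpoint": None},
    ]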
@@ -551,32 +581,42 @@
  schema_name: Optional[sql_identifier.SqlIdentifier],
  model_name: sql_identifier.SqlIdentifier,
  version_name: sql_identifier.SqlIdentifier,
- service_name: str,
+ service_database_name: Optional[sql_identifier.SqlIdentifier],
+ service_schema_name: Optional[sql_identifier.SqlIdentifier],
+ service_name: sql_identifier.SqlIdentifier,
  statement_params: Optional[Dict[str, Any]] = None,
  ) -> None:
- services = self.list_inference_services(
+ services = self.show_services(
  database_name=database_name,
  schema_name=schema_name,
  model_name=model_name,
  version_name=version_name,
  statement_params=statement_params,
  )
- db, schema, service_name = sql_identifier.parse_fully_qualified_name(service_name)
+
+ # Fall back to the model's database and schema.
+ # database_name or schema_name are set if the model is created or get using fully qualified name
+ # Otherwise, the model's database and schema are same as registry's database and schema, which are set in the
+ # self._model_client.
+
+ service_database_name = service_database_name or database_name or self._model_client._database_name
+ service_schema_name = service_schema_name or schema_name or self._model_client._schema_name
  fully_qualified_service_name = sql_identifier.get_fully_qualified_name(
- db, schema, service_name, self._session.get_current_database(), self._session.get_current_schema()
+ service_database_name, service_schema_name, service_name
  )

- for service in services:
- if service == fully_qualified_service_name:
+ for service_info in services:
+ if service_info["name"] == fully_qualified_service_name:
  self._service_client.drop_service(
- database_name=db,
- schema_name=schema,
+ database_name=service_database_name,
+ schema_name=service_schema_name,
  service_name=service_name,
  statement_params=statement_params,
  )
  return
  raise ValueError(
- f"Service '{service_name}' does not exist or unauthorized or not associated with this model version."
+ f"Service '{fully_qualified_service_name}' does not exist "
+ "or unauthorized or not associated with this model version."
  )

  def get_model_version_manifest(

snowflake/ml/model/_client/ops/service_ops.py

@@ -100,13 +100,26 @@ class ServiceOperator:
  image_repo_name: sql_identifier.SqlIdentifier,
  ingress_enabled: bool,
  max_instances: int,
+ cpu_requests: Optional[str],
+ memory_requests: Optional[str],
  gpu_requests: Optional[str],
  num_workers: Optional[int],
  max_batch_rows: Optional[int],
  force_rebuild: bool,
- build_external_access_integration: sql_identifier.SqlIdentifier,
+ build_external_access_integrations: Optional[List[sql_identifier.SqlIdentifier]],
  statement_params: Optional[Dict[str, Any]] = None,
  ) -> str:
+
+ # Fall back to the registry's database and schema if not provided
+ database_name = database_name or self._database_name
+ schema_name = schema_name or self._schema_name
+
+ # Fall back to the model's database and schema if not provided then to the registry's database and schema
+ service_database_name = service_database_name or database_name or self._database_name
+ service_schema_name = service_schema_name or schema_name or self._schema_name
+
+ image_repo_database_name = image_repo_database_name or database_name or self._database_name
+ image_repo_schema_name = image_repo_schema_name or schema_name or self._schema_name
  # create a temp stage
  stage_name = sql_identifier.SqlIdentifier(
  snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.STAGE)
@@ -119,9 +132,17 @@
  )
  stage_path = self._stage_client.fully_qualified_object_name(database_name, schema_name, stage_name)

+ # TODO(hayu): Remove the version check after Snowflake 8.40.0 release
+ if (
+ snowflake_env.get_current_snowflake_version(self._session, statement_params=statement_params)
+ < version.parse("8.40.0")
+ and build_external_access_integrations is None
+ ):
+ raise ValueError("External access integrations are required in Snowflake < 8.40.0.")
+
  self._model_deployment_spec.save(
- database_name=database_name or self._database_name,
- schema_name=schema_name or self._schema_name,
+ database_name=database_name,
+ schema_name=schema_name,
  model_name=model_name,
  version_name=version_name,
  service_database_name=service_database_name,
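The new gate relies on `packaging.version` ordering, which compares release segments numerically rather than as strings, so a version such as 8.9.x correctly sorts below 8.40.0. A quick sketch of the comparison the check depends on:

    from packaging import version

    assert version.parse("8.39.2") < version.parse("8.40.0")
    assert version.parse("8.9.0") < version.parse("8.40.0")   # numeric, not lexicographic
    assert not ("8.9.0" < "8.40.0")                           # plain string comparison gets this wrong
    # Only pre-8.40.0 deployments hit the ValueError when no build
    # external access integrations are supplied.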
@@ -134,11 +155,13 @@
  image_repo_name=image_repo_name,
  ingress_enabled=ingress_enabled,
  max_instances=max_instances,
+ cpu=cpu_requests,
+ memory=memory_requests,
  gpu=gpu_requests,
  num_workers=num_workers,
  max_batch_rows=max_batch_rows,
  force_rebuild=force_rebuild,
- external_access_integration=build_external_access_integration,
+ external_access_integrations=build_external_access_integrations,
  )
  file_utils.upload_directory_to_stage(
  self._session,
@@ -163,32 +186,25 @@
  statement_params=statement_params,
  )

- # TODO(hayu): Remove the version check after Snowflake 8.37.0 release
- if snowflake_env.get_current_snowflake_version(
- self._session, statement_params=statement_params
- ) >= version.parse("8.37.0"):
- # stream service logs in a thread
- model_build_service_name = sql_identifier.SqlIdentifier(self._get_model_build_service_name(query_id))
- model_build_service = ServiceLogInfo(
- database_name=service_database_name,
- schema_name=service_schema_name,
- service_name=model_build_service_name,
- container_name="model-build",
- )
- model_inference_service = ServiceLogInfo(
- database_name=service_database_name,
- schema_name=service_schema_name,
- service_name=service_name,
- container_name="model-inference",
- )
- services = [model_build_service, model_inference_service]
- log_thread = self._start_service_log_streaming(
- async_job, services, model_inference_service_exists, force_rebuild, statement_params
- )
- log_thread.join()
- else:
- while not async_job.is_done():
- time.sleep(5)
+ # stream service logs in a thread
+ model_build_service_name = sql_identifier.SqlIdentifier(self._get_model_build_service_name(query_id))
+ model_build_service = ServiceLogInfo(
+ database_name=service_database_name,
+ schema_name=service_schema_name,
+ service_name=model_build_service_name,
+ container_name="model-build",
+ )
+ model_inference_service = ServiceLogInfo(
+ database_name=service_database_name,
+ schema_name=service_schema_name,
+ service_name=service_name,
+ container_name="model-inference",
+ )
+ services = [model_build_service, model_inference_service]
+ log_thread = self._start_service_log_streaming(
+ async_job, services, model_inference_service_exists, force_rebuild, statement_params
+ )
+ log_thread.join()

  res = cast(str, cast(List[row.Row], async_job.result())[0][0])
  module_logger.info(f"Inference service {service_name} deployment complete: {res}")

snowflake/ml/model/_client/service/model_deployment_spec.py

@@ -1,5 +1,5 @@
  import pathlib
- from typing import Optional
+ from typing import List, Optional

  import yaml

@@ -36,11 +36,13 @@ class ModelDeploymentSpec:
  image_repo_name: sql_identifier.SqlIdentifier,
  ingress_enabled: bool,
  max_instances: int,
+ cpu: Optional[str],
+ memory: Optional[str],
  gpu: Optional[str],
  num_workers: Optional[int],
  max_batch_rows: Optional[int],
  force_rebuild: bool,
- external_access_integration: sql_identifier.SqlIdentifier,
+ external_access_integrations: Optional[List[sql_identifier.SqlIdentifier]],
  ) -> None:
  # create the deployment spec
  # models spec
@@ -55,12 +57,15 @@ class ModelDeploymentSpec:
  fq_image_repo_name = identifier.get_schema_level_object_identifier(
  saved_image_repo_database.identifier(), saved_image_repo_schema.identifier(), image_repo_name.identifier()
  )
- image_build_dict = model_deployment_spec_schema.ImageBuildDict(
- compute_pool=image_build_compute_pool_name.identifier(),
- image_repo=fq_image_repo_name,
- force_rebuild=force_rebuild,
- external_access_integrations=[external_access_integration.identifier()],
- )
+ image_build_dict: model_deployment_spec_schema.ImageBuildDict = {
+ "compute_pool": image_build_compute_pool_name.identifier(),
+ "image_repo": fq_image_repo_name,
+ "force_rebuild": force_rebuild,
+ }
+ if external_access_integrations is not None:
+ image_build_dict["external_access_integrations"] = [
+ eai.identifier() for eai in external_access_integrations
+ ]

  # service spec
  saved_service_database = service_database_name or database_name
@@ -74,6 +79,12 @@ class ModelDeploymentSpec:
  ingress_enabled=ingress_enabled,
  max_instances=max_instances,
  )
+ if cpu:
+ service_dict["cpu"] = cpu
+
+ if memory:
+ service_dict["memory"] = memory
+
  if gpu:
  service_dict["gpu"] = gpu

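With the optional `cpu` and `memory` keys added, the spec this class serializes ends up shaped roughly as below. This is a hedged illustration written as a Python dict (the real artifact is YAML), showing only the sections touched by this diff with placeholder values; identity fields not visible here are omitted:

    deployment_spec_fragment = {
        "image_build": {
            "compute_pool": "MY_CPU_POOL",
            "image_repo": "MY_DB.MY_SCHEMA.MY_IMAGE_REPO",
            "force_rebuild": False,
            # written only when build external access integrations were supplied
            "external_access_integrations": ["MY_PYPI_ACCESS_INTEGRATION"],
        },
        "service": {
            "compute_pool": "MY_CPU_POOL",
            "ingress_enabled": True,
            "max_instances": 1,
            # cpu and memory (new), like gpu/num_workers/max_batch_rows, appear only when set
            "cpu": "2",
            "memory": "8GiB",
        },
    }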
snowflake/ml/model/_client/service/model_deployment_spec_schema.py

@@ -12,7 +12,7 @@ class ImageBuildDict(TypedDict):
  compute_pool: Required[str]
  image_repo: Required[str]
  force_rebuild: Required[bool]
- external_access_integrations: Required[List[str]]
+ external_access_integrations: NotRequired[List[str]]


  class ServiceDict(TypedDict):
@@ -20,6 +20,8 @@ class ServiceDict(TypedDict):
  compute_pool: Required[str]
  ingress_enabled: Required[bool]
  max_instances: Required[int]
+ cpu: NotRequired[str]
+ memory: NotRequired[str]
  gpu: NotRequired[str]
  num_workers: NotRequired[int]
  max_batch_rows: NotRequired[int]

snowflake/ml/model/_client/sql/service.py

@@ -10,7 +10,7 @@ from snowflake.ml._internal.utils import (
  sql_identifier,
  )
  from snowflake.ml.model._client.sql import _base
- from snowflake.snowpark import dataframe, functions as F, types as spt
+ from snowflake.snowpark import dataframe, functions as F, row, types as spt
  from snowflake.snowpark._internal import utils as snowpark_utils


@@ -26,6 +26,9 @@ class ServiceStatus(enum.Enum):


  class ServiceSQLClient(_base._BaseSQLClient):
+ MODEL_INFERENCE_SERVICE_ENDPOINT_NAME_COL_NAME = "name"
+ MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME = "ingress_url"
+
  def build_model_container(
  self,
  *,
@@ -216,3 +219,24 @@ class ServiceSQLClient(_base._BaseSQLClient):
  f"DROP SERVICE {self.fully_qualified_object_name(database_name, schema_name, service_name)}",
  statement_params=statement_params,
  ).has_dimensions(expected_rows=1, expected_cols=1).validate()
+
+ def show_endpoints(
+ self,
+ *,
+ database_name: Optional[sql_identifier.SqlIdentifier],
+ schema_name: Optional[sql_identifier.SqlIdentifier],
+ service_name: sql_identifier.SqlIdentifier,
+ statement_params: Optional[Dict[str, Any]] = None,
+ ) -> List[row.Row]:
+ fully_qualified_service_name = self.fully_qualified_object_name(database_name, schema_name, service_name)
+ res = (
+ query_result_checker.SqlResultValidator(
+ self._session,
+ (f"SHOW ENDPOINTS IN SERVICE {fully_qualified_service_name}"),
+ statement_params=statement_params,
+ )
+ .has_column(ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_NAME_COL_NAME, allow_empty=True)
+ .has_column(ServiceSQLClient.MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME, allow_empty=True)
+ )
+
+ return res.validate()
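`show_endpoints` simply issues `SHOW ENDPOINTS IN SERVICE ...` and asserts that the `name` and `ingress_url` columns come back; `show_services` in `model_ops.py` then scans the rows for the public inference endpoint. A hedged consumption sketch, assuming an already-constructed `ServiceSQLClient` named `service_sql_client` and a placeholder service name:

    rows = service_sql_client.show_endpoints(
        database_name=None,
        schema_name=None,
        service_name=sql_identifier.SqlIdentifier("MY_INFERENCE_SERVICE"),
    )
    for endpoint_row in rows:
        # Snowpark Row supports lookup by column name.
        if endpoint_row["name"] == "inference" and endpoint_row["ingress_url"] is not None:
            print("public endpoint:", endpoint_row["ingress_url"])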
snowflake/ml/model/_model_composer/model_composer.py

@@ -86,6 +86,7 @@ class ModelComposer:
  metadata: Optional[Dict[str, str]] = None,
  conda_dependencies: Optional[List[str]] = None,
  pip_requirements: Optional[List[str]] = None,
+ target_platforms: Optional[List[model_types.TargetPlatform]] = None,
  python_version: Optional[str] = None,
  ext_modules: Optional[List[ModuleType]] = None,
  code_paths: Optional[List[str]] = None,
@@ -131,6 +132,7 @@ class ModelComposer:
  model_rel_path=pathlib.PurePosixPath(ModelComposer.MODEL_DIR_REL_PATH),
  options=options,
  data_sources=self._get_data_sources(model, sample_input_data),
+ target_platforms=target_platforms,
  )

  file_utils.upload_directory_to_stage(

snowflake/ml/model/_model_composer/model_manifest/model_manifest.py

@@ -44,6 +44,7 @@ class ModelManifest:
  model_rel_path: pathlib.PurePosixPath,
  options: Optional[type_hints.ModelSaveOption] = None,
  data_sources: Optional[List[data_source.DataSource]] = None,
+ target_platforms: Optional[List[type_hints.TargetPlatform]] = None,
  ) -> None:
  if options is None:
  options = {}
@@ -132,6 +133,9 @@ class ModelManifest:
  if lineage_sources:
  manifest_dict["lineage_sources"] = lineage_sources

+ if target_platforms:
+ manifest_dict["target_platforms"] = [platform.value for platform in target_platforms]
+
  with (self.workspace_path / ModelManifest.MANIFEST_FILE_REL_PATH).open("w", encoding="utf-8") as f:
  # Anchors are not supported in the server, avoid that.
  yaml.SafeDumper.ignore_aliases = lambda *args: True # type: ignore[method-assign]
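When target platforms are provided, the manifest gains a plain list of enum values. A hedged illustration of the resulting fragment; the concrete member names of `type_hints.TargetPlatform` are assumed here, not shown in this diff:

    # Shape of the extra MANIFEST.yml entry, expressed as the dict that gets dumped.
    manifest_fragment = {
        "target_platforms": ["WAREHOUSE", "SNOWPARK_CONTAINER_SERVICES"],  # assumed member values
    }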
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py

@@ -95,3 +95,4 @@ class ModelManifestDict(TypedDict):
  methods: Required[List[ModelMethodDict]]
  user_data: NotRequired[Dict[str, Any]]
  lineage_sources: NotRequired[List[LineageSourceDict]]
+ target_platforms: NotRequired[List[str]]

snowflake/ml/model/_model_composer/model_method/infer_function.py_template

@@ -5,6 +5,7 @@ import sys

  import anyio
  import pandas as pd
+ import numpy as np
  from _snowflake import vectorized

  from snowflake.ml.model._packager import model_packager
@@ -47,4 +48,4 @@ def {function_name}(df: pd.DataFrame) -> dict:
  df.columns = input_cols
  input_df = df.astype(dtype=dtype_map)
  predictions_df = runner(input_df[input_cols])
- return predictions_df.to_dict("records")
+ return predictions_df.replace({{pd.NA: None, np.nan: None}}).to_dict("records")
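The template change matters because `to_dict("records")` would otherwise leak `NaN`/`NA` markers into the returned records, which do not serialize cleanly as SQL NULL or JSON null; mapping them to `None` first produces well-formed output. A standalone sketch of the pandas behavior the new return line relies on:

    import numpy as np
    import pandas as pd

    predictions_df = pd.DataFrame({"output_feature_0": [0.3, np.nan]})

    print(predictions_df.to_dict("records"))
    # [{'output_feature_0': 0.3}, {'output_feature_0': nan}]

    cleaned = predictions_df.replace({pd.NA: None, np.nan: None})
    print(cleaned.to_dict("records"))
    # [{'output_feature_0': 0.3}, {'output_feature_0': None}]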