snowflake-ml-python 1.7.1__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +82 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/_internal/utils/jwt_generator.py +141 -0
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +18 -18
- snowflake/ml/model/_client/model/model_version_impl.py +24 -8
- snowflake/ml/model/_client/ops/model_ops.py +2 -6
- snowflake/ml/model/_client/ops/service_ops.py +12 -7
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_client/sql/stage.py +1 -1
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +10 -9
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/pandas_handler.py +1 -1
- snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
- snowflake/ml/model/_signatures/utils.py +0 -1
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/pipeline/pipeline.py +6 -176
- snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
- snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +5 -170
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +9 -9
- snowflake/ml/monitoring/entities/model_monitor_config.py +28 -2
- snowflake/ml/monitoring/model_monitor.py +26 -11
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +53 -34
- snowflake/ml/utils/authentication.py +75 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.1.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +120 -53
- {snowflake_ml_python-1.7.1.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +71 -74
- {snowflake_ml_python-1.7.1.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- snowflake/ml/monitoring/entities/output_score_type.py +0 -90
- {snowflake_ml_python-1.7.1.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.1.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
snowflake/ml/_internal/utils/jwt_generator.py
ADDED
@@ -0,0 +1,141 @@
+import base64
+import hashlib
+import logging
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+import jwt
+from cryptography.hazmat.primitives import serialization
+from cryptography.hazmat.primitives.asymmetric import types
+
+logger = logging.getLogger(__name__)
+
+ISSUER = "iss"
+EXPIRE_TIME = "exp"
+ISSUE_TIME = "iat"
+SUBJECT = "sub"
+
+
+class JWTGenerator:
+    """
+    Creates and signs a JWT with the specified private key file, username, and account identifier. The JWTGenerator
+    keeps the generated token and only regenerates the token if a specified period of time has passed.
+    """
+
+    _DEFAULT_LIFETIME = timedelta(minutes=59)  # The tokens will have a 59-minute lifetime
+    _DEFAULT_RENEWAL_DELTA = timedelta(minutes=54)  # Tokens will be renewed after 54 minutes
+    ALGORITHM = "RS256"  # Tokens will be generated using RSA with SHA256
+
+    def __init__(
+        self,
+        account: str,
+        user: str,
+        private_key: types.PRIVATE_KEY_TYPES,
+        lifetime: Optional[timedelta] = None,
+        renewal_delay: Optional[timedelta] = None,
+    ) -> None:
+        """
+        Create a new JWTGenerator object.
+
+        Args:
+            account: The account identifier.
+            user: The username.
+            private_key: The private key used to sign the JWT.
+            lifetime: The lifetime of the token.
+            renewal_delay: The time before the token expires to renew it.
+        """
+
+        # Construct the fully qualified name of the user in uppercase.
+        self.account = JWTGenerator._prepare_account_name_for_jwt(account)
+        self.user = user.upper()
+        self.qualified_username = self.account + "." + self.user
+        self.private_key = private_key
+        self.public_key_fp = JWTGenerator._calculate_public_key_fingerprint(self.private_key)
+
+        self.issuer = self.qualified_username + "." + self.public_key_fp
+        self.lifetime = lifetime or JWTGenerator._DEFAULT_LIFETIME
+        self.renewal_delay = renewal_delay or JWTGenerator._DEFAULT_RENEWAL_DELTA
+        self.renew_time = datetime.now(timezone.utc)
+        self.token: Optional[str] = None
+
+        logger.info(
+            """Creating JWTGenerator with arguments
+            account : %s, user : %s, lifetime : %s, renewal_delay : %s""",
+            self.account,
+            self.user,
+            self.lifetime,
+            self.renewal_delay,
+        )
+
+    @staticmethod
+    def _prepare_account_name_for_jwt(raw_account: str) -> str:
+        account = raw_account
+        if ".global" not in account:
+            # Handle the general case.
+            idx = account.find(".")
+            if idx > 0:
+                account = account[0:idx]
+        else:
+            # Handle the replication case.
+            idx = account.find("-")
+            if idx > 0:
+                account = account[0:idx]
+        # Use uppercase for the account identifier.
+        return account.upper()
+
+    def get_token(self) -> str:
+        now = datetime.now(timezone.utc)  # Fetch the current time
+        if self.token is not None and self.renew_time > now:
+            return self.token
+
+        # If the token has expired or doesn't exist, regenerate the token.
+        logger.info(
+            "Generating a new token because the present time (%s) is later than the renewal time (%s)",
+            now,
+            self.renew_time,
+        )
+        # Calculate the next time we need to renew the token.
+        self.renew_time = now + self.renewal_delay
+
+        # Create our payload
+        payload = {
+            # Set the issuer to the fully qualified username concatenated with the public key fingerprint.
+            ISSUER: self.issuer,
+            # Set the subject to the fully qualified username.
+            SUBJECT: self.qualified_username,
+            # Set the issue time to now.
+            ISSUE_TIME: now,
+            # Set the expiration time, based on the lifetime specified for this object.
+            EXPIRE_TIME: now + self.lifetime,
+        }
+
+        # Regenerate the actual token
+        token = jwt.encode(payload, key=self.private_key, algorithm=JWTGenerator.ALGORITHM)
+        # If you are using a version of PyJWT prior to 2.0, jwt.encode returns a byte string instead of a string.
+        # If the token is a byte string, convert it to a string.
+        if isinstance(token, bytes):
+            token = token.decode("utf-8")
+        self.token = token
+        logger.info(
+            "Generated a JWT with the following payload: %s",
+            jwt.decode(self.token, key=self.private_key.public_key(), algorithms=[JWTGenerator.ALGORITHM]),
+        )
+
+        return token
+
+    @staticmethod
+    def _calculate_public_key_fingerprint(private_key: types.PRIVATE_KEY_TYPES) -> str:
+        # Get the raw bytes of public key.
+        public_key_raw = private_key.public_key().public_bytes(
+            serialization.Encoding.DER, serialization.PublicFormat.SubjectPublicKeyInfo
+        )
+
+        # Get the sha256 hash of the raw bytes.
+        sha256hash = hashlib.sha256()
+        sha256hash.update(public_key_raw)
+
+        # Base64-encode the value and prepend the prefix 'SHA256:'.
+        public_key_fp = "SHA256:" + base64.b64encode(sha256hash.digest()).decode("utf-8")
+        logger.info("Public key fingerprint is %s", public_key_fp)
+
+        return public_key_fp
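For context, a minimal sketch of how this helper could be exercised on its own, assuming a PEM-encoded RSA key on disk; the key file name and the account/user values below are illustrative, not part of the package:

from datetime import timedelta

from cryptography.hazmat.primitives import serialization

from snowflake.ml._internal.utils.jwt_generator import JWTGenerator

# Load the signing key with the standard cryptography API (hypothetical file name).
with open("rsa_key.p8", "rb") as key_file:
    private_key = serialization.load_pem_private_key(key_file.read(), password=None)

# Lifetime and renewal delay default to 59 and 54 minutes when omitted.
generator = JWTGenerator("MYORG-MYACCOUNT", "MY_USER", private_key, lifetime=timedelta(minutes=30))
token = generator.get_token()  # RS256-signed JWT, cached until the renewal delay elapses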
snowflake/ml/data/__init__.py
CHANGED
@@ -1,5 +1,8 @@
+from pkgutil import extend_path
+
 from .data_connector import DataConnector
 from .data_ingestor import DataIngestor, DataIngestorType
 from .data_source import DataFrameInfo, DatasetInfo, DataSource

 __all__ = ["DataConnector", "DataSource", "DataFrameInfo", "DatasetInfo", "DataIngestor", "DataIngestorType"]
+__path__ = extend_path(__path__, __name__)
snowflake/ml/data/_internal/arrow_ingestor.py
CHANGED
@@ -2,7 +2,7 @@ import collections
 import logging
 import os
 import time
-from typing import Any, Deque, Dict, Iterator, List, Optional, Union
+from typing import Any, Deque, Dict, Iterator, List, Optional, Sequence, Union

 import numpy as np
 import numpy.typing as npt
@@ -47,7 +47,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
     def __init__(
         self,
         session: snowpark.Session,
-        data_sources:
+        data_sources: Sequence[data_source.DataSource],
         format: Optional[str] = None,
         **kwargs: Any,
     ) -> None:
@@ -60,14 +60,14 @@ class ArrowIngestor(data_ingestor.DataIngestor):
             kwargs: Miscellaneous arguments passed to underlying PyArrow Dataset initializer.
         """
         self._session = session
-        self._data_sources = data_sources
+        self._data_sources = list(data_sources)
         self._format = format
         self._kwargs = kwargs

         self._schema: Optional[pa.Schema] = None

     @classmethod
-    def from_sources(cls, session: snowpark.Session, sources:
+    def from_sources(cls, session: snowpark.Session, sources: Sequence[data_source.DataSource]) -> "ArrowIngestor":
         return cls(session, sources)

     @property
snowflake/ml/data/data_connector.py
CHANGED
@@ -1,5 +1,16 @@
 import os
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Sequence,
+    Type,
+    TypeVar,
+    cast,
+)

 import numpy.typing as npt
 from typing_extensions import deprecated
@@ -12,6 +23,7 @@ from snowflake.ml.modeling._internal.constants import (
     IN_ML_RUNTIME_ENV_VAR,
     USE_OPTIMIZED_DATA_INGESTOR,
 )
+from snowflake.snowpark import context as sf_context

 if TYPE_CHECKING:
     import pandas as pd
@@ -35,8 +47,10 @@ class DataConnector:
     def __init__(
         self,
         ingestor: data_ingestor.DataIngestor,
+        **kwargs: Any,
     ) -> None:
         self._ingestor = ingestor
+        self._kwargs = kwargs

     @classmethod
     @snowpark._internal.utils.private_preview(version="1.6.0")
@@ -44,20 +58,34 @@ class DataConnector:
         cls: Type[DataConnectorType],
         df: snowpark.DataFrame,
         ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> DataConnectorType:
         if len(df.queries["queries"]) != 1 or len(df.queries["post_actions"]) != 0:
             raise ValueError("DataFrames with multiple queries and/or post-actions not supported")
-
-
-
+        return cast(
+            DataConnectorType,
+            cls.from_sql(df.queries["queries"][0], session=df._session, ingestor_class=ingestor_class, **kwargs),
+        )
+
+    @classmethod
+    @snowpark._internal.utils.private_preview(version="1.7.3")
+    def from_sql(
+        cls: Type[DataConnectorType],
+        query: str,
+        session: Optional[snowpark.Session] = None,
+        ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
+        **kwargs: Any,
+    ) -> DataConnectorType:
+        session = session or sf_context.get_active_session()
+        source = data_source.DataFrameInfo(query)
+        return cls.from_sources(session, [source], ingestor_class=ingestor_class, **kwargs)

     @classmethod
     def from_dataset(
         cls: Type[DataConnectorType],
         ds: "dataset.Dataset",
         ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> DataConnectorType:
         dsv = ds.selected_version
         assert dsv is not None
@@ -75,9 +103,9 @@ class DataConnector:
     def from_sources(
         cls: Type[DataConnectorType],
         session: snowpark.Session,
-        sources:
+        sources: Sequence[data_source.DataSource],
         ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> DataConnectorType:
         ingestor_class = ingestor_class or cls.DEFAULT_INGESTOR_CLASS
         ingestor = ingestor_class.from_sources(session, sources)
@@ -130,7 +158,11 @@ class DataConnector:
         func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
     )
     def to_torch_datapipe(
-        self,
+        self,
+        *,
+        batch_size: int,
+        shuffle: bool = False,
+        drop_last_batch: bool = True,
     ) -> "torch_data.IterDataPipe":  # type: ignore[type-arg]
         """Transform the Snowflake data into a ready-to-use Pytorch datapipe.

@@ -149,8 +181,13 @@ class DataConnector:
         """
         from snowflake.ml.data import torch_utils

+        expand_dims = self._kwargs.get("expand_dims", True)
         return torch_utils.TorchDataPipeWrapper(
-            self._ingestor,
+            self._ingestor,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last_batch,
+            expand_dims=expand_dims,
         )

     @telemetry.send_api_usage_telemetry(
@@ -179,8 +216,13 @@ class DataConnector:
         """
         from snowflake.ml.data import torch_utils

+        expand_dims = self._kwargs.get("expand_dims", True)
         return torch_utils.TorchDatasetWrapper(
-            self._ingestor,
+            self._ingestor,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last_batch,
+            expand_dims=expand_dims,
         )

     @telemetry.send_api_usage_telemetry(
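The new from_sql constructor (private preview) builds a connector straight from a query string, and from_dataframe now routes through it. A hedged usage sketch, assuming an existing Snowpark session and table; the names below are placeholders:

from snowflake.ml.data import DataConnector
from snowflake.snowpark import Session

session = Session.builder.configs(connection_parameters).create()  # connection_parameters assumed to be defined

# Build a connector from SQL text; the query becomes a DataFrameInfo data source.
dc = DataConnector.from_sql("SELECT * FROM MY_DB.MY_SCHEMA.TRAINING_DATA", session=session)

pdf = dc.to_pandas()                      # materialize as a pandas DataFrame
ds = dc.to_torch_dataset(batch_size=32)   # or stream batches into PyTorch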
snowflake/ml/data/data_ingestor.py
CHANGED
@@ -6,6 +6,7 @@ from typing import (
     List,
     Optional,
     Protocol,
+    Sequence,
     Type,
     TypeVar,
 )
@@ -25,7 +26,7 @@ DataIngestorType = TypeVar("DataIngestorType", bound="DataIngestor")
 class DataIngestor(Protocol):
     @classmethod
     def from_sources(
-        cls: Type[DataIngestorType], session: snowpark.Session, sources:
+        cls: Type[DataIngestorType], session: snowpark.Session, sources: Sequence[data_source.DataSource]
     ) -> DataIngestorType:
         raise NotImplementedError

snowflake/ml/data/torch_utils.py
CHANGED
@@ -17,6 +17,7 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         batch_size: Optional[int],
         shuffle: bool = False,
         drop_last: bool = False,
+        expand_dims: bool = True,
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_dataset() instead"""
         squeeze = False
@@ -29,6 +30,7 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         self._shuffle = shuffle
         self._drop_last = drop_last
         self._squeeze_outputs = squeeze
+        self._expand_dims = expand_dims

     def __iter__(self) -> Iterator[Dict[str, Union[npt.NDArray[Any], List[Any]]]]:
         max_idx = 0
@@ -47,7 +49,10 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         ):
             # Skip indices during multi-process data loading to prevent data duplication
             if counter == filter_idx:
-                yield {
+                yield {
+                    k: _preprocess_array(v, squeeze=self._squeeze_outputs, expand_dims=self._expand_dims)
+                    for k, v in batch.items()
+                }
             if counter < max_idx:
                 counter += 1
             else:
@@ -58,13 +63,21 @@ class TorchDataPipeWrapper(TorchDatasetWrapper, torch.utils.data.IterDataPipe[Di
     """Wrap a DataIngestor into a PyTorch IterDataPipe"""

     def __init__(
-        self,
+        self,
+        ingestor: data_ingestor.DataIngestor,
+        *,
+        batch_size: int,
+        shuffle: bool = False,
+        drop_last: bool = False,
+        expand_dims: bool = True,
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_datapipe() instead"""
-        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, expand_dims=expand_dims)


-def _preprocess_array(
+def _preprocess_array(
+    arr: npt.NDArray[Any], squeeze: bool = False, expand_dims: bool = True
+) -> Union[npt.NDArray[Any], List[np.object_]]:
     """Preprocesses batch column values."""
     single_dimensional = arr.ndim < 2 and not arr.dtype == np.object_

@@ -73,7 +86,7 @@ def _preprocess_array(arr: npt.NDArray[Any], squeeze: bool = False) -> Union[npt
     arr = arr.squeeze(axis=0)

     # For single dimensional data,
-    if single_dimensional:
+    if single_dimensional and expand_dims:
         axis = 0 if arr.ndim == 0 else 1
         arr = np.expand_dims(arr, axis=axis)

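Roughly, the new expand_dims flag controls whether a single-dimensional batch column gains a trailing axis: with the default True it does, while FileSet passes expand_dims=False to keep the legacy flat shape. A small illustration with plain numpy; the values are made up:

import numpy as np

col = np.array([10, 11, 12])            # one batch column of 3 rows, shape (3,)
expanded = np.expand_dims(col, axis=1)  # shape (3, 1): default DataConnector behavior
flat = col                              # shape (3,):   what expand_dims=False preserves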
snowflake/ml/feature_store/examples/example_helper.py
CHANGED
@@ -45,8 +45,9 @@ class ExampleHelper:
         """Return a dataframe object about descriptions of all examples."""
         root_dir = Path(__file__).parent
         rows = []
+        hide_folders = ["citibike_trip_features", "source_data"]
         for f_name in os.listdir(root_dir):
-            if os.path.isdir(os.path.join(root_dir, f_name)) and f_name[0].isalpha() and f_name
+            if os.path.isdir(os.path.join(root_dir, f_name)) and f_name[0].isalpha() and f_name not in hide_folders:
                 source_file_path = root_dir.joinpath(f"{f_name}/source.yaml")
                 source_dict = self._read_yaml(str(source_file_path))
                 rows.append((f_name, source_dict["model_category"], source_dict["desc"], source_dict["label_columns"]))
snowflake/ml/fileset/fileset.py
CHANGED
@@ -11,11 +11,9 @@ from snowflake.ml._internal.exceptions import (
     fileset_error_messages,
     fileset_errors,
 )
-from snowflake.ml._internal.utils import
-
-
-    snowpark_dataframe_utils,
-)
+from snowflake.ml._internal.utils import identifier, snowpark_dataframe_utils
+from snowflake.ml.data import data_connector
+from snowflake.ml.data._internal import arrow_ingestor
 from snowflake.ml.fileset import sfcfs
 from snowflake.snowpark import exceptions as snowpark_exceptions, functions

@@ -285,6 +283,16 @@ class FileSet:
         """Get the Snowflake absolute path to this FileSet directory."""
         return _fileset_absolute_path(self._target_stage_loc, self.name)

+    def _to_data_connector(self) -> data_connector.DataConnector:
+        self._fs.optimize_read(self._list_files())
+        ingester = arrow_ingestor.ArrowIngestor(
+            self._snowpark_session,
+            self._list_files(),
+            format="parquet",
+            filesystem=self._fs,
+        )
+        return data_connector.DataConnector(ingester, expand_dims=False)
+
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
     )
@@ -362,13 +370,9 @@ class FileSet:
         ----
         {'_COL_1':[10]}
         """
-
-
-
-        self._fs.optimize_read(self._list_files())
-
-        input_dp = IterableWrapper(self._list_files())
-        return torch_datapipe_module.ReadAndParseParquet(input_dp, self._fs, batch_size, shuffle, drop_last_batch)
+        return self._to_data_connector().to_torch_datapipe(
+            batch_size=batch_size, shuffle=shuffle, drop_last_batch=drop_last_batch
+        )

     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -402,12 +406,8 @@ class FileSet:
         ----
         {'_COL_1': <tf.Tensor: shape=(1,), dtype=int64, numpy=[10]>}
         """
-
-
-        self._fs.optimize_read(self._list_files())
-
-        return tf_dataset_module.read_and_parse_parquet(
-            self._list_files(), self._fs, batch_size, shuffle, drop_last_batch
+        return self._to_data_connector().to_tf_dataset(
+            batch_size=batch_size, shuffle=shuffle, drop_last_batch=drop_last_batch
         )

     @telemetry.send_api_usage_telemetry(
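FileSet keeps its public readers but now routes them through the shared DataConnector/ArrowIngestor path, with expand_dims=False so output shapes stay as before. A hedged sketch of the unchanged call surface, assuming fs is an existing FileSet (creation omitted):

# fs: an existing snowflake.ml.fileset.fileset.FileSet instance
datapipe = fs.to_torch_datapipe(batch_size=32, shuffle=True, drop_last_batch=True)
tf_ds = fs.to_tf_dataset(batch_size=32, shuffle=False, drop_last_batch=True)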
snowflake/ml/model/_client/model/model_version_impl.py
CHANGED
@@ -14,7 +14,7 @@ from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
 from snowflake.ml.model._model_composer import model_composer
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
 from snowflake.ml.model._packager.model_handlers import snowmlmodel
-from snowflake.snowpark import Session, dataframe
+from snowflake.snowpark import Session, async_job, dataframe

 _TELEMETRY_PROJECT = "MLOps"
 _TELEMETRY_SUBPROJECT = "ModelManagement"
@@ -447,13 +447,15 @@ class ModelVersion(lineage_node.LineageNode):
         target_function_info = functions[0]

         if service_name:
+            database_name_id, schema_name_id, service_name_id = sql_identifier.parse_fully_qualified_name(service_name)
+
             return self._model_ops.invoke_method(
                 method_name=sql_identifier.SqlIdentifier(target_function_info["name"]),
                 signature=target_function_info["signature"],
                 X=X,
-                database_name=
-                schema_name=
-                service_name=
+                database_name=database_name_id,
+                schema_name=schema_name_id,
+                service_name=service_name_id,
                 strict_input_validation=strict_input_validation,
                 statement_params=statement_params,
             )
@@ -631,7 +633,8 @@ class ModelVersion(lineage_node.LineageNode):
         max_batch_rows: Optional[int] = None,
         force_rebuild: bool = False,
         build_external_access_integration: Optional[str] = None,
-
+        block: bool = True,
+    ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.

         Args:
@@ -659,6 +662,9 @@ class ModelVersion(lineage_node.LineageNode):
             force_rebuild: Whether to force a model inference image rebuild.
             build_external_access_integration: (Deprecated) The external access integration for image build. This is
                 usually permitting access to conda & PyPI repositories.
+            block: A bool value indicating whether this function will wait until the service is available.
+                When it is ``False``, this function executes the underlying service creation asynchronously
+                and returns an :class:`AsyncJob`.
         """
         ...

@@ -679,7 +685,8 @@ class ModelVersion(lineage_node.LineageNode):
         max_batch_rows: Optional[int] = None,
         force_rebuild: bool = False,
         build_external_access_integrations: Optional[List[str]] = None,
-
+        block: bool = True,
+    ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.

         Args:
@@ -707,6 +714,9 @@ class ModelVersion(lineage_node.LineageNode):
             force_rebuild: Whether to force a model inference image rebuild.
             build_external_access_integrations: The external access integrations for image build. This is usually
                 permitting access to conda & PyPI repositories.
+            block: A bool value indicating whether this function will wait until the service is available.
+                When it is ``False``, this function executes the underlying service creation asynchronously
+                and returns an :class:`AsyncJob`.
         """
         ...

@@ -742,7 +752,8 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integration: Optional[str] = None,
         build_external_access_integrations: Optional[List[str]] = None,
-
+        block: bool = True,
+    ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.

         Args:
@@ -772,12 +783,16 @@ class ModelVersion(lineage_node.LineageNode):
                 usually permitting access to conda & PyPI repositories.
             build_external_access_integrations: The external access integrations for image build. This is usually
                 permitting access to conda & PyPI repositories.
+            block: A bool value indicating whether this function will wait until the service is available.
+                When it is False, this function executes the underlying service creation asynchronously
+                and returns an AsyncJob.

         Raises:
             ValueError: Illegal external access integration arguments.

         Returns:
-
+            If `block=True`, return result information about service creation from server.
+            Otherwise, return the service creation AsyncJob.
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
@@ -829,6 +844,7 @@ class ModelVersion(lineage_node.LineageNode):
                 if build_external_access_integrations is None
                 else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
             ),
+            block=block,
             statement_params=statement_params,
         )

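With the new block argument, create_service can return immediately instead of streaming build logs until the service is ready. A hedged sketch; the keyword arguments other than block are assumptions about the existing signature, and the object names are placeholders:

# mv: a ModelVersion obtained from the registry (lookup omitted)
job = mv.create_service(
    service_name="MY_DB.MY_SCHEMA.MY_INFERENCE_SERVICE",  # assumed parameter name
    service_compute_pool="MY_COMPUTE_POOL",               # assumed parameter name
    image_repo="MY_DB.MY_SCHEMA.MY_IMAGE_REPO",           # assumed parameter name
    block=False,        # return a Snowpark AsyncJob instead of waiting
)
result = job.result()   # block later, once the deployment should be done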
snowflake/ml/model/_client/ops/model_ops.py
CHANGED
@@ -168,14 +168,10 @@ class ModelOperator:
         schema_name: Optional[sql_identifier.SqlIdentifier],
         model_name: sql_identifier.SqlIdentifier,
         version_name: sql_identifier.SqlIdentifier,
+        model_exists: bool,
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> None:
-        if
-            database_name=database_name,
-            schema_name=schema_name,
-            model_name=model_name,
-            statement_params=statement_params,
-        ):
+        if model_exists:
             return self._model_version_client.add_version_from_model_version(
                 source_database_name=source_database_name,
                 source_schema_name=source_schema_name,
snowflake/ml/model/_client/ops/service_ops.py
CHANGED
@@ -6,7 +6,7 @@ import re
 import tempfile
 import threading
 import time
-from typing import Any, Dict, List, Optional, Tuple, cast
+from typing import Any, Dict, List, Optional, Tuple, Union, cast

 from packaging import version

@@ -15,7 +15,7 @@ from snowflake.ml._internal import file_utils
 from snowflake.ml._internal.utils import service_logger, snowflake_env, sql_identifier
 from snowflake.ml.model._client.service import model_deployment_spec
 from snowflake.ml.model._client.sql import service as service_sql, stage as stage_sql
-from snowflake.snowpark import exceptions, row, session
+from snowflake.snowpark import async_job, exceptions, row, session
 from snowflake.snowpark._internal import utils as snowpark_utils

 module_logger = service_logger.get_logger(__name__, service_logger.LogColor.GREY)
@@ -107,8 +107,9 @@ class ServiceOperator:
         max_batch_rows: Optional[int],
         force_rebuild: bool,
         build_external_access_integrations: Optional[List[sql_identifier.SqlIdentifier]],
+        block: bool,
         statement_params: Optional[Dict[str, Any]] = None,
-    ) -> str:
+    ) -> Union[str, async_job.AsyncJob]:

         # Fall back to the registry's database and schema if not provided
         database_name = database_name or self._database_name
@@ -204,11 +205,15 @@ class ServiceOperator:
         log_thread = self._start_service_log_streaming(
             async_job, services, model_inference_service_exists, force_rebuild, statement_params
         )
-        log_thread.join()

-
-
-
+        if block:
+            log_thread.join()
+
+            res = cast(str, cast(List[row.Row], async_job.result())[0][0])
+            module_logger.info(f"Inference service {service_name} deployment complete: {res}")
+            return res
+        else:
+            return async_job

     def _start_service_log_streaming(
         self,
snowflake/ml/model/_client/sql/model_version.py
CHANGED
@@ -10,6 +10,7 @@ from snowflake.ml._internal.utils import (
     sql_identifier,
 )
 from snowflake.ml.model._client.sql import _base
+from snowflake.ml.model._model_composer.model_method import constants
 from snowflake.snowpark import dataframe, functions as F, row, types as spt
 from snowflake.snowpark._internal import utils as snowpark_utils

@@ -333,6 +334,11 @@ class ModelVersionSQLClient(_base._BaseSQLClient):

         args_sql = ", ".join(args_sql_list)

+        wide_input = len(input_args) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
+        if wide_input:
+            input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
+            args_sql = f"object_construct_keep_null({input_args_sql})"
+
         sql = textwrap.dedent(
             f"""WITH {','.join(with_statements)}
             SELECT *,
@@ -412,6 +418,11 @@ class ModelVersionSQLClient(_base._BaseSQLClient):

         args_sql = ", ".join(args_sql_list)

+        wide_input = len(input_args) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
+        if wide_input:
+            input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
+            args_sql = f"object_construct_keep_null({input_args_sql})"
+
         sql = textwrap.dedent(
             f"""WITH {','.join(with_statements)}
             SELECT *,