PyPI - snowflake-ml-python - Versions diffs - 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl - Mend

snowflake-ml-python 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

snowflake/ml/_internal/telemetry.py +6 -9
snowflake/ml/_internal/utils/connection_params.py +196 -0
snowflake/ml/_internal/utils/identifier.py +1 -1
snowflake/ml/_internal/utils/mixins.py +61 -0
snowflake/ml/jobs/__init__.py +2 -0
snowflake/ml/jobs/_utils/constants.py +3 -2
snowflake/ml/jobs/_utils/function_payload_utils.py +43 -0
snowflake/ml/jobs/_utils/interop_utils.py +63 -4
snowflake/ml/jobs/_utils/payload_utils.py +89 -40
snowflake/ml/jobs/_utils/query_helper.py +9 -0
snowflake/ml/jobs/_utils/scripts/constants.py +19 -3
snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +8 -26
snowflake/ml/jobs/_utils/spec_utils.py +29 -5
snowflake/ml/jobs/_utils/stage_utils.py +119 -0
snowflake/ml/jobs/_utils/types.py +5 -1
snowflake/ml/jobs/decorators.py +20 -28
snowflake/ml/jobs/job.py +197 -61
snowflake/ml/jobs/manager.py +253 -121
snowflake/ml/model/_client/model/model_impl.py +58 -0
snowflake/ml/model/_client/model/model_version_impl.py +90 -0
snowflake/ml/model/_client/ops/model_ops.py +18 -6
snowflake/ml/model/_client/ops/service_ops.py +23 -6
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -0
snowflake/ml/model/_client/sql/service.py +68 -20
snowflake/ml/model/_client/sql/stage.py +5 -2
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -10
snowflake/ml/model/_packager/model_env/model_env.py +35 -27
snowflake/ml/model/_packager/model_handlers/pytorch.py +5 -1
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +103 -73
snowflake/ml/model/_packager/model_meta/model_meta.py +3 -1
snowflake/ml/model/_signatures/core.py +24 -0
snowflake/ml/model/_signatures/snowpark_handler.py +55 -3
snowflake/ml/model/target_platform.py +11 -0
snowflake/ml/model/task.py +9 -0
snowflake/ml/model/type_hints.py +5 -13
snowflake/ml/modeling/metrics/metrics_utils.py +2 -0
snowflake/ml/monitoring/explain_visualize.py +2 -2
snowflake/ml/monitoring/model_monitor.py +0 -4
snowflake/ml/registry/_manager/model_manager.py +30 -15
snowflake/ml/registry/registry.py +144 -47
snowflake/ml/utils/connection_params.py +1 -1
snowflake/ml/utils/html_utils.py +263 -0
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/METADATA +64 -19
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/RECORD +48 -41
snowflake/ml/monitoring/model_monitor_version.py +0 -1
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/licenses/LICENSE.txt +0 -0
{snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/top_level.txt +0 -0

snowflake/ml/_internal/telemetry.py CHANGED Viewed

@@ -411,16 +411,13 @@ def send_custom_usage(
     **kwargs: Any,
 ) -> None:
     conn = _get_snowflake_connection()
-    if conn is None:
-        raise ValueError(
-            """Snowflake connection is required to send custom telemetry. This means there
-            must be at least one active session, or that telemetry is being sent from within an SPCS service."""
-        )
-    client = _SourceTelemetryClient(conn=conn, project=project, subproject=subproject)
-    common_metrics = client._create_basic_telemetry_data(telemetry_type=telemetry_type)
-    data = {**common_metrics, TelemetryField.KEY_DATA.value: data, **kwargs}
-    client._send(msg=data)
+    # Send telemetry if Snowflake connection is available.
+    if conn is not None:
+        client = _SourceTelemetryClient(conn=conn, project=project, subproject=subproject)
+        common_metrics = client._create_basic_telemetry_data(telemetry_type=telemetry_type)
+        data = {**common_metrics, TelemetryField.KEY_DATA.value: data, **kwargs}
+        client._send(msg=data)
 def send_api_usage_telemetry(

snowflake/ml/_internal/utils/connection_params.py ADDED Viewed

@@ -0,0 +1,196 @@
+import configparser
+import os
+from typing import Optional, Union
+from absl import logging
+from cryptography.hazmat import backends
+from cryptography.hazmat.primitives import serialization
+_DEFAULT_CONNECTION_FILE = "~/.snowsql/config"
+def _read_token(token_file: str = "") -> str:
+    """
+    Reads token from environment or file provided.
+    First tries to read the token from environment variable
+    (`SNOWFLAKE_TOKEN`) followed by the token file.
+    Both the options are tried out in SnowServices.
+    Args:
+        token_file: File from which token needs to be read. Optional.
+    Returns:
+        the token.
+    """
+    token = os.getenv("SNOWFLAKE_TOKEN", "")
+    if token:
+        return token
+    if token_file and os.path.exists(token_file):
+        with open(token_file) as f:
+            token = f.read()
+    return token
+_ENCRYPTED_PKCS8_PK_HEADER = b"-----BEGIN ENCRYPTED PRIVATE KEY-----"
+_UNENCRYPTED_PKCS8_PK_HEADER = b"-----BEGIN PRIVATE KEY-----"
+def _load_pem_to_der(private_key_path: str) -> bytes:
+    """Given a private key file path (in PEM format), decode key data into DER format."""
+    with open(private_key_path, "rb") as f:
+        private_key_pem = f.read()
+    private_key_passphrase: Optional[str] = os.getenv("SNOWFLAKE_PRIVATE_KEY_PASSPHRASE", None)
+    # Only PKCS#8 format key will be accepted. However, openssl
+    # transparently handle PKCS#8 and PKCS#1 format (by some fallback
+    # logic) and their is no function to distinguish between them. By
+    # reading openssl source code, apparently they also relies on header
+    # to determine if give bytes is PKCS#8 format or not
+    if not private_key_pem.startswith(_ENCRYPTED_PKCS8_PK_HEADER) and not private_key_pem.startswith(
+        _UNENCRYPTED_PKCS8_PK_HEADER
+    ):
+        raise Exception("Private key provided is not in PKCS#8 format. Please use correct format.")
+    if private_key_pem.startswith(_ENCRYPTED_PKCS8_PK_HEADER) and private_key_passphrase is None:
+        raise Exception(
+            "Private key is encrypted but passphrase could not be found. "
+            "Please set SNOWFLAKE_PRIVATE_KEY_PASSPHRASE env variable."
+        )
+    if private_key_pem.startswith(_UNENCRYPTED_PKCS8_PK_HEADER):
+        private_key_passphrase = None
+    private_key = serialization.load_pem_private_key(
+        private_key_pem,
+        str.encode(private_key_passphrase) if private_key_passphrase is not None else private_key_passphrase,
+        backends.default_backend(),
+    )
+    return private_key.private_bytes(
+        encoding=serialization.Encoding.DER,
+        format=serialization.PrivateFormat.PKCS8,
+        encryption_algorithm=serialization.NoEncryption(),
+    )
+def _connection_properties_from_env() -> dict[str, str]:
+    """Returns a dict with all possible login related env variables."""
+    sf_conn_prop = {
+        # Mandatory fields
+        "account": os.environ["SNOWFLAKE_ACCOUNT"],
+        "database": os.environ["SNOWFLAKE_DATABASE"],
+        # With a default value
+        "token_file": os.getenv("SNOWFLAKE_TOKEN_FILE", "/snowflake/session/token"),
+        "ssl": os.getenv("SNOWFLAKE_SSL", "on"),
+        "protocol": os.getenv("SNOWFLAKE_PROTOCOL", "https"),
+    }
+    # With empty default value
+    for key, env_var in {
+        "user": "SNOWFLAKE_USER",
+        "authenticator": "SNOWFLAKE_AUTHENTICATOR",
+        "password": "SNOWFLAKE_PASSWORD",
+        "host": "SNOWFLAKE_HOST",
+        "port": "SNOWFLAKE_PORT",
+        "schema": "SNOWFLAKE_SCHEMA",
+        "warehouse": "SNOWFLAKE_WAREHOUSE",
+        "private_key_path": "SNOWFLAKE_PRIVATE_KEY_PATH",
+    }.items():
+        value = os.getenv(env_var, "")
+        if value:
+            sf_conn_prop[key] = value
+    return sf_conn_prop
+def _load_from_snowsql_config_file(connection_name: str, login_file: str = "") -> dict[str, str]:
+    """Loads the dictionary from snowsql config file."""
+    snowsql_config_file = login_file if login_file else os.path.expanduser(_DEFAULT_CONNECTION_FILE)
+    if not os.path.exists(snowsql_config_file):
+        logging.error(f"Connection name given but snowsql config file is not found at: {snowsql_config_file}")
+        raise Exception("Snowflake SnowSQL config not found.")
+    config = configparser.ConfigParser(inline_comment_prefixes="#")
+    snowflake_connection_name = os.getenv("SNOWFLAKE_CONNECTION_NAME")
+    if snowflake_connection_name is not None:
+        connection_name = snowflake_connection_name
+    if connection_name:
+        if not connection_name.startswith("connections."):
+            connection_name = "connections." + connection_name
+    else:
+        # See https://docs.snowflake.com/en/user-guide/snowsql-start.html#configuring-default-connection-settings
+        connection_name = "connections"
+    logging.info(f"Reading {snowsql_config_file} for connection parameters defined as {connection_name}")
+    config.read(snowsql_config_file)
+    conn_params = dict(config[connection_name])
+    # Remap names to appropriate args in Python Connector API
+    # Note: "dbname" should become "database"
+    conn_params = {k.replace("name", ""): v.strip('"') for k, v in conn_params.items()}
+    if "db" in conn_params:
+        conn_params["database"] = conn_params["db"]
+        del conn_params["db"]
+    return conn_params
+def SnowflakeLoginOptions(connection_name: str = "", login_file: Optional[str] = None) -> dict[str, Union[str, bytes]]:
+    """Returns a dict that can be used directly into snowflake python connector or Snowpark session config.
+    NOTE: Token/Auth information is sideloaded in all cases above, if provided in following order:
+      1. If SNOWFLAKE_TOKEN is defined in the environment, it will be used.
+      2. If SNOWFLAKE_TOKEN_FILE is defined in the environment and file matching the value found, content of the file
+         will be used.
+    If token is found, username, password will be reset and 'authenticator' will be set to 'oauth'.
+    Python Connector:
+    >> ctx = snowflake.connector.connect(**(SnowflakeLoginOptions()))
+    Snowpark Session:
+    >> session = Session.builder.configs(SnowflakeLoginOptions()).create()
+    Usage Note:
+      Ideally one should have a snowsql config file. Read more here:
+      https://docs.snowflake.com/en/user-guide/snowsql-start.html#configuring-default-connection-settings
+      If snowsql config file does not exist, it tries auth from env variables.
+    Args:
+        connection_name: Name of the connection to look for inside the config file. If environment variable
+            SNOWFLAKE_CONNECTION_NAME is provided, it will override the input connection_name.
+        login_file: If provided, this is used as config file instead of default one (_DEFAULT_CONNECTION_FILE).
+    Returns:
+        A dict with connection parameters.
+    Raises:
+        Exception: if none of config file and environment variable are present.
+    """
+    conn_prop: dict[str, Union[str, bytes]] = {}
+    login_file = login_file or os.path.expanduser(_DEFAULT_CONNECTION_FILE)
+    # If login file exists, use this exclusively.
+    if os.path.exists(login_file):
+        conn_prop = {**(_load_from_snowsql_config_file(connection_name, login_file))}
+    else:
+        # If environment exists for SNOWFLAKE_ACCOUNT, assume everything
+        # comes from environment. Mixing it not allowed.
+        account = os.getenv("SNOWFLAKE_ACCOUNT", "")
+        if account:
+            conn_prop = {**_connection_properties_from_env()}
+        else:
+            raise Exception("Snowflake credential is neither set in env nor a login file was provided.")
+    # Token, if specified, is always side-loaded in all cases.
+    token = _read_token(str(conn_prop["token_file"]) if "token_file" in conn_prop else "")
+    if token:
+        conn_prop["token"] = token
+        if "authenticator" not in conn_prop or conn_prop["authenticator"]:
+            conn_prop["authenticator"] = "oauth"
+    elif "private_key_path" in conn_prop and "private_key" not in conn_prop:
+        conn_prop["private_key"] = _load_pem_to_der(str(conn_prop["private_key_path"]))
+    if "ssl" in conn_prop and conn_prop["ssl"].lower() == "off":
+        conn_prop["protocol"] = "http"
+    return conn_prop

snowflake/ml/_internal/utils/identifier.py CHANGED Viewed

@@ -240,7 +240,7 @@ def get_schema_level_object_identifier(
     """
     for identifier in (db, schema, object_name):
-        if identifier is not None and SF_IDENTIFIER_RE.match(identifier) is None:
+        if identifier is not None and SF_IDENTIFIER_RE.fullmatch(identifier) is None:
             raise ValueError(f"Invalid identifier {identifier}")
     if others is None:

snowflake/ml/_internal/utils/mixins.py ADDED Viewed

@@ -0,0 +1,61 @@
+from typing import Any, Optional
+from snowflake.ml._internal.utils import identifier
+from snowflake.snowpark import session
+class SerializableSessionMixin:
+    """Mixin that provides pickling capabilities for objects with Snowpark sessions."""
+    def __getstate__(self) -> dict[str, Any]:
+        """Customize pickling to exclude non-serializable session and related components."""
+        state = self.__dict__.copy()
+        # Save session metadata for validation during unpickling
+        if hasattr(self, "_session") and self._session is not None:
+            try:
+                state["__session-account__"] = self._session.get_current_account()
+                state["__session-role__"] = self._session.get_current_role()
+                state["__session-database__"] = self._session.get_current_database()
+                state["__session-schema__"] = self._session.get_current_schema()
+            except Exception:
+                pass
+        state["_session"] = None
+        return state
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        """Restore session from context during unpickling."""
+        saved_account = state.pop("__session-account__", None)
+        saved_role = state.pop("__session-role__", None)
+        saved_database = state.pop("__session-database__", None)
+        saved_schema = state.pop("__session-schema__", None)
+        self.__dict__.update(state)
+        if saved_account is not None:
+            def identifiers_match(saved: Optional[str], current: Optional[str]) -> bool:
+                saved_resolved = identifier.resolve_identifier(saved) if saved is not None else saved
+                current_resolved = identifier.resolve_identifier(current) if current is not None else current
+                return saved_resolved == current_resolved
+            for active_session in session._get_active_sessions():
+                try:
+                    current_account = active_session.get_current_account()
+                    current_role = active_session.get_current_role()
+                    current_database = active_session.get_current_database()
+                    current_schema = active_session.get_current_schema()
+                    if (
+                        identifiers_match(saved_account, current_account)
+                        and identifiers_match(saved_role, current_role)
+                        and identifiers_match(saved_database, current_database)
+                        and identifiers_match(saved_schema, current_schema)
+                    ):
+                        self._session = active_session
+                        return
+                except Exception:
+                    continue
+        # No matching session found or no metadata available
+        raise RuntimeError("No active Snowpark session available. Please create a session.")

snowflake/ml/jobs/__init__.py CHANGED Viewed

@@ -7,6 +7,7 @@ from snowflake.ml.jobs.manager import (
     list_jobs,
     submit_directory,
     submit_file,
+    submit_from_stage,
 )
 __all__ = [
@@ -18,4 +19,5 @@ __all__ = [
     "delete_job",
     "MLJob",
     "JOB_STATUS",
+    "submit_from_stage",
 ]

snowflake/ml/jobs/_utils/constants.py CHANGED Viewed

@@ -6,6 +6,7 @@ DEFAULT_CONTAINER_NAME = "main"
 PAYLOAD_DIR_ENV_VAR = "MLRS_PAYLOAD_DIR"
 RESULT_PATH_ENV_VAR = "MLRS_RESULT_PATH"
 MIN_INSTANCES_ENV_VAR = "MLRS_MIN_INSTANCES"
+RUNTIME_IMAGE_TAG_ENV_VAR = "MLRS_CONTAINER_IMAGE_TAG"
 MEMORY_VOLUME_NAME = "dshm"
 STAGE_VOLUME_NAME = "stage-volume"
 STAGE_VOLUME_MOUNT_PATH = "/mnt/app"
@@ -14,7 +15,7 @@ STAGE_VOLUME_MOUNT_PATH = "/mnt/app"
 DEFAULT_IMAGE_REPO = "/snowflake/images/snowflake_images"
 DEFAULT_IMAGE_CPU = "st_plat/runtime/x86/runtime_image/snowbooks"
 DEFAULT_IMAGE_GPU = "st_plat/runtime/x86/generic_gpu/runtime_image/snowbooks"
-DEFAULT_IMAGE_TAG = "1.2.3"
+DEFAULT_IMAGE_TAG = "1.5.0"
 DEFAULT_ENTRYPOINT_PATH = "func.py"
 # Percent of container memory to allocate for /dev/shm volume
@@ -43,7 +44,7 @@ ENABLE_HEALTH_CHECKS = "false"
 # Job status polling constants
 JOB_POLL_INITIAL_DELAY_SECONDS = 0.1
-JOB_POLL_MAX_DELAY_SECONDS = 1
+JOB_POLL_MAX_DELAY_SECONDS = 30
 # Magic attributes
 IS_MLJOB_REMOTE_ATTR = "_is_mljob_remote_callable"

snowflake/ml/jobs/_utils/function_payload_utils.py ADDED Viewed

@@ -0,0 +1,43 @@
+import inspect
+from typing import Any, Callable, Optional
+from snowflake import snowpark
+from snowflake.snowpark import context as sp_context
+class FunctionPayload:
+    def __init__(
+        self,
+        func: Callable[..., Any],
+        session: Optional[snowpark.Session] = None,
+        session_argument: str = "",
+        *args: Any,
+        **kwargs: Any
+    ) -> None:
+        self.function = func
+        self.args = args
+        self.kwargs = kwargs
+        self._session = session
+        self._session_argument = session_argument
+    @property
+    def session(self) -> Optional[snowpark.Session]:
+        return self._session
+    def __getstate__(self) -> dict[str, Any]:
+        """Customize pickling to exclude session."""
+        state = self.__dict__.copy()
+        state["_session"] = None
+        return state
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        """Restore session from context during unpickling."""
+        self.__dict__.update(state)
+        self._session = sp_context.get_active_session()
+    def __call__(self) -> Any:
+        sig = inspect.signature(self.function)
+        bound = sig.bind_partial(*self.args, **self.kwargs)
+        bound.arguments[self._session_argument] = self._session
+        return self.function(*bound.args, **bound.kwargs)

snowflake/ml/jobs/_utils/interop_utils.py CHANGED Viewed

@@ -75,16 +75,75 @@ def fetch_result(session: snowpark.Session, result_path: str) -> ExecutionResult
     Returns:
         A dictionary containing the execution result if available, None otherwise.
+    Raises:
+        RuntimeError: If both pickle and JSON result retrieval fail.
     """
     try:
         # TODO: Check if file exists
         with session.file.get_stream(result_path) as result_stream:
             return ExecutionResult.from_dict(pickle.load(result_stream))
-    except (sp_exceptions.SnowparkSQLException, pickle.UnpicklingError, TypeError, ImportError):
+    except (
+        sp_exceptions.SnowparkSQLException,
+        pickle.UnpicklingError,
+        TypeError,
+        ImportError,
+        AttributeError,
+        MemoryError,
+    ) as pickle_error:
         # Fall back to JSON result if loading pickled result fails for any reason
-        result_json_path = os.path.splitext(result_path)[0] + ".json"
-        with session.file.get_stream(result_json_path) as result_stream:
-            return ExecutionResult.from_dict(json.load(result_stream))
+        try:
+            result_json_path = os.path.splitext(result_path)[0] + ".json"
+            with session.file.get_stream(result_json_path) as result_stream:
+                return ExecutionResult.from_dict(json.load(result_stream))
+        except Exception as json_error:
+            # Both pickle and JSON failed - provide helpful error message
+            raise RuntimeError(_fetch_result_error_message(pickle_error, result_path, json_error)) from pickle_error
+def _fetch_result_error_message(error: Exception, result_path: str, json_error: Optional[Exception] = None) -> str:
+    """Create helpful error messages for common result retrieval failures."""
+    # Package import issues
+    if isinstance(error, ImportError):
+        return f"Failed to retrieve job result: Package not installed in your local environment. Error: {str(error)}"
+    # Package versions differ between runtime and local environment
+    if isinstance(error, AttributeError):
+        return f"Failed to retrieve job result: Package version mismatch. Error: {str(error)}"
+    # Serialization issues
+    if isinstance(error, TypeError):
+        return f"Failed to retrieve job result: Non-serializable objects were returned. Error: {str(error)}"
+    # Python version pickling incompatibility
+    if isinstance(error, pickle.UnpicklingError) and "protocol" in str(error).lower():
+        # TODO: Update this once we support different Python versions
+        client_version = f"Python {sys.version_info.major}.{sys.version_info.minor}"
+        runtime_version = "Python 3.10"
+        return (
+            f"Failed to retrieve job result: Python version mismatch - job ran on {runtime_version}, "
+            f"local environment using Python {client_version}. Error: {str(error)}"
+        )
+    # File access issues
+    if isinstance(error, sp_exceptions.SnowparkSQLException):
+        if "not found" in str(error).lower() or "does not exist" in str(error).lower():
+            return (
+                f"Failed to retrieve job result: No result file found. Check job.get_logs() for execution "
+                f"errors. Error: {str(error)}"
+            )
+        else:
+            return f"Failed to retrieve job result: Cannot access result file. Error: {str(error)}"
+    if isinstance(error, MemoryError):
+        return f"Failed to retrieve job result: Result too large for memory. Error: {str(error)}"
+    # Generic fallback
+    base_message = f"Failed to retrieve job result: {str(error)}"
+    if json_error:
+        base_message += f" (JSON fallback also failed: {str(json_error)})"
+    return base_message
 def load_exception(exc_type_name: str, exc_value: Union[Exception, str], exc_tb: str) -> Exception:

snowflake-ml-python 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl

snowflake-ml-python 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl