snowflake-ml-python 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. snowflake/ml/_internal/platform_capabilities.py +36 -0
  2. snowflake/ml/_internal/utils/url.py +42 -0
  3. snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
  4. snowflake/ml/data/data_connector.py +103 -1
  5. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
  6. snowflake/ml/experiment/callback/__init__.py +0 -0
  7. snowflake/ml/experiment/callback/keras.py +25 -2
  8. snowflake/ml/experiment/callback/lightgbm.py +27 -2
  9. snowflake/ml/experiment/callback/xgboost.py +25 -2
  10. snowflake/ml/experiment/experiment_tracking.py +93 -3
  11. snowflake/ml/experiment/utils.py +6 -0
  12. snowflake/ml/feature_store/feature_view.py +34 -24
  13. snowflake/ml/jobs/_interop/protocols.py +3 -0
  14. snowflake/ml/jobs/_utils/constants.py +1 -0
  15. snowflake/ml/jobs/_utils/payload_utils.py +354 -356
  16. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
  17. snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
  18. snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
  19. snowflake/ml/jobs/_utils/spec_utils.py +1 -445
  20. snowflake/ml/jobs/_utils/stage_utils.py +22 -1
  21. snowflake/ml/jobs/_utils/types.py +14 -7
  22. snowflake/ml/jobs/job.py +2 -8
  23. snowflake/ml/jobs/manager.py +57 -135
  24. snowflake/ml/lineage/lineage_node.py +1 -1
  25. snowflake/ml/model/__init__.py +6 -0
  26. snowflake/ml/model/_client/model/batch_inference_specs.py +16 -1
  27. snowflake/ml/model/_client/model/model_version_impl.py +130 -14
  28. snowflake/ml/model/_client/ops/deployment_step.py +36 -0
  29. snowflake/ml/model/_client/ops/model_ops.py +93 -8
  30. snowflake/ml/model/_client/ops/service_ops.py +32 -52
  31. snowflake/ml/model/_client/service/import_model_spec_schema.py +23 -0
  32. snowflake/ml/model/_client/service/model_deployment_spec.py +12 -4
  33. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -0
  34. snowflake/ml/model/_client/sql/model_version.py +30 -6
  35. snowflake/ml/model/_client/sql/service.py +94 -5
  36. snowflake/ml/model/_model_composer/model_composer.py +1 -1
  37. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -0
  38. snowflake/ml/model/_model_composer/model_method/model_method.py +61 -2
  39. snowflake/ml/model/_packager/model_handler.py +8 -2
  40. snowflake/ml/model/_packager/model_handlers/custom.py +52 -0
  41. snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
  42. snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
  43. snowflake/ml/model/_packager/model_handlers/xgboost.py +26 -1
  44. snowflake/ml/model/_packager/model_meta/model_meta.py +40 -7
  45. snowflake/ml/model/_packager/model_packager.py +1 -1
  46. snowflake/ml/model/_signatures/core.py +390 -8
  47. snowflake/ml/model/_signatures/utils.py +13 -4
  48. snowflake/ml/model/code_path.py +104 -0
  49. snowflake/ml/model/compute_pool.py +2 -0
  50. snowflake/ml/model/custom_model.py +55 -13
  51. snowflake/ml/model/model_signature.py +13 -1
  52. snowflake/ml/model/models/huggingface.py +285 -0
  53. snowflake/ml/model/models/huggingface_pipeline.py +19 -208
  54. snowflake/ml/model/type_hints.py +7 -1
  55. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  56. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
  57. snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
  58. snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
  59. snowflake/ml/registry/_manager/model_manager.py +230 -15
  60. snowflake/ml/registry/registry.py +4 -4
  61. snowflake/ml/utils/html_utils.py +67 -1
  62. snowflake/ml/version.py +1 -1
  63. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/METADATA +81 -7
  64. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/RECORD +67 -59
  65. snowflake/ml/jobs/_utils/runtime_env_utils.py +0 -63
  66. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/WHEEL +0 -0
  67. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/licenses/LICENSE.txt +0 -0
  68. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/manager.py
@@ -1,27 +1,25 @@
 import json
 import logging
+import os
 import pathlib
 import sys
-import textwrap
 from pathlib import PurePath
 from typing import Any, Callable, Optional, TypeVar, Union, cast, overload
 from uuid import uuid4

 import pandas as pd
-import yaml

 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier
 from snowflake.ml.jobs import job as jb
 from snowflake.ml.jobs._utils import (
+    constants,
     feature_flags,
     payload_utils,
     query_helper,
-    spec_utils,
     types,
 )
-from snowflake.snowpark._internal import utils as sp_utils
 from snowflake.snowpark.context import get_active_session
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.functions import coalesce, col, lit, when
@@ -259,7 +257,7 @@ def submit_directory(
     dir_path: str,
     compute_pool: str,
     *,
-    entrypoint: str,
+    entrypoint: Union[str, list[str]],
     stage_name: str,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
@@ -274,7 +272,11 @@ def submit_directory(
     Args:
         dir_path: The path to the directory containing the job payload.
         compute_pool: The compute pool to use for the job.
-        entrypoint: The relative path to the entry point script inside the source directory.
+        entrypoint: The entry point for job execution. Can be:
+            - A string path to the entry point script inside the source directory.
+            - A list of strings representing a custom command (e.g., ["arctic_training"])
+              which is passed through as-is without local resolution or validation.
+              This is useful for entrypoints that are installed via pip_requirements.
         stage_name: The name of the stage where the job payload will be uploaded.
         args: A list of arguments to pass to the job.
         target_instances: The number of nodes in the job. If none specified, create a single node job.
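For context, a minimal usage sketch of the list-valued entrypoint documented above. All names (directory, compute pool, stage, command) are illustrative, and the full set of keyword arguments accepted by submit_directory is not shown in this diff; this also assumes the manager-level helpers remain re-exported from snowflake.ml.jobs as in earlier releases.

from snowflake.ml import jobs

# Hypothetical example: run a console command that is installed via pip_requirements.
# A list entrypoint is passed through as-is, so "arctic_training" only needs to exist
# inside the job container, not on the local machine.
job = jobs.submit_directory(
    "./my_training_project",         # local payload directory (illustrative)
    "MY_COMPUTE_POOL",               # compute pool name (illustrative)
    entrypoint=["arctic_training"],  # custom command, no local resolution or validation
    stage_name="payload_stage",      # stage for the uploaded payload (illustrative)
    args=["--config", "config.yaml"],
    pip_requirements=["arctic_training"],
)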
@@ -315,7 +317,7 @@ def submit_from_stage(
     source: str,
     compute_pool: str,
     *,
-    entrypoint: str,
+    entrypoint: Union[str, list[str]],
     stage_name: str,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
@@ -330,7 +332,11 @@ def submit_from_stage(
     Args:
         source: a stage path or a stage containing the job payload.
        compute_pool: The compute pool to use for the job.
-        entrypoint: a stage path containing the entry point script inside the source directory.
+        entrypoint: The entry point for job execution. Can be:
+            - A string path to the entry point script inside the source directory.
+            - A list of strings representing a custom command (e.g., ["arctic_training"])
+              which is passed through as-is without local resolution or validation.
+              This is useful for entrypoints that are installed via pip_requirements.
         stage_name: The name of the stage where the job payload will be uploaded.
         args: A list of arguments to pass to the job.
         target_instances: The number of nodes in the job. If none specified, create a single node job.
@@ -375,7 +381,7 @@ def _submit_job(
     compute_pool: str,
     *,
     stage_name: str,
-    entrypoint: Optional[str] = None,
+    entrypoint: Optional[Union[str, list[str]]] = None,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
     pip_requirements: Optional[list[str]] = None,
@@ -392,7 +398,7 @@ def _submit_job(
     compute_pool: str,
     *,
     stage_name: str,
-    entrypoint: Optional[str] = None,
+    entrypoint: Optional[Union[str, list[str]]] = None,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
     pip_requirements: Optional[list[str]] = None,
@@ -424,7 +430,7 @@ def _submit_job(
     compute_pool: str,
     *,
     stage_name: str,
-    entrypoint: Optional[str] = None,
+    entrypoint: Optional[Union[str, list[str]]] = None,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
     session: Optional[snowpark.Session] = None,
@@ -437,7 +443,11 @@ def _submit_job(
         source: The file/directory path containing payload source code or a serializable Python callable.
         compute_pool: The compute pool to use for the job.
         stage_name: The name of the stage where the job payload will be uploaded.
-        entrypoint: The entry point for the job execution. Required if source is a directory.
+        entrypoint: The entry point for the job execution. Can be:
+            - A string path to a Python script (required if source is a directory).
+            - A list of strings representing a custom command (e.g., ["arctic_training"])
+              which is passed through as-is without local resolution or validation.
+              This is useful for entrypoints that are installed via pip_requirements.
         args: A list of arguments to pass to the job.
         target_instances: The number of instances to use for the job. If none specified, single node job is created.
         session: The Snowpark session to use. If none specified, uses active session.
@@ -449,7 +459,6 @@ def _submit_job(
     Raises:
         ValueError: If database or schema value(s) are invalid
         RuntimeError: If schema is not specified in session context or job submission
-        SnowparkSQLException: if failed to upload payload
     """
     session = _ensure_session(session)

@@ -481,7 +490,8 @@ def _submit_job(
     enable_metrics = kwargs.pop("enable_metrics", True)
     query_warehouse = kwargs.pop("query_warehouse", session.get_current_warehouse())
     imports = kwargs.pop("imports", None) or imports
-    runtime_environment = kwargs.pop("runtime_environment", None)
+    # if the mljob is submitted from a notebook, we use the same image tag as the notebook
+    runtime_environment = kwargs.pop("runtime_environment", os.environ.get(constants.RUNTIME_IMAGE_TAG_ENV_VAR, None))

     # Warn if there are unknown kwargs
     if kwargs:
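In other words, a job submitted from a Container Runtime notebook now defaults to the notebook's own runtime image tag instead of None, while an explicitly passed runtime_environment still wins. A minimal sketch of that resolution order; the helper name below is hypothetical and only constants.RUNTIME_IMAGE_TAG_ENV_VAR comes from this diff.

import os
from typing import Optional

from snowflake.ml.jobs._utils import constants


def _resolve_runtime_environment(explicit: Optional[str]) -> Optional[str]:
    # An explicit runtime_environment argument always takes precedence; otherwise
    # reuse the image tag exported by the notebook environment, if it is set at all.
    if explicit is not None:
        return explicit
    return os.environ.get(constants.RUNTIME_IMAGE_TAG_ENV_VAR)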
@@ -513,7 +523,7 @@ def _submit_job(
     try:
         # Upload payload
         uploaded_payload = payload_utils.JobPayload(
-            source, entrypoint=entrypoint, pip_requirements=pip_requirements, additional_payloads=imports
+            source, entrypoint=entrypoint, pip_requirements=pip_requirements, imports=imports
         ).upload(session, stage_path)
     except SnowparkSQLException as e:
         if e.sql_error_code == 90106:
@@ -528,125 +538,36 @@ def _submit_job(
             ) from e
         raise

-    if feature_flags.FeatureFlags.USE_SUBMIT_JOB_V2.is_enabled(default=True):
-        # Add default env vars (extracted from spec_utils.generate_service_spec)
-        combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}
+    combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}

-        try:
-            return _do_submit_job_v2(
-                session=session,
-                payload=uploaded_payload,
-                args=args,
-                env_vars=combined_env_vars,
-                spec_overrides=spec_overrides,
-                compute_pool=compute_pool,
-                job_id=job_id,
-                external_access_integrations=external_access_integrations,
-                query_warehouse=query_warehouse,
-                target_instances=target_instances,
-                min_instances=min_instances,
-                enable_metrics=enable_metrics,
-                use_async=True,
-                runtime_environment=runtime_environment,
-            )
-        except SnowparkSQLException as e:
-            if not (e.sql_error_code == 90237 and sp_utils.is_in_stored_procedure()):  # type: ignore[no-untyped-call]
-                raise
-            elif e.sql_error_code == 3001 and "schema" in str(e).lower():
-                raise RuntimeError(
-                    "please grant privileges on schema before submitting a job, see",
-                    "https://docs.snowflake.com/en/developer-guide/snowflake-ml/ml-jobs/access-control-requirements"
-                    " for more details",
-                ) from e
-            # SNOW-2390287: SYSTEM$EXECUTE_ML_JOB() is erroneously blocked in owner's rights
-            # stored procedures. This will be fixed in an upcoming release.
-            logger.warning(
-                "Job submission using V2 failed with error {}. Falling back to V1.".format(
-                    str(e).split("\n", 1)[0],
-                )
-            )
-
-    # Fall back to v1
-    # Generate service spec
-    spec = spec_utils.generate_service_spec(
-        session,
-        compute_pool=compute_pool,
-        payload=uploaded_payload,
-        args=args,
-        target_instances=target_instances,
-        min_instances=min_instances,
-        enable_metrics=enable_metrics,
-        runtime_environment=runtime_environment,
-    )
-
-    # Generate spec overrides
-    spec_overrides = spec_utils.generate_spec_overrides(
-        environment_vars=env_vars,
-        custom_overrides=spec_overrides,
-    )
-    if spec_overrides:
-        spec = spec_utils.merge_patch(spec, spec_overrides, display_name="spec_overrides")
-
-    return _do_submit_job_v1(
-        session, spec, external_access_integrations, query_warehouse, target_instances, compute_pool, job_id
-    )
-
-
-def _do_submit_job_v1(
-    session: snowpark.Session,
-    spec: dict[str, Any],
-    external_access_integrations: list[str],
-    query_warehouse: Optional[str],
-    target_instances: int,
-    compute_pool: str,
-    job_id: str,
-) -> jb.MLJob[Any]:
-    """
-    Generate the SQL query for job submission.
-
-    Args:
-        session: The Snowpark session to use.
-        spec: The service spec for the job.
-        external_access_integrations: The external access integrations for the job.
-        query_warehouse: The query warehouse for the job.
-        target_instances: The number of instances for the job.
-        session: The Snowpark session to use.
-        compute_pool: The compute pool to use for the job.
-        job_id: The ID of the job.
-
-    Returns:
-        The job object.
-    """
-    query_template = textwrap.dedent(
-        """\
-        EXECUTE JOB SERVICE
-        IN COMPUTE POOL IDENTIFIER(?)
-        FROM SPECIFICATION $$
-        {}
-        $$
-        NAME = IDENTIFIER(?)
-        ASYNC = TRUE
-        """
-    )
-    params: list[Any] = [compute_pool, job_id]
-    query = query_template.format(yaml.dump(spec)).splitlines()
-    if external_access_integrations:
-        external_access_integration_list = ",".join(f"{e}" for e in external_access_integrations)
-        query.append(f"EXTERNAL_ACCESS_INTEGRATIONS = ({external_access_integration_list})")
-    if query_warehouse:
-        query.append("QUERY_WAREHOUSE = IDENTIFIER(?)")
-        params.append(query_warehouse)
-    if target_instances > 1:
-        query.append("REPLICAS = ?")
-        params.append(target_instances)
-
-    query_text = "\n".join(line for line in query if line)
-    _ = query_helper.run_query(session, query_text, params=params)
-
-    return get_job(job_id, session=session)
+    try:
+        return _do_submit_job(
+            session=session,
+            payload=uploaded_payload,
+            args=args,
+            env_vars=combined_env_vars,
+            spec_overrides=spec_overrides,
+            compute_pool=compute_pool,
+            job_id=job_id,
+            external_access_integrations=external_access_integrations,
+            query_warehouse=query_warehouse,
+            target_instances=target_instances,
+            min_instances=min_instances,
+            enable_metrics=enable_metrics,
+            use_async=True,
+            runtime_environment=runtime_environment,
+        )
+    except SnowparkSQLException as e:
+        if e.sql_error_code == 3001 and "schema" in str(e).lower():
+            raise RuntimeError(
+                "please grant privileges on schema before submitting a job, see",
+                "https://docs.snowflake.com/en/developer-guide/snowflake-ml/ml-jobs/access-control-requirements"
+                " for more details",
+            ) from e
+        raise


-def _do_submit_job_v2(
+def _do_submit_job(
     session: snowpark.Session,
     payload: types.UploadedPayload,
     args: Optional[list[str]],
@@ -684,9 +605,7 @@ def _do_submit_job_v2(
     Returns:
         The job object.
     """
-    args = [
-        (payload.stage_path.joinpath(v).as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint
-    ] + (args or [])
+    args = [(v.as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint] + (args or [])
     spec_options = {
         "STAGE_PATH": payload.stage_path.as_posix(),
         "ENTRYPOINT": ["/usr/local/bin/_entrypoint.sh"],
@@ -695,8 +614,8 @@
         "ENABLE_METRICS": enable_metrics,
         "SPEC_OVERRIDES": spec_overrides,
     }
-    # for the image tag or full image URL, we use that directly
     if runtime_environment:
+        # for the image tag or full image URL, we use that directly
         spec_options["RUNTIME"] = runtime_environment
     elif feature_flags.FeatureFlags.ENABLE_RUNTIME_VERSIONS.is_enabled():
         # when feature flag is enabled, we get the local python version and wrap it in a dict
@@ -710,6 +629,9 @@
         "MIN_INSTANCES": min_instances,
         "ASYNC": use_async,
     }
+
+    if feature_flags.FeatureFlags.ENABLE_STAGE_MOUNT_V2.is_enabled(default=True):
+        spec_options["ENABLE_STAGE_MOUNT_V2"] = True
     if payload.payload_name:
         job_options["GENERATE_SUFFIX"] = True
     job_options = {k: v for k, v in job_options.items() if v is not None}
snowflake/ml/lineage/lineage_node.py
@@ -7,7 +7,7 @@ from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier, mixins

 if TYPE_CHECKING:
-    from snowflake.ml import dataset
+    from snowflake.ml.dataset import dataset
     from snowflake.ml.feature_store import feature_view
     from snowflake.ml.model._client.model import model_version_impl

snowflake/ml/model/__init__.py
@@ -2,16 +2,20 @@ import sys
 import warnings

 from snowflake.ml.model._client.model.batch_inference_specs import (
+    ColumnHandlingOptions,
+    FileEncoding,
     JobSpec,
     OutputSpec,
     SaveMode,
 )
 from snowflake.ml.model._client.model.model_impl import Model
 from snowflake.ml.model._client.model.model_version_impl import ExportMode, ModelVersion
+from snowflake.ml.model.code_path import CodePath
 from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel
 from snowflake.ml.model.volatility import Volatility

 __all__ = [
+    "CodePath",
     "Model",
     "ModelVersion",
     "ExportMode",
@@ -20,6 +24,8 @@ __all__ = [
     "OutputSpec",
     "SaveMode",
     "Volatility",
+    "FileEncoding",
+    "ColumnHandlingOptions",
 ]

 _deprecation_warning_msg_for_3_9 = (
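With the export list above, the new helpers should be importable from the package root alongside the existing names; a minimal sketch:

# New in this release: CodePath, FileEncoding, and ColumnHandlingOptions are
# exported from snowflake.ml.model next to the existing JobSpec / OutputSpec.
from snowflake.ml.model import (
    CodePath,
    ColumnHandlingOptions,
    FileEncoding,
    JobSpec,
    OutputSpec,
)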
snowflake/ml/model/_client/model/batch_inference_specs.py
@@ -2,6 +2,7 @@ from enum import Enum
 from typing import Optional

 from pydantic import BaseModel
+from typing_extensions import TypedDict


 class SaveMode(str, Enum):
@@ -18,6 +19,20 @@ class SaveMode(str, Enum):
     ERROR = "error"


+class FileEncoding(str, Enum):
+    """The encoding of the file content that will be passed to the custom model."""
+
+    RAW_BYTES = "raw_bytes"
+    BASE64 = "base64"
+    BASE64_DATA_URL = "base64_data_url"
+
+
+class ColumnHandlingOptions(TypedDict):
+    """Options for handling specific columns during run_batch for file I/O."""
+
+    encoding: FileEncoding
+
+
 class OutputSpec(BaseModel):
     """Specification for batch inference output.

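A short sketch of how these two definitions fit together, based only on what is shown above. The run_batch parameter that ultimately consumes the mapping is not part of this hunk, so the column_handling name below is an assumption.

from snowflake.ml.model import ColumnHandlingOptions, FileEncoding

# Each handled column gets an options dict whose "encoding" field controls how
# the file content is presented to the custom model.
image_options: ColumnHandlingOptions = {"encoding": FileEncoding.BASE64_DATA_URL}
document_options: ColumnHandlingOptions = {"encoding": FileEncoding.RAW_BYTES}

# Hypothetical column-name -> options mapping passed to batch inference.
column_handling = {
    "IMAGE_FILE": image_options,
    "DOCUMENT_FILE": document_options,
}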
@@ -74,7 +89,7 @@ class JobSpec(BaseModel):
            the memory of the node.
        gpu_requests (Optional[str]): The gpu limit for GPU based inference. Can be integer or
            string values. Use CPU if None.
-       replicas (Optional[int]): Number of job replicas to run for high availability.
+       replicas (Optional[int]): Number of SPCS job nodes used for distributed inference.
            If not specified, defaults to 1 replica.

    Example:
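To illustrate the reworded replicas semantics, a hedged JobSpec sketch using only the attributes documented in this hunk; other fields and their defaults are not shown here.

from snowflake.ml.model import JobSpec

# replicas now counts SPCS job nodes used for distributed batch inference
# (default 1), rather than high-availability copies of the same job.
spec = JobSpec(
    gpu_requests="1",  # integer-like or string values; CPU is used when None
    replicas=2,        # spread the batch inference job across two SPCS nodes
)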