snowflake-ml-python 1.9.0__py3-none-any.whl → 1.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +44 -3
- snowflake/ml/_internal/platform_capabilities.py +52 -2
- snowflake/ml/_internal/type_utils.py +1 -1
- snowflake/ml/_internal/utils/mixins.py +54 -42
- snowflake/ml/_internal/utils/service_logger.py +105 -3
- snowflake/ml/data/_internal/arrow_ingestor.py +15 -2
- snowflake/ml/data/data_connector.py +13 -2
- snowflake/ml/data/data_ingestor.py +8 -0
- snowflake/ml/data/torch_utils.py +1 -1
- snowflake/ml/dataset/dataset.py +2 -1
- snowflake/ml/dataset/dataset_reader.py +14 -4
- snowflake/ml/experiment/__init__.py +3 -0
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +98 -0
- snowflake/ml/experiment/_entities/__init__.py +4 -0
- snowflake/ml/experiment/_entities/experiment.py +10 -0
- snowflake/ml/experiment/_entities/run.py +62 -0
- snowflake/ml/experiment/_entities/run_metadata.py +68 -0
- snowflake/ml/experiment/_experiment_info.py +63 -0
- snowflake/ml/experiment/callback.py +121 -0
- snowflake/ml/experiment/experiment_tracking.py +319 -0
- snowflake/ml/jobs/_utils/constants.py +15 -4
- snowflake/ml/jobs/_utils/payload_utils.py +156 -54
- snowflake/ml/jobs/_utils/query_helper.py +16 -5
- snowflake/ml/jobs/_utils/scripts/constants.py +0 -22
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +130 -23
- snowflake/ml/jobs/_utils/spec_utils.py +23 -8
- snowflake/ml/jobs/_utils/stage_utils.py +30 -14
- snowflake/ml/jobs/_utils/types.py +64 -4
- snowflake/ml/jobs/job.py +70 -75
- snowflake/ml/jobs/manager.py +59 -31
- snowflake/ml/lineage/lineage_node.py +2 -2
- snowflake/ml/model/_client/model/model_version_impl.py +16 -4
- snowflake/ml/model/_client/ops/service_ops.py +336 -137
- snowflake/ml/model/_client/service/model_deployment_spec.py +1 -1
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -1
- snowflake/ml/model/_client/sql/service.py +1 -38
- snowflake/ml/model/_model_composer/model_composer.py +6 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +17 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +41 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -5
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +3 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -3
- snowflake/ml/model/_signatures/pandas_handler.py +3 -0
- snowflake/ml/model/_signatures/utils.py +4 -0
- snowflake/ml/model/event_handler.py +117 -0
- snowflake/ml/model/model_signature.py +11 -9
- snowflake/ml/model/models/huggingface_pipeline.py +170 -1
- snowflake/ml/modeling/framework/base.py +1 -1
- snowflake/ml/modeling/metrics/classification.py +14 -14
- snowflake/ml/modeling/metrics/correlation.py +19 -8
- snowflake/ml/modeling/metrics/ranking.py +6 -6
- snowflake/ml/modeling/metrics/regression.py +9 -9
- snowflake/ml/monitoring/explain_visualize.py +12 -5
- snowflake/ml/registry/_manager/model_manager.py +32 -15
- snowflake/ml/registry/registry.py +48 -80
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/METADATA +107 -5
- {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/RECORD +62 -52
- {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/top_level.txt +0 -0
snowflake/ml/registry/registry.py
CHANGED

@@ -1,5 +1,3 @@
-import logging
-import os
 import warnings
 from types import ModuleType
 from typing import Any, Optional, Union, overload

@@ -8,13 +6,15 @@ import pandas as pd

 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
-from snowflake.ml._internal.utils import sql_identifier
+from snowflake.ml._internal.utils import query_result_checker, sql_identifier
 from snowflake.ml.model import (
     Model,
     ModelVersion,
+    event_handler,
     model_signature,
+    target_platform,
     task,
-    type_hints
+    type_hints,
 )
 from snowflake.ml.model._client.model import model_version_impl
 from snowflake.ml.monitoring import model_monitor

@@ -32,52 +32,6 @@ _MODEL_MONITORING_DISABLED_ERROR = (
 )


-class _NullStatusContext:
-    """A fallback context manager that logs status updates."""
-
-    def __init__(self, label: str) -> None:
-        self._label = label
-
-    def __enter__(self) -> "_NullStatusContext":
-        logging.info(f"Starting: {self._label}")
-        return self
-
-    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
-        pass
-
-    def update(self, label: str, *, state: str = "running", expanded: bool = True) -> None:
-        """Update the status by logging the message."""
-        logging.info(f"Status update: {label} (state: {state})")
-
-
-class RegistryEventHandler:
-    def __init__(self) -> None:
-        try:
-            import streamlit as st
-
-            if not st.runtime.exists():
-                self._streamlit = None
-            else:
-                self._streamlit = st
-            USE_STREAMLIT_WIDGETS = os.getenv("USE_STREAMLIT_WIDGETS", "1") == "1"
-            if not USE_STREAMLIT_WIDGETS:
-                self._streamlit = None
-        except ImportError:
-            self._streamlit = None
-
-    def update(self, message: str) -> None:
-        """Write a message using streamlit if available, otherwise do nothing."""
-        if self._streamlit is not None:
-            self._streamlit.write(message)
-
-    def status(self, label: str, *, state: str = "running", expanded: bool = True) -> Any:
-        """Context manager that provides status updates with optional enhanced display capabilities."""
-        if self._streamlit is None:
-            return _NullStatusContext(label)
-        else:
-            return self._streamlit.status(label, state=state, expanded=expanded)
-
-
 class Registry:
     @telemetry.send_api_usage_telemetry(project=_TELEMETRY_PROJECT, subproject=_MODEL_TELEMETRY_SUBPROJECT)
     def __init__(

@@ -124,20 +78,30 @@ class Registry:
             else sql_identifier.SqlIdentifier("PUBLIC")
         )

-
-
-
+        database_results = (
+            query_result_checker.SqlResultValidator(
+                session, f"""SHOW DATABASES LIKE '{self._database_name.resolved()}';"""
+            )
+            .has_column("name", allow_empty=True)
+            .validate()
+        )

-
+        db_names = [row["name"] for row in database_results]
+        if not self._database_name.resolved() in db_names:
             raise ValueError(f"Database {self._database_name} does not exist.")

-
-
-
-
-
+        schema_results = (
+            query_result_checker.SqlResultValidator(
+                session,
+                f"""SHOW SCHEMAS LIKE '{self._schema_name.resolved()}'
+                IN DATABASE {self._database_name.identifier()};""",
+            )
+            .has_column("name", allow_empty=True)
+            .validate()
+        )

-
+        schema_names = [row["name"] for row in schema_results]
+        if not self._schema_name.resolved() in schema_names:
             raise ValueError(f"Schema {self._schema_name} does not exist.")

         self._model_manager = model_manager.ModelManager(
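
The chained validator added above replaces the old existence checks with metadata-only `SHOW` commands, which run without an active warehouse. A rough plain-Snowpark equivalent of the check, for orientation only (it assumes an existing `snowflake.snowpark.Session` named `session` and a database named `ML`; the registry itself goes through `SqlResultValidator`):

```python
# Sketch: what the SqlResultValidator chain above effectively checks.
# SHOW DATABASES is a metadata command, so no running warehouse is required.
rows = session.sql("SHOW DATABASES LIKE 'ML'").collect()
existing = [row["name"] for row in rows]  # "name" is the column the validator asserts on
if "ML" not in existing:
    raise ValueError("Database ML does not exist.")
```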

@@ -168,7 +132,7 @@ class Registry:
     @overload
     def log_model(
         self,
-        model:
+        model: type_hints.SupportedModelType,
         *,
         model_name: str,
         version_name: Optional[str] = None,

@@ -178,15 +142,15 @@ class Registry:
         pip_requirements: Optional[list[str]] = None,
         artifact_repository_map: Optional[dict[str, str]] = None,
         resource_constraint: Optional[dict[str, str]] = None,
-        target_platforms: Optional[list[
+        target_platforms: Optional[list[Union[target_platform.TargetPlatform, str]]] = None,
         python_version: Optional[str] = None,
         signatures: Optional[dict[str, model_signature.ModelSignature]] = None,
-        sample_input_data: Optional[
+        sample_input_data: Optional[type_hints.SupportedDataType] = None,
         user_files: Optional[dict[str, list[str]]] = None,
         code_paths: Optional[list[str]] = None,
         ext_modules: Optional[list[ModuleType]] = None,
-        task:
-        options: Optional[
+        task: task.Task = task.Task.UNKNOWN,
+        options: Optional[type_hints.ModelSaveOption] = None,
     ) -> ModelVersion:
         """
         Log a model with various parameters and metadata, or a ModelVersion object.

@@ -258,7 +222,8 @@ class Registry:
             - target_methods: List of target methods to register when logging the model.
                 This option is not used in MLFlow models. Defaults to None, in which case the model handler's
                 default target methods will be used.
-            - save_location:
+            - save_location: Local directory to save the the serialized model files first before
+                uploading to Snowflake. This is useful when default tmp directory is not writable.
             - method_options: Per-method saving options. This dictionary has method names as keys and dictionary
                 values with the desired options.


@@ -315,7 +280,7 @@ class Registry:
     )
     def log_model(
         self,
-        model: Union[
+        model: Union[type_hints.SupportedModelType, ModelVersion],
         *,
         model_name: str,
         version_name: Optional[str] = None,

@@ -325,15 +290,15 @@ class Registry:
         pip_requirements: Optional[list[str]] = None,
         artifact_repository_map: Optional[dict[str, str]] = None,
         resource_constraint: Optional[dict[str, str]] = None,
-        target_platforms: Optional[list[
+        target_platforms: Optional[list[Union[target_platform.TargetPlatform, str]]] = None,
         python_version: Optional[str] = None,
         signatures: Optional[dict[str, model_signature.ModelSignature]] = None,
-        sample_input_data: Optional[
+        sample_input_data: Optional[type_hints.SupportedDataType] = None,
         user_files: Optional[dict[str, list[str]]] = None,
         code_paths: Optional[list[str]] = None,
         ext_modules: Optional[list[ModuleType]] = None,
-        task:
-        options: Optional[
+        task: task.Task = task.Task.UNKNOWN,
+        options: Optional[type_hints.ModelSaveOption] = None,
     ) -> ModelVersion:
         """
         Log a model with various parameters and metadata, or a ModelVersion object.

@@ -474,7 +439,7 @@ class Registry:
                 raise ValueError(
                     "When calling log_model with a ModelVersion, only model_name and version_name may be specified."
                 )
-            if task is not
+            if task is not type_hints.Task.UNKNOWN:
                 raise ValueError("`task` cannot be specified when calling log_model with a ModelVersion.")

         if pip_requirements and not artifact_repository_map and self._targets_warehouse(target_platforms):

@@ -486,8 +451,12 @@ class Registry:
                 stacklevel=1,
             )

-
-        with
+        registry_event_handler = event_handler.ModelEventHandler()
+        with registry_event_handler.status("Logging model", total=6) as status:
+            # Step 1: Validation and setup
+            status.update("validating model and dependencies...")
+            status.increment()
+
             # Perform the actual model logging
             try:
                 result = self._model_manager.log_model(

@@ -510,13 +479,12 @@ class Registry:
                     task=task,
                     options=options,
                     statement_params=statement_params,
-
+                    progress_status=status,
                 )
-                status.update(label="Model logged successfully
+                status.update(label="Model logged successfully.", state="complete", expanded=False)
                 return result
             except Exception as e:
-
-                status.update(label="Model logging failed!", state="error", expanded=False)
+                status.update(label="Model logging failed.", state="error", expanded=False)
                 raise e

     @telemetry.send_api_usage_telemetry(
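
The two hunks above introduce the progress reporting added in this release: `log_model` now creates a `ModelEventHandler` (defined in the new `snowflake/ml/model/event_handler.py`, +117 lines) and passes the status context down to `ModelManager.log_model` through the new `progress_status` argument. Isolated from the diff, the calling pattern is as follows (a sketch assembled only from the calls visible above):

```python
from snowflake.ml.model import event_handler

# Progress-reporting pattern used by Registry.log_model (sketch).
handler = event_handler.ModelEventHandler()
with handler.status("Logging model", total=6) as status:  # total = number of steps
    status.update("validating model and dependencies...")  # describe the current step
    status.increment()  # advance the progress counter by one step
    # ...remaining steps run inside ModelManager.log_model(progress_status=status)...
    status.update(label="Model logged successfully.", state="complete", expanded=False)
```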

@@ -696,10 +664,10 @@ class Registry:
         self._model_monitor_manager.delete_monitor(name)

     @staticmethod
-    def _targets_warehouse(target_platforms: Optional[list[
+    def _targets_warehouse(target_platforms: Optional[list[type_hints.SupportedTargetPlatformType]]) -> bool:
         """Returns True if warehouse is a target platform (None defaults to True)."""
         return (
             target_platforms is None
-            or
+            or type_hints.TargetPlatform.WAREHOUSE in target_platforms
             or "WAREHOUSE" in target_platforms
         )
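
With the overloads now fully typed, a call against the new `log_model` signature might look like the sketch below. It is illustrative only: `session` and the fitted estimator `clf` are assumed, `"WAREHOUSE"` relies on the string form accepted by `target_platforms`, and `save_location` is the option described in the 1.9.0 release notes further down.

```python
from snowflake.ml.registry import Registry

reg = Registry(session=session, database_name="ML", schema_name="PUBLIC")
mv = reg.log_model(
    clf,                             # any type_hints.SupportedModelType
    model_name="MY_MODEL",
    version_name="V1",
    target_platforms=["WAREHOUSE"],  # strings or target_platform.TargetPlatform values
    options={"save_location": "/tmp/model_staging"},  # local staging dir before upload
)
```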
snowflake/ml/version.py
CHANGED

@@ -1,2 +1,2 @@
 # This is parsed by regex in conda recipe meta file. Make sure not to break it.
-VERSION = "1.9.0"
+VERSION = "1.9.2"

{snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowflake-ml-python
-Version: 1.9.0
+Version: 1.9.2
 Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
 Author-email: "Snowflake, Inc" <support@snowflake.com>
 License:

@@ -240,9 +240,10 @@ Requires-Dist: cloudpickle>=2.0.0
 Requires-Dist: cryptography
 Requires-Dist: fsspec[http]<2026,>=2024.6.1
 Requires-Dist: importlib_resources<7,>=6.1.1
-Requires-Dist: numpy<
+Requires-Dist: numpy<3,>=1.23
 Requires-Dist: packaging<25,>=20.9
 Requires-Dist: pandas<3,>=2.1.4
+Requires-Dist: platformdirs<5
 Requires-Dist: pyarrow
 Requires-Dist: pydantic<3,>=2.8.2
 Requires-Dist: pyjwt<3,>=2.0.0

@@ -257,6 +258,7 @@ Requires-Dist: snowflake-connector-python[pandas]<4,>=3.15.0
 Requires-Dist: snowflake-snowpark-python!=1.26.0,<2,>=1.17.0
 Requires-Dist: snowflake.core<2,>=1.0.2
 Requires-Dist: sqlparse<1,>=0.4
+Requires-Dist: tqdm<5
 Requires-Dist: typing-extensions<5,>=4.1.0
 Requires-Dist: xgboost<3,>=1.7.3
 Provides-Extra: all

@@ -272,7 +274,7 @@ Requires-Dist: tensorflow<3,>=2.17.0; extra == "all"
 Requires-Dist: tokenizers<1,>=0.15.1; extra == "all"
 Requires-Dist: torch<3,>=2.0.1; extra == "all"
 Requires-Dist: torchdata<1,>=0.4; extra == "all"
-Requires-Dist: transformers
+Requires-Dist: transformers!=4.51.3,<5,>=4.39.3; extra == "all"
 Provides-Extra: altair
 Requires-Dist: altair<6,>=5; extra == "altair"
 Provides-Extra: catboost

@@ -297,7 +299,7 @@ Requires-Dist: sentence-transformers<4,>=2.7.0; extra == "transformers"
 Requires-Dist: sentencepiece<0.2.0,>=0.1.95; extra == "transformers"
 Requires-Dist: tokenizers<1,>=0.15.1; extra == "transformers"
 Requires-Dist: torch<3,>=2.0.1; extra == "transformers"
-Requires-Dist: transformers
+Requires-Dist: transformers!=4.51.3,<5,>=4.39.3; extra == "transformers"
 Dynamic: license-file

 # Snowpark ML

@@ -408,6 +410,92 @@ NOTE: Version 1.7.0 is used as example here. Please choose the the latest versio

 # Release History

+## 1.9.2
+
+### Bug Fixes
+
+- DataConnector: Fix `self._session` related errors inside Container Runtime.
+- Registry: Fix a bug when trying to pass `None` to array (`pd.dtype('O')`) in signature and pandas data handler.
+
+### New Features
+
+- Experiment Tracking (PrPr): Automatically log the model, metrics, and parameters while training
+  XGBoost and LightGBM models.
+
+  ```python
+  from snowflake.ml.experiment import ExperimentTracking
+  from snowflake.ml.experiment.callback import SnowflakeXgboostCallback, SnowflakeLightgbmCallback
+
+  exp = ExperimentTracking(session=sp_session, database_name="ML", schema_name="PUBLIC")
+
+  exp.set_experiment("MY_EXPERIMENT")
+
+  # XGBoost
+  callback = SnowflakeXgboostCallback(
+      exp, log_model=True, log_metrics=True, log_params=True, model_name="model_name", model_signature=sig
+  )
+  model = XGBClassifier(callbacks=[callback])
+  with exp.start_run():
+      model.fit(X, y, eval_set=[(X_test, y_test)])
+
+  # LightGBM
+  callback = SnowflakeLightgbmCallback(
+      exp, log_model=True, log_metrics=True, log_params=True, model_name="model_name", model_signature=sig
+  )
+  model = LGBMClassifier()
+  with exp.start_run():
+      model.fit(X, y, eval_set=[(X_test, y_test)], callbacks=[callback])
+  ```
+
+## 1.9.1 (07-18-2025)
+
+### Bug Fixes
+
+- Registry: Fix a bug when trying to set the PAD token the HuggingFace `text-generation` model had multiple EOS tokens.
+  The handler picks the first EOS token as PAD token now.
+
+### New Features
+
+- DataConnector: DataConnector objects can now be pickled
+- Dataset: Dataset objects can now be pickled
+- Registry (PrPr): Introducing `create_service` function in `snowflake/ml/model/models/huggingface_pipeline.py`
+  which creates a service to log a HF model and upon successful logging, an inference service is created.
+
+  ```python
+  from snowflake.ml.model.models import huggingface_pipeline
+
+  hf_model_ref = huggingface_pipeline.HuggingFacePipelineModel(
+      model="gpt2",
+      task="text-generation", # Optional
+  )
+
+
+  hf_model_ref.create_service(
+      session=session,
+      service_name="test_service",
+      service_compute_pool="test_compute_pool",
+      image_repo="test_repo",
+      ...
+  )
+  ```
+
+- Experiment Tracking (PrPr): New module for managing and tracking ML experiments in Snowflake.
+
+  ```python
+  from snowflake.ml.experiment import ExperimentTracking
+
+  exp = ExperimentTracking(session=sp_session, database_name="ML", schema_name="PUBLIC")
+
+  exp.set_experiment("MY_EXPERIMENT")
+
+  with exp.start_run():
+      exp.log_param("batch_size", 32)
+      exp.log_metrics("accuracy", 0.98, step=10)
+      exp.log_model(my_model, model_name="MY_MODEL")
+  ```
+
+- Registry: Added support for wide input (500+ features) for inference done using SPCS
+
 ## 1.9.0

 ### Bug Fixes

@@ -415,6 +503,19 @@ NOTE: Version 1.7.0 is used as example here. Please choose the the latest versio
 - Registry: Fixed bug causing snowpark to pandas dataframe conversion to fail when `QUOTED_IDENTIFIERS_IGNORE_CASE`
   parameter is enabled
 - Registry: Fixed duplicate UserWarning logs during model packaging
+- Registry: If the huggingface pipeline text-generation model doesn't contain a default chat template, a ChatML template
+  is assigned to the tokenizer.
+
+  ```shell
+  {% for message in messages %}
+  {{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}
+  {% endfor %}
+  {% if add_generation_prompt %}
+  {{ '<|im_start|>assistant\n' }}
+  {% endif %}"
+  ```
+
+- Registry: Fixed SQL queries during registry initialization that were forcing warehouse requirement

 ### Behavior Changes

@@ -524,7 +625,8 @@ NOTE: Version 1.7.0 is used as example here. Please choose the the latest versio
 - Pre-created Snowpark Session is now available inside job payloads using
   `snowflake.snowpark.context.get_active_session()`
 - Registry: Introducing `save_location` to `log_model` using the `options` argument.
-
+  Users can use the `save_location` option to specify a local directory where the model files and configuration are written.
+  This is useful when the default temporary directory has space limitations.

 ```python
 reg.log_model(