snowflake-ml-python 1.9.0__py3-none-any.whl → 1.9.2__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published in their respective public registries.
Files changed (62)
  1. snowflake/ml/_internal/env_utils.py +44 -3
  2. snowflake/ml/_internal/platform_capabilities.py +52 -2
  3. snowflake/ml/_internal/type_utils.py +1 -1
  4. snowflake/ml/_internal/utils/mixins.py +54 -42
  5. snowflake/ml/_internal/utils/service_logger.py +105 -3
  6. snowflake/ml/data/_internal/arrow_ingestor.py +15 -2
  7. snowflake/ml/data/data_connector.py +13 -2
  8. snowflake/ml/data/data_ingestor.py +8 -0
  9. snowflake/ml/data/torch_utils.py +1 -1
  10. snowflake/ml/dataset/dataset.py +2 -1
  11. snowflake/ml/dataset/dataset_reader.py +14 -4
  12. snowflake/ml/experiment/__init__.py +3 -0
  13. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +98 -0
  14. snowflake/ml/experiment/_entities/__init__.py +4 -0
  15. snowflake/ml/experiment/_entities/experiment.py +10 -0
  16. snowflake/ml/experiment/_entities/run.py +62 -0
  17. snowflake/ml/experiment/_entities/run_metadata.py +68 -0
  18. snowflake/ml/experiment/_experiment_info.py +63 -0
  19. snowflake/ml/experiment/callback.py +121 -0
  20. snowflake/ml/experiment/experiment_tracking.py +319 -0
  21. snowflake/ml/jobs/_utils/constants.py +15 -4
  22. snowflake/ml/jobs/_utils/payload_utils.py +156 -54
  23. snowflake/ml/jobs/_utils/query_helper.py +16 -5
  24. snowflake/ml/jobs/_utils/scripts/constants.py +0 -22
  25. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +130 -23
  26. snowflake/ml/jobs/_utils/spec_utils.py +23 -8
  27. snowflake/ml/jobs/_utils/stage_utils.py +30 -14
  28. snowflake/ml/jobs/_utils/types.py +64 -4
  29. snowflake/ml/jobs/job.py +70 -75
  30. snowflake/ml/jobs/manager.py +59 -31
  31. snowflake/ml/lineage/lineage_node.py +2 -2
  32. snowflake/ml/model/_client/model/model_version_impl.py +16 -4
  33. snowflake/ml/model/_client/ops/service_ops.py +336 -137
  34. snowflake/ml/model/_client/service/model_deployment_spec.py +1 -1
  35. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -1
  36. snowflake/ml/model/_client/sql/service.py +1 -38
  37. snowflake/ml/model/_model_composer/model_composer.py +6 -1
  38. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +17 -3
  39. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  40. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +41 -2
  41. snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -5
  42. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +3 -1
  43. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -3
  44. snowflake/ml/model/_signatures/pandas_handler.py +3 -0
  45. snowflake/ml/model/_signatures/utils.py +4 -0
  46. snowflake/ml/model/event_handler.py +117 -0
  47. snowflake/ml/model/model_signature.py +11 -9
  48. snowflake/ml/model/models/huggingface_pipeline.py +170 -1
  49. snowflake/ml/modeling/framework/base.py +1 -1
  50. snowflake/ml/modeling/metrics/classification.py +14 -14
  51. snowflake/ml/modeling/metrics/correlation.py +19 -8
  52. snowflake/ml/modeling/metrics/ranking.py +6 -6
  53. snowflake/ml/modeling/metrics/regression.py +9 -9
  54. snowflake/ml/monitoring/explain_visualize.py +12 -5
  55. snowflake/ml/registry/_manager/model_manager.py +32 -15
  56. snowflake/ml/registry/registry.py +48 -80
  57. snowflake/ml/version.py +1 -1
  58. {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/METADATA +107 -5
  59. {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/RECORD +62 -52
  60. {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/WHEEL +0 -0
  61. {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/licenses/LICENSE.txt +0 -0
  62. {snowflake_ml_python-1.9.0.dist-info → snowflake_ml_python-1.9.2.dist-info}/top_level.txt +0 -0
snowflake/ml/_internal/env_utils.py
@@ -337,13 +337,54 @@ def get_package_spec_with_supported_ops_only(req: requirements.Requirement) -> requirements.Requirement:
     Returns:
         A requirements.Requirement object with supported ops only
     """
+
+    if req.name == "numpy":
+        import numpy as np
+
+        package_specifiers = get_numpy_specifiers(req, version.Version(np.__version__).major)
+    else:
+        package_specifiers = [spec for spec in req.specifier if spec.operator in _SUPPORTED_PACKAGE_SPEC_OPS]
+
     new_req = copy.deepcopy(req)
-    new_req.specifier = specifiers.SpecifierSet(
-        specifiers=",".join([str(spec) for spec in req.specifier if spec.operator in _SUPPORTED_PACKAGE_SPEC_OPS])
-    )
+    new_req.specifier = specifiers.SpecifierSet(specifiers=",".join([str(spec) for spec in package_specifiers]))
     return new_req
 
 
+def get_numpy_specifiers(
+    req: requirements.Requirement,
+    client_numpy_major_version: int,
+) -> list[specifiers.Specifier]:
+    """Get the package spec with supported ops only including ==, >=, <=, > and < based on the client numpy
+    major version.
+
+    Args:
+        req: A requirements.Requirement object showing the requirement.
+        client_numpy_major_version: The major version of numpy to be used.
+
+    Returns:
+        A list of specifiers with supported ops only
+    """
+    req_specifiers = []
+    for org_spec in req.specifier:
+        # check specifier that provides upper bound
+        if org_spec.operator in ["<", "<="]:
+            client_version = version.Version(str(client_numpy_major_version))
+            org_spec_version = version.Version(org_spec.version)
+            # check if the client's numpy major version is less than the specifier's upper bound
+            # if so, pin to max possible client major version
+            if client_version.major < org_spec_version.major:
+                modified_spec = specifiers.Specifier(f"<{client_version.major + 1}")
+                req_specifiers.append(modified_spec)
+            else:
+                # use the original specifier
+                req_specifiers.append(org_spec)
+        else:
+            # use the original specifier
+            req_specifiers.append(org_spec)
+
+    return req_specifiers
+
+
 def _relax_specifier_set(
     specifier_set: specifiers.SpecifierSet, strategy: relax_version_strategy.RelaxVersionStrategy
 ) -> specifiers.SpecifierSet:
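The new `get_numpy_specifiers` caps any `<` or `<=` bound whose major version exceeds the client's installed numpy major line. A standalone, runnable sketch of that behavior (the helper name below is local to this example, not the package API):

```python
# Sketch of the upper-bound pinning logic; names are local to this example.
from packaging import requirements, specifiers, version


def numpy_specifiers_for_client(
    req: requirements.Requirement, client_major: int
) -> list[specifiers.Specifier]:
    result = []
    for spec in req.specifier:
        # Tighten any upper bound that sits above the client's numpy major line.
        if spec.operator in ("<", "<=") and client_major < version.Version(spec.version).major:
            result.append(specifiers.Specifier(f"<{client_major + 1}"))
        else:
            result.append(spec)
    return result


req = requirements.Requirement("numpy>=1.22,<3")
print(sorted(str(s) for s in numpy_specifiers_for_client(req, client_major=1)))
# On a numpy 1.x client, "<3" is pinned down to "<2": ['<2', '>=1.22']
```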
snowflake/ml/_internal/platform_capabilities.py
@@ -3,7 +3,9 @@ from contextlib import contextmanager
 from typing import Any, Optional
 
 from absl import logging
+from packaging import version
 
+from snowflake.ml import version as snowml_version
 from snowflake.ml._internal.exceptions import error_codes, exceptions
 from snowflake.ml._internal.utils import query_result_checker
 from snowflake.snowpark import (
@@ -12,7 +14,7 @@ from snowflake.snowpark import (
 )
 
 LIVE_COMMIT_PARAMETER = "ENABLE_LIVE_VERSION_IN_SDK"
-INLINE_DEPLOYMENT_SPEC_PARAMETER = "ENABLE_INLINE_DEPLOYMENT_SPEC"
+INLINE_DEPLOYMENT_SPEC_PARAMETER = "ENABLE_INLINE_DEPLOYMENT_SPEC_FROM_CLIENT_VERSION"
 
 
 class PlatformCapabilities:
@@ -67,7 +69,7 @@ class PlatformCapabilities:
         cls.clear_mock_features()
 
     def is_inlined_deployment_spec_enabled(self) -> bool:
-        return self._get_bool_feature(INLINE_DEPLOYMENT_SPEC_PARAMETER, False)
+        return self._is_version_feature_enabled(INLINE_DEPLOYMENT_SPEC_PARAMETER)
 
     def is_live_commit_enabled(self) -> bool:
         return self._get_bool_feature(LIVE_COMMIT_PARAMETER, False)
@@ -126,3 +128,51 @@ class PlatformCapabilities:
         else:
             raise ValueError(f"Invalid boolean string: {value} for feature {feature_name}")
         raise ValueError(f"Invalid boolean feature value: {value} for feature {feature_name}")
+
+    def _get_version_feature(self, feature_name: str) -> version.Version:
+        """Get a version feature value, returning a large version number on failure or missing feature.
+
+        Args:
+            feature_name: The name of the feature to retrieve.
+
+        Returns:
+            version.Version: The parsed version, or a large version number (999.999.999) if parsing fails
+                or the feature is missing.
+        """
+        # Large version number to use as fallback
+        large_version = version.Version("999.999.999")
+
+        value = self.features.get(feature_name)
+        if value is None:
+            logging.debug(f"Feature {feature_name} not found, returning large version number")
+            return large_version
+
+        try:
+            # Convert to string if it's not already
+            version_str = str(value)
+            return version.Version(version_str)
+        except (version.InvalidVersion, ValueError, TypeError) as e:
+            logging.debug(
+                f"Failed to parse version from feature {feature_name} with value '{value}': {e}. "
+                f"Returning large version number"
+            )
+            return large_version
+
+    def _is_version_feature_enabled(self, feature_name: str) -> bool:
+        """Check if the current package version is greater than or equal to the version feature.
+
+        Args:
+            feature_name: The name of the version feature to compare against.
+
+        Returns:
+            bool: True if current package version >= feature version, False otherwise.
+        """
+        current_version = version.Version(snowml_version.VERSION)
+        feature_version = self._get_version_feature(feature_name)
+
+        result = current_version >= feature_version
+        logging.debug(
+            f"Version comparison for feature {feature_name}: "
+            f"current={current_version}, feature={feature_version}, enabled={result}"
+        )
+        return result
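The effect of the new gate: the server advertises a minimum client version per feature, and the flag is on only when the installed client is at or past it; a missing or unparsable value falls back to `999.999.999`, which keeps the feature off. A reduced, runnable sketch (local names, not the package API):

```python
# Sketch of _get_version_feature / _is_version_feature_enabled; local names.
from packaging import version

_NEVER = version.Version("999.999.999")  # fallback: feature effectively disabled


def feature_enabled(client_version: str, advertised: object) -> bool:
    if advertised is None:
        gate = _NEVER
    else:
        try:
            gate = version.Version(str(advertised))
        except version.InvalidVersion:
            gate = _NEVER
    return version.Version(client_version) >= gate


print(feature_enabled("1.9.2", "1.9.1"))  # True: client at or past the gate
print(feature_enabled("1.9.0", "1.9.1"))  # False: client predates the gate
print(feature_enabled("1.9.2", None))     # False: parameter not advertised
```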
snowflake/ml/_internal/type_utils.py
@@ -66,4 +66,4 @@ class LazyType(Generic[T]):
         return False
 
 
-LiteralNDArrayType = Union[npt.NDArray[np.int_], npt.NDArray[np.float_], npt.NDArray[np.str_], npt.NDArray[np.bool_]]
+LiteralNDArrayType = Union[npt.NDArray[np.int_], npt.NDArray[np.float64], npt.NDArray[np.str_], npt.NDArray[np.bool_]]
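Background for this one-token change: `np.float_` was an alias of `np.float64` and was removed in NumPy 2.0, so the annotation now names the concrete dtype and imports cleanly on both major lines. A quick check:

```python
# np.float64 is importable on NumPy 1.x and 2.x; np.float_ is gone in 2.x.
import numpy as np
import numpy.typing as npt

arr: npt.NDArray[np.float64] = np.zeros(3, dtype=np.float64)
print(arr.dtype)  # float64
```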
snowflake/ml/_internal/utils/mixins.py
@@ -1,7 +1,19 @@
 from typing import Any, Optional
 
 from snowflake.ml._internal.utils import identifier
-from snowflake.snowpark import session
+from snowflake.snowpark import session as snowpark_session
+
+_SESSION_KEY = "_session"
+_SESSION_ACCOUNT_KEY = "session$account"
+_SESSION_ROLE_KEY = "session$role"
+_SESSION_DATABASE_KEY = "session$database"
+_SESSION_SCHEMA_KEY = "session$schema"
+
+
+def _identifiers_match(saved: Optional[str], current: Optional[str]) -> bool:
+    saved_resolved = identifier.resolve_identifier(saved) if saved is not None else saved
+    current_resolved = identifier.resolve_identifier(current) if current is not None else current
+    return saved_resolved == current_resolved
 
 
 class SerializableSessionMixin:
@@ -9,53 +21,53 @@ class SerializableSessionMixin:
 
     def __getstate__(self) -> dict[str, Any]:
         """Customize pickling to exclude non-serializable session and related components."""
-        state = self.__dict__.copy()
+        parent_state = (
+            super().__getstate__()  # type: ignore[misc] # object.__getstate__ appears in 3.11
+            if hasattr(super(), "__getstate__")
+            else self.__dict__
+        )
+        state = dict(parent_state)  # Create a copy so we can safely modify the state
 
         # Save session metadata for validation during unpickling
-        if hasattr(self, "_session") and self._session is not None:
-            try:
-                state["__session-account__"] = self._session.get_current_account()
-                state["__session-role__"] = self._session.get_current_role()
-                state["__session-database__"] = self._session.get_current_database()
-                state["__session-schema__"] = self._session.get_current_schema()
-            except Exception:
-                pass
-
-        state["_session"] = None
+        session = state.pop(_SESSION_KEY, None)
+        if session is not None:
+            state[_SESSION_ACCOUNT_KEY] = session.get_current_account()
+            state[_SESSION_ROLE_KEY] = session.get_current_role()
+            state[_SESSION_DATABASE_KEY] = session.get_current_database()
+            state[_SESSION_SCHEMA_KEY] = session.get_current_schema()
+
         return state
 
     def __setstate__(self, state: dict[str, Any]) -> None:
         """Restore session from context during unpickling."""
-        saved_account = state.pop("__session-account__", None)
-        saved_role = state.pop("__session-role__", None)
-        saved_database = state.pop("__session-database__", None)
-        saved_schema = state.pop("__session-schema__", None)
-        self.__dict__.update(state)
+        saved_account = state.pop(_SESSION_ACCOUNT_KEY, None)
+        saved_role = state.pop(_SESSION_ROLE_KEY, None)
+        saved_database = state.pop(_SESSION_DATABASE_KEY, None)
+        saved_schema = state.pop(_SESSION_SCHEMA_KEY, None)
+
+        if hasattr(super(), "__setstate__"):
+            super().__setstate__(state)  # type: ignore[misc]
+        else:
+            self.__dict__.update(state)
 
         if saved_account is not None:
+            active_sessions = snowpark_session._get_active_sessions()
+            if len(active_sessions) == 0:
+                raise RuntimeError("No active Snowpark session available. Please create a session.")
 
-            def identifiers_match(saved: Optional[str], current: Optional[str]) -> bool:
-                saved_resolved = identifier.resolve_identifier(saved) if saved is not None else saved
-                current_resolved = identifier.resolve_identifier(current) if current is not None else current
-                return saved_resolved == current_resolved
-
-            for active_session in session._get_active_sessions():
-                try:
-                    current_account = active_session.get_current_account()
-                    current_role = active_session.get_current_role()
-                    current_database = active_session.get_current_database()
-                    current_schema = active_session.get_current_schema()
-
-                    if (
-                        identifiers_match(saved_account, current_account)
-                        and identifiers_match(saved_role, current_role)
-                        and identifiers_match(saved_database, current_database)
-                        and identifiers_match(saved_schema, current_schema)
-                    ):
-                        self._session = active_session
-                        return
-                except Exception:
-                    continue
-
-            # No matching session found or no metadata available
-            raise RuntimeError("No active Snowpark session available. Please create a session.")
+            # Best effort match: Find the session with the most matching identifiers
+            setattr(
+                self,
+                _SESSION_KEY,
+                max(
+                    active_sessions,
+                    key=lambda s: sum(
+                        (
+                            _identifiers_match(saved_account, s.get_current_account()),
+                            _identifiers_match(saved_role, s.get_current_role()),
+                            _identifiers_match(saved_database, s.get_current_database()),
+                            _identifiers_match(saved_schema, s.get_current_schema()),
+                        )
+                    ),
+                ),
+            )
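The rewritten `__setstate__` no longer raises when no session matches exactly; it scores every active session by how many saved identifiers agree and keeps the best. The scoring idea, reduced to a runnable sketch with plain dicts standing in for sessions:

```python
# Score each candidate by how many saved identifiers it matches; max() keeps
# the best-scoring one, so an imperfect match is used instead of raising.
saved = {"account": "ACME", "role": "ANALYST", "database": "DB1", "schema": "PUBLIC"}
candidates = [
    {"account": "ACME", "role": "ANALYST", "database": "DB1", "schema": "PUBLIC"},   # 4/4
    {"account": "ACME", "role": "SYSADMIN", "database": "DB2", "schema": "PUBLIC"},  # 2/4
]

best = max(candidates, key=lambda s: sum(s[k] == saved[k] for k in saved))
print(best["role"])  # ANALYST: the fully matching candidate wins
```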
snowflake/ml/_internal/utils/service_logger.py
@@ -1,6 +1,13 @@
 import enum
 import logging
+import os
 import sys
+import tempfile
+import time
+import uuid
+from typing import Optional
+
+import platformdirs
 
 
 class LogColor(enum.Enum):
@@ -10,6 +17,10 @@ class LogColor(enum.Enum):
     YELLOW = "\x1b[33;20m"
     BLUE = "\x1b[34;20m"
     GREEN = "\x1b[32;20m"
+    ORANGE = "\x1b[38;5;214m"
+    BOLD_ORANGE = "\x1b[38;5;214;1m"
+    PURPLE = "\x1b[35;20m"
+    BOLD_PURPLE = "\x1b[35;1m"
 
 
 class CustomFormatter(logging.Formatter):
@@ -53,11 +64,102 @@ class CustomFormatter(logging.Formatter):
         return "\n".join(formatted_lines)
 
 
-def get_logger(logger_name: str, info_color: LogColor) -> logging.Logger:
+def _test_writability(directory: str) -> bool:
+    """Test if a directory is writable by creating and removing a test file."""
+    try:
+        os.makedirs(directory, exist_ok=True)
+        test_file = os.path.join(directory, f".write_test_{uuid.uuid4().hex[:8]}")
+        with open(test_file, "w") as f:
+            f.write("test")
+        os.remove(test_file)
+        return True
+    except OSError:
+        return False
+
+
+def _try_log_location(log_dir: str, operation_id: str) -> Optional[str]:
+    """Try to create a log file in the given directory if it's writable."""
+    if _test_writability(log_dir):
+        return os.path.join(log_dir, f"{operation_id}.log")
+    return None
+
+
+def _get_log_file_path(operation_id: str) -> Optional[str]:
+    """Get platform-independent log file path. Returns None if no writable location found."""
+    # Try locations in order of preference
+    locations = [
+        # Primary: User log directory
+        platformdirs.user_log_dir("snowflake-ml", "Snowflake"),
+        # Fallback 1: System temp directory
+        os.path.join(tempfile.gettempdir(), "snowflake-ml-logs"),
+        # Fallback 2: Current working directory
+        ".",
+    ]
+
+    for location in locations:
+        log_file_path = _try_log_location(location, operation_id)
+        if log_file_path:
+            return log_file_path
+
+    # No writable location found
+    return None
+
+
+def _get_or_create_parent_logger(operation_id: str) -> logging.Logger:
+    """Get or create a parent logger with FileHandler for the operation."""
+    parent_logger_name = f"snowflake_ml_operation_{operation_id}"
+    parent_logger = logging.getLogger(parent_logger_name)
+
+    # Only add handler if it doesn't exist yet
+    if not parent_logger.handlers:
+        log_file_path = _get_log_file_path(operation_id)
+
+        if log_file_path:
+            # Successfully found a writable location
+            try:
+                file_handler = logging.FileHandler(log_file_path)
+                file_handler.setFormatter(logging.Formatter("%(name)s [%(asctime)s] [%(levelname)s] %(message)s"))
+                parent_logger.addHandler(file_handler)
+                parent_logger.setLevel(logging.DEBUG)
+                parent_logger.propagate = False  # Don't propagate to root logger
+
+                # Log the file location
+                parent_logger.warning(f"Operation logs saved to: {log_file_path}")
+            except OSError as e:
+                # Even though we found a path, file creation failed
+                # Fall back to console-only logging
+                parent_logger.setLevel(logging.DEBUG)
+                parent_logger.propagate = False
+                parent_logger.warning(f"Could not create log file at {log_file_path}: {e}. Using console-only logging.")
+        else:
+            # No writable location found, use console-only logging
+            parent_logger.setLevel(logging.DEBUG)
+            parent_logger.propagate = False
+            parent_logger.warning("Filesystem appears to be readonly. Using console-only logging.")
+
+    return parent_logger
+
+
+def get_logger(logger_name: str, info_color: LogColor, operation_id: Optional[str] = None) -> logging.Logger:
     logger = logging.getLogger(logger_name)
-    logger.setLevel(logging.INFO)
     handler = logging.StreamHandler(sys.stdout)
-    handler.setLevel(logging.INFO)
     handler.setFormatter(CustomFormatter(info_color))
     logger.addHandler(handler)
+
+    # If operation_id provided, set up parent logger with file handler
+    if operation_id:
+        parent_logger = _get_or_create_parent_logger(operation_id)
+        logger.parent = parent_logger
+        logger.propagate = True
+
     return logger
+
+
+def get_operation_id() -> str:
+    """Generate a unique operation ID."""
+    return f"model_deploy_{uuid.uuid4().hex[:8]}_{int(time.time())}"
+
+
+def get_log_file_location(operation_id: str) -> Optional[str]:
+    """Get the log file path for an operation ID. Returns None if no writable location available."""
+    return _get_log_file_path(operation_id)
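The log-location fallback chain above can be probed directly. A small sketch that only assumes `platformdirs` (newly imported by this module) is installed:

```python
# Print the three candidate log directories in the order the module tries them.
import os
import tempfile

import platformdirs

candidates = [
    platformdirs.user_log_dir("snowflake-ml", "Snowflake"),   # primary
    os.path.join(tempfile.gettempdir(), "snowflake-ml-logs"),  # fallback 1
    ".",                                                       # fallback 2: cwd
]
for path in candidates:
    print(path)
```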
snowflake/ml/data/_internal/arrow_ingestor.py
@@ -2,7 +2,7 @@ import collections
 import logging
 import os
 import time
-from typing import Any, Deque, Iterator, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Any, Deque, Iterator, Optional, Sequence, Union
 
 import numpy as np
 import numpy.typing as npt
@@ -10,7 +10,11 @@ import pandas as pd
 import pyarrow as pa
 import pyarrow.dataset as pds
 
+if TYPE_CHECKING:
+    import ray
+
 from snowflake import snowpark
+from snowflake.ml._internal.utils import mixins
 from snowflake.ml.data import data_ingestor, data_source, ingestor_utils
 
 _EMPTY_RECORD_BATCH = pa.RecordBatch.from_arrays([], [])
@@ -41,7 +45,7 @@ class _RecordBatchesBuffer:
         return popped
 
 
-class ArrowIngestor(data_ingestor.DataIngestor):
+class ArrowIngestor(data_ingestor.DataIngestor, mixins.SerializableSessionMixin):
     """Read and parse the data sources into an Arrow Dataset and yield batched numpy array in dict."""
 
     def __init__(
@@ -68,8 +72,17 @@ class ArrowIngestor(data_ingestor.DataIngestor):
 
     @classmethod
     def from_sources(cls, session: snowpark.Session, sources: Sequence[data_source.DataSource]) -> "ArrowIngestor":
+        if session is None:
+            raise ValueError("Session is required")
        return cls(session, sources)
 
+    @classmethod
+    def from_ray_dataset(
+        cls,
+        ray_ds: "ray.data.Dataset",
+    ) -> "ArrowIngestor":
+        raise NotImplementedError
+
     @property
     def data_sources(self) -> list[data_source.DataSource]:
         return self._data_sources

snowflake/ml/data/data_connector.py
@@ -8,7 +8,7 @@ from snowflake import snowpark
 from snowflake.ml._internal import env, telemetry
 from snowflake.ml.data import data_ingestor, data_source
 from snowflake.ml.data._internal.arrow_ingestor import ArrowIngestor
-from snowflake.snowpark import context as sf_context
+from snowflake.snowpark import context as sp_context
 
 if TYPE_CHECKING:
     import pandas as pd
@@ -57,7 +57,7 @@ class DataConnector:
         ingestor_class: Optional[type[data_ingestor.DataIngestor]] = None,
         **kwargs: Any,
     ) -> DataConnectorType:
-        session = session or sf_context.get_active_session()
+        session = session or sp_context.get_active_session()
         source = data_source.DataFrameInfo(query)
         return cls.from_sources(session, [source], ingestor_class=ingestor_class, **kwargs)
 
@@ -75,6 +75,17 @@ class DataConnector:
         )
         return cls.from_sources(ds._session, [source], ingestor_class=ingestor_class, **kwargs)
 
+    @classmethod
+    def from_ray_dataset(
+        cls: type[DataConnectorType],
+        ray_ds: "ray.data.Dataset",
+        ingestor_class: Optional[type[data_ingestor.DataIngestor]] = None,
+        **kwargs: Any,
+    ) -> DataConnectorType:
+        ingestor_class = ingestor_class or cls.DEFAULT_INGESTOR_CLASS
+        ray_ingestor = ingestor_class.from_ray_dataset(ray_ds=ray_ds)
+        return cls(ray_ingestor, **kwargs)
+
     @classmethod
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,

snowflake/ml/data/data_ingestor.py
@@ -7,6 +7,7 @@ from snowflake.ml.data import data_source
 
 if TYPE_CHECKING:
     import pandas as pd
+    import ray
 
 
 DataIngestorType = TypeVar("DataIngestorType", bound="DataIngestor")
@@ -19,6 +20,13 @@ class DataIngestor(Protocol):
     ) -> DataIngestorType:
         raise NotImplementedError
 
+    @classmethod
+    def from_ray_dataset(
+        cls: type[DataIngestorType],
+        ray_ds: "ray.data.Dataset",
+    ) -> DataIngestorType:
+        raise NotImplementedError
+
     @property
     def data_sources(self) -> list[data_source.DataSource]:
         raise NotImplementedError
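`from_ray_dataset` joins the `DataIngestor` protocol as an alternate constructor that defaults to `NotImplementedError` (the `ArrowIngestor` implementation above is still a stub), and `DataConnector.from_ray_dataset` simply forwards to it on the chosen `ingestor_class`. A minimal sketch of an ingestor opting in, with a hypothetical class and a plain object standing in for `ray.data.Dataset`:

```python
# Hypothetical ingestor implementing the new classmethod; "ray_ds" here is any
# object, standing in for ray.data.Dataset.
from typing import Any


class ToyIngestor:
    def __init__(self, data: Any) -> None:
        self.data = data

    @classmethod
    def from_ray_dataset(cls, ray_ds: Any) -> "ToyIngestor":
        # A real implementation would adapt the Ray dataset into Arrow batches.
        return cls(ray_ds)


ingestor = ToyIngestor.from_ray_dataset(ray_ds="stand-in for ray.data.Dataset")
print(type(ingestor.data))  # <class 'str'>
```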
snowflake/ml/data/torch_utils.py
@@ -95,6 +95,6 @@ def _preprocess_array(
     array_list = arr.tolist()
     # If this is an array of arrays, convert the dtype to match the underlying array.
     # Otherwise, if this is a numpy array of strings, convert the array to a list.
-    arr = np.array(array_list, dtype=arr.flat[0].dtype) if isinstance(arr.flat[0], np.ndarray) else array_list
+    arr = np.array(array_list, dtype=arr.item(0).dtype) if isinstance(arr.item(0), np.ndarray) else array_list
 
     return arr

snowflake/ml/dataset/dataset.py
@@ -14,6 +14,7 @@ from snowflake.ml._internal.exceptions import (
 from snowflake.ml._internal.utils import (
     formatting,
     identifier,
+    mixins,
     query_result_checker,
     snowpark_dataframe_utils,
 )
@@ -27,7 +28,7 @@ _METADATA_MAX_QUERY_LENGTH = 10000
 _DATASET_VERSION_NAME_COL = "version"
 
 
-class DatasetVersion:
+class DatasetVersion(mixins.SerializableSessionMixin):
     """Represents a version of a Snowflake Dataset"""
 
     @telemetry.send_api_usage_telemetry(project=_PROJECT)

snowflake/ml/dataset/dataset_reader.py
@@ -3,6 +3,7 @@ from typing import Any, Optional
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.lineage import lineage_utils
+from snowflake.ml._internal.utils import mixins
 from snowflake.ml.data import data_connector, data_ingestor, data_source, ingestor_utils
 from snowflake.ml.fileset import snowfs
 from snowflake.snowpark._internal import utils as snowpark_utils
@@ -11,7 +12,7 @@ _PROJECT = "Dataset"
 _SUBPROJECT = "DatasetReader"
 
 
-class DatasetReader(data_connector.DataConnector):
+class DatasetReader(data_connector.DataConnector, mixins.SerializableSessionMixin):
     """Snowflake Dataset abstraction which provides application integration connectors"""
 
     @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
@@ -19,14 +20,21 @@ class DatasetReader(data_connector.DataConnector):
         self,
         ingestor: data_ingestor.DataIngestor,
         *,
-        snowpark_session: snowpark.Session,
+        snowpark_session: Optional[snowpark.Session] = None,
     ) -> None:
         super().__init__(ingestor)
 
-        self._session: snowpark.Session = snowpark_session
-        self._fs: snowfs.SnowFileSystem = ingestor_utils.get_dataset_filesystem(self._session)
+        self._session = snowpark_session
+        self._fs_cached: Optional[snowfs.SnowFileSystem] = None
         self._files: Optional[list[str]] = None
 
+    @property
+    def _fs(self) -> snowfs.SnowFileSystem:
+        if self._fs_cached is None:
+            assert self._session is not None
+            self._fs_cached = ingestor_utils.get_dataset_filesystem(self._session)
+        return self._fs_cached
+
     @classmethod
     def from_dataframe(
         cls, df: snowpark.DataFrame, ingestor_class: Optional[type[data_ingestor.DataIngestor]] = None, **kwargs: Any
@@ -42,6 +50,7 @@ class DatasetReader(data_connector.DataConnector):
         files: list[str] = []
         for source in self.data_sources:
             assert isinstance(source, data_source.DatasetInfo)
+            assert self._session is not None
             files.extend(ingestor_utils.get_dataset_files(self._session, source, filesystem=self._fs))
         files.sort()
 
@@ -95,6 +104,7 @@ class DatasetReader(data_connector.DataConnector):
         dfs: list[snowpark.DataFrame] = []
         for source in self.data_sources:
             assert isinstance(source, data_source.DatasetInfo) and source.url is not None
+            assert self._session is not None
             stage_reader = self._session.read.option("pattern", file_path_pattern)
             if "INFER_SCHEMA_OPTIONS" in snowpark_utils.NON_FORMAT_TYPE_OPTIONS:
                 stage_reader = stage_reader.option("INFER_SCHEMA_OPTIONS", {"MAX_FILE_COUNT": 1})
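The `_fs` rework is a lazy-caching property: the filesystem handle is no longer built in `__init__`, so a `DatasetReader` whose session is restored later by `SerializableSessionMixin` only constructs it on first use. The pattern, reduced to a runnable sketch with local names:

```python
# Defer an expensive construction until first access, then reuse it.
from typing import Optional


class Reader:
    def __init__(self) -> None:
        self._fs_cached: Optional[str] = None  # stand-in for snowfs.SnowFileSystem

    @property
    def _fs(self) -> str:
        if self._fs_cached is None:
            self._fs_cached = "filesystem handle"  # built on demand from the session
        return self._fs_cached


r = Reader()
print(r._fs)           # constructed here, on first access
print(r._fs is r._fs)  # True: later accesses reuse the cached handle
```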
snowflake/ml/experiment/__init__.py
@@ -0,0 +1,3 @@
+from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
+
+__all__ = ["ExperimentTracking"]