snowflake-ml-python 1.17.0__py3-none-any.whl → 1.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/telemetry.py +3 -2
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +18 -19
- snowflake/ml/experiment/callback/keras.py +3 -0
- snowflake/ml/experiment/callback/lightgbm.py +3 -0
- snowflake/ml/experiment/callback/xgboost.py +3 -0
- snowflake/ml/experiment/experiment_tracking.py +50 -70
- snowflake/ml/feature_store/feature_store.py +299 -69
- snowflake/ml/feature_store/feature_view.py +12 -6
- snowflake/ml/fileset/stage_fs.py +12 -1
- snowflake/ml/jobs/_utils/constants.py +12 -1
- snowflake/ml/jobs/_utils/payload_utils.py +7 -1
- snowflake/ml/jobs/_utils/stage_utils.py +4 -0
- snowflake/ml/jobs/_utils/types.py +5 -0
- snowflake/ml/jobs/job.py +19 -5
- snowflake/ml/jobs/manager.py +18 -7
- snowflake/ml/model/__init__.py +19 -0
- snowflake/ml/model/_client/model/batch_inference_specs.py +63 -0
- snowflake/ml/model/_client/model/inference_engine_utils.py +1 -5
- snowflake/ml/model/_client/model/model_version_impl.py +129 -11
- snowflake/ml/model/_client/ops/model_ops.py +11 -4
- snowflake/ml/model/_client/ops/service_ops.py +3 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +3 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +3 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +4 -1
- snowflake/ml/model/_packager/model_handlers/_utils.py +70 -0
- snowflake/ml/model/_packager/model_handlers/prophet.py +566 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/type_hints.py +16 -0
- snowflake/ml/modeling/metrics/metrics_utils.py +9 -2
- snowflake/ml/monitoring/explain_visualize.py +3 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/METADATA +50 -4
- {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/RECORD +38 -37
- {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/top_level.txt +0 -0
snowflake/ml/feature_store/feature_store.py
CHANGED

@@ -474,8 +474,8 @@ class FeatureStore:
             feature_view: FeatureView instance to materialize.
             version: version of the registered FeatureView.
                 NOTE: Version only accepts letters, numbers and underscore. Also version will be capitalized.
-            block:
-
+            block: Deprecated. To make the initial refresh asynchronous, set the `initialize`
+                argument on the `FeatureView` to `"ON_SCHEDULE"`. Default is true.
             overwrite: Overwrite the existing FeatureView with same version. This is the same as dropping the
                 FeatureView first then recreate. NOTE: there will be backfill cost associated if the FeatureView is
                 being continuously maintained.
@@ -521,6 +521,15 @@ class FeatureStore:
         """
         version = FeatureViewVersion(version)

+        if block is False:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_ARGUMENT,
+                original_exception=ValueError(
+                    'block=False is deprecated. Use FeatureView(..., initialize="ON_SCHEDULE") '
+                    "for async initial refresh."
+                ),
+            )
+
         if feature_view.status != FeatureViewStatus.DRAFT:
             try:
                 return self._get_feature_view_if_exists(feature_view.name, str(version))
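For callers migrating off `block=False`, a minimal sketch (the entity, DataFrame, and names below are illustrative, not from this diff; `initialize="ON_SCHEDULE"` is confirmed by the deprecation message above):

    from snowflake.ml.feature_store import FeatureView

    # Before (now raises INVALID_ARGUMENT):
    #     fs.register_feature_view(fv, version="V1", block=False)
    # After: request an asynchronous initial refresh on the FeatureView itself.
    fv = FeatureView(
        name="MY_FV",
        entities=[my_entity],        # assumed pre-existing Entity
        feature_df=features_df,      # assumed Snowpark DataFrame
        refresh_freq="1 day",
        initialize="ON_SCHEDULE",    # initial refresh runs on the schedule instead of blocking
    )
    registered = fs.register_feature_view(fv, version="V1")  # block defaults to True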
@@ -1199,10 +1208,10 @@ class FeatureStore:
         """Get refresh history for online feature table."""
         online_table_name = FeatureView._get_online_table_name(feature_view.name, feature_view.version)
         select_cols = "*" if verbose else "name, state, refresh_start_time, refresh_end_time, refresh_action"
-
-            f"{self._config.database.
-            f"{self._config.schema.
-            f"{online_table_name.
+        name = (
+            f"{self._config.database.identifier()}."
+            f"{self._config.schema.identifier()}."
+            f"{online_table_name.identifier()}"
         )
         return self._session.sql(
             f"""
@@ -1210,9 +1219,8 @@ class FeatureStore:
                 {select_cols}
             FROM TABLE (
                 {self._config.database}.INFORMATION_SCHEMA.ONLINE_FEATURE_TABLE_REFRESH_HISTORY (
-
+                    NAME => '{name}'
                 )
-
             )
             """
         )
@@ -1591,6 +1599,7 @@ class FeatureStore:
         spine_timestamp_col: Optional[str] = None,
         exclude_columns: Optional[list[str]] = None,
         include_feature_view_timestamp_col: bool = False,
+        join_method: Literal["sequential", "cte"] = "sequential",
     ) -> DataFrame:
         """
         Enrich spine dataframe with feature values. Mainly used to generate inference data input.
@@ -1604,6 +1613,8 @@ class FeatureStore:
             exclude_columns: Column names to exclude from the result dataframe.
             include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
                 (if feature view has timestamp column) if set true. Default to false.
+            join_method: Method for feature joins. "sequential" for layer-by-layer joins (default),
+                "cte" for CTE method. (Internal use only - subject to change)

         Returns:
             Snowpark DataFrame containing the joined results.
@@ -1641,6 +1652,7 @@ class FeatureStore:
             cast(list[Union[FeatureView, FeatureViewSlice]], features),
             spine_timestamp_col,
             include_feature_view_timestamp_col,
+            join_method,
         )

         if exclude_columns is not None:
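A usage sketch for the new flag (the spine DataFrame and feature view names are illustrative; the parameters match the signature above):

    enriched_df = fs.retrieve_feature_values(
        spine_df=spine_df,            # Snowpark DataFrame carrying the join keys
        features=[fv_user, fv_item],  # registered FeatureViews (or slices)
        spine_timestamp_col="EVENT_TS",
        join_method="cte",            # internal-only; default remains "sequential"
    )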
@@ -1659,6 +1671,7 @@ class FeatureStore:
         spine_label_cols: Optional[list[str]] = None,
         exclude_columns: Optional[list[str]] = None,
         include_feature_view_timestamp_col: bool = False,
+        join_method: Literal["sequential", "cte"] = "sequential",
     ) -> DataFrame:
         """
         Generate a training set from the specified Spine DataFrame and Feature Views. Result is
@@ -1676,6 +1689,8 @@ class FeatureStore:
             exclude_columns: Name of column(s) to exclude from the resulting training set.
             include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
                 (if feature view has timestamp column) if set true. Default to false.
+            join_method: Method for feature joins. "sequential" for layer-by-layer joins (default),
+                "cte" for CTE method. (Internal use only - subject to change)

         Returns:
             Returns a Snowpark DataFrame representing the training set.
@@ -1709,7 +1724,7 @@ class FeatureStore:
         spine_label_cols = to_sql_identifiers(spine_label_cols)  # type: ignore[assignment]

         result_df, join_keys = self._join_features(
-            spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
+            spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col, join_method
         )

         if exclude_columns is not None:
@@ -1757,6 +1772,7 @@ class FeatureStore:
         include_feature_view_timestamp_col: bool = False,
         desc: str = "",
         output_type: Literal["dataset"] = "dataset",
+        join_method: Literal["sequential", "cte"] = "sequential",
     ) -> dataset.Dataset:
         ...

@@ -1774,6 +1790,7 @@ class FeatureStore:
         exclude_columns: Optional[list[str]] = None,
         include_feature_view_timestamp_col: bool = False,
         desc: str = "",
+        join_method: Literal["sequential", "cte"] = "sequential",
     ) -> DataFrame:
         ...

@@ -1791,6 +1808,7 @@ class FeatureStore:
         include_feature_view_timestamp_col: bool = False,
         desc: str = "",
         output_type: Literal["dataset", "table"] = "dataset",
+        join_method: Literal["sequential", "cte"] = "sequential",
     ) -> Union[dataset.Dataset, DataFrame]:
         """
         Generate dataset by given source table and feature views.
@@ -1811,6 +1829,8 @@ class FeatureStore:
                 (if feature view has timestamp column) if set true. Default to false.
             desc: A description about this dataset.
             output_type: (Deprecated) The type of Snowflake storage to use for the generated training data.
+            join_method: Method for feature joins. "sequential" for layer-by-layer joins (default),
+                "cte" for CTE method. (Internal use only - subject to change)

         Returns:
             If output_type is "dataset" (default), returns a Dataset object.
@@ -1874,6 +1894,7 @@ class FeatureStore:
             exclude_columns=exclude_columns,
             include_feature_view_timestamp_col=include_feature_view_timestamp_col,
             save_as=table_name,
+            join_method=join_method,
         )
         if output_type == "table":
             warnings.warn(
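The same flag threads through training-set and dataset generation; a hedged sketch (names are illustrative, parameters follow the overloads above):

    ds = fs.generate_dataset(
        name="CHURN_TRAINING",
        spine_df=spine_df,
        features=[fv_user, fv_item],
        spine_timestamp_col="EVENT_TS",
        spine_label_cols=["LABEL"],
        join_method="sequential",     # the default; "cte" is internal and subject to change
    )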
@@ -2082,26 +2103,48 @@ class FeatureStore:
     def _plan_online_update(
         self, feature_view: FeatureView, online_config: Optional[fv_mod.OnlineConfig]
     ) -> _OnlineUpdateStrategy:
-        """Plan online update operations based on current state and target config.
+        """Plan online update operations based on current state and target config.
+
+        Handles three cases:
+        - enable is None: Preserve current online state, only update if currently online
+        - enable is True: Enable online storage (create if needed, update if exists)
+        - enable is False: Disable online storage (drop if exists)
+
+        Args:
+            feature_view: The FeatureView object to check current online state.
+            online_config: The OnlineConfig with target enable and lag settings.
+
+        Returns:
+            _OnlineUpdateStrategy containing operations and their rollbacks.
+        """
         if online_config is None:
             return self._OnlineUpdateStrategy([], [], None)

         current_online = feature_view.online
         target_online = online_config.enable

-        #
+        # Case 1: enable is None - preserve current online state, only update if currently online
+        if target_online is None:
+            if current_online and (online_config.target_lag is not None):
+                # Online is currently enabled and user wants to update lag
+                return self._plan_online_update_existing(feature_view, online_config)
+            else:
+                # No online changes needed (either not online, or lag not specified)
+                return self._OnlineUpdateStrategy([], [], None)
+
+        # Case 2: Enable online (create table)
         if target_online and not current_online:
             return self._plan_online_enable(feature_view, online_config)

-        # Disable online (drop table)
+        # Case 3: Disable online (drop table)
         elif not target_online and current_online:
             return self._plan_online_disable(feature_view)

-        # Update existing online table
+        # Case 4: Update existing online table
         elif target_online and current_online:
             return self._plan_online_update_existing(feature_view, online_config)

-        # No change needed
+        # Case 5: No change needed
         else:
             return self._OnlineUpdateStrategy([], [], online_config)

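The three `enable` settings map onto this planner roughly as follows (a sketch assuming `OnlineConfig` is exposed from `snowflake.ml.feature_store`; lag values are illustrative):

    from snowflake.ml.feature_store import OnlineConfig  # import path assumed

    OnlineConfig(enable=True, target_lag="1 hour")    # create the online table, or update it if present
    OnlineConfig(enable=False)                        # drop the online table if it exists
    OnlineConfig(target_lag="30 minutes")             # enable=None: updates lag only if already online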
@@ -2596,91 +2639,278 @@ class FeatureStore:
         found_rows = self._find_object("TAGS", full_entity_tag_name)
         return len(found_rows) == 1

+    def _build_cte_query(
+        self,
+        feature_views: list[FeatureView],
+        feature_columns: list[str],
+        spine_ref: str,
+        spine_timestamp_col: Optional[SqlIdentifier],
+        include_feature_view_timestamp_col: bool = False,
+    ) -> str:
+        """
+        Build a CTE query with the spine query and the feature views.
+
+        This method supports feature views with different join keys by:
+        1. Creating a spine CTE that includes all possible join keys
+        2. For each feature view, creating a deduplicated spine subquery with only that FV's join keys
+        3. Performing ASOF JOINs on the deduplicated spine when timestamp columns exist
+        4. Performing LEFT JOINs on the deduplicated spine when timestamp columns are missing
+        5. Combining results by LEFT JOINing each FV CTE back to the original SPINE
+
+        Args:
+            feature_views: A list of feature views to join.
+            feature_columns: A list of feature column strings for each feature view.
+            spine_ref: The spine query.
+            spine_timestamp_col: The timestamp column from spine. Can be None if spine has no timestamp column.
+            include_feature_view_timestamp_col: Whether to include the timestamp column of
+                the feature view in the result. Default to false.
+
+        Returns:
+            A SQL query string with CTE structure for joining feature views.
+        """
+        if not feature_views:
+            return f"SELECT * FROM ({spine_ref})"
+
+        # Create spine CTE with the spine query for reuse
+        spine_cte = f"""SPINE AS (
+            SELECT * FROM ({spine_ref})
+        )"""
+
+        ctes = [spine_cte]
+        cte_names = []
+        for i, feature_view in enumerate(feature_views):
+            cte_name = f"FV{i:03d}"
+            cte_names.append(cte_name)
+
+            feature_timestamp_col = feature_view.timestamp_col
+
+            # Get the specific join keys for this feature view
+            fv_join_keys = list({k for e in feature_view.entities for k in e.join_keys})
+            join_keys_str = ", ".join(fv_join_keys)
+
+            # Use ASOF JOIN if both spine and feature view have timestamp columns, otherwise use LEFT JOIN
+            if spine_timestamp_col is not None and feature_timestamp_col is not None:
+                # Build the deduplicated spine columns set (join keys + timestamp)
+                spine_dedup_cols_set = set(fv_join_keys)
+                if spine_timestamp_col not in spine_dedup_cols_set:
+                    spine_dedup_cols_set.add(spine_timestamp_col)
+                spine_dedup_cols_str = ", ".join(f'"{col}"' for col in spine_dedup_cols_set)
+
+                # Build the JOIN condition using only this feature view's join keys
+                join_conditions_dedup = [f'SPINE_DEDUP."{col}" = FEATURE."{col}"' for col in fv_join_keys]
+
+                if include_feature_view_timestamp_col:
+                    f_ts_col_alias = identifier.concat_names(
+                        [feature_view.name, "_", str(feature_view.version), "_", feature_timestamp_col]
+                    )
+                    f_ts_col_str = f"FEATURE.{feature_timestamp_col} AS {f_ts_col_alias},"
+                else:
+                    f_ts_col_str = ""
+                ctes.append(
+                    f"""{cte_name} AS (
+                    SELECT
+                        SPINE_DEDUP.*,
+                        {f_ts_col_str}
+                        FEATURE.* EXCLUDE ({join_keys_str}, {feature_timestamp_col})
+                    FROM (
+                        SELECT DISTINCT {spine_dedup_cols_str}
+                        FROM SPINE
+                    ) SPINE_DEDUP
+                    ASOF JOIN (
+                        SELECT {join_keys_str}, {feature_timestamp_col}, {feature_columns[i]}
+                        FROM {feature_view.fully_qualified_name()}
+                    ) FEATURE
+                    MATCH_CONDITION (SPINE_DEDUP."{spine_timestamp_col}" >= FEATURE."{feature_timestamp_col}")
+                    ON {" AND ".join(join_conditions_dedup)}
+                )"""
+                )
+            else:
+                # Build the deduplicated spine columns list (just join keys, no timestamp)
+                spine_dedup_cols_str = ", ".join(f'"{col}"' for col in fv_join_keys)
+
+                # Build the JOIN condition using only this feature view's join keys
+                join_conditions_dedup = [f'SPINE_DEDUP."{col}" = FEATURE."{col}"' for col in fv_join_keys]
+
+                ctes.append(
+                    f"""{cte_name} AS (
+                    SELECT
+                        SPINE_DEDUP.*,
+                        FEATURE.* EXCLUDE ({join_keys_str})
+                    FROM (
+                        SELECT DISTINCT {spine_dedup_cols_str}
+                        FROM SPINE
+                    ) SPINE_DEDUP
+                    LEFT JOIN (
+                        SELECT {join_keys_str}, {feature_columns[i]}
+                        FROM {feature_view.fully_qualified_name()}
+                    ) FEATURE
+                    ON {" AND ".join(join_conditions_dedup)}
+                )"""
+                )

+        # Build final SELECT with LEFT joins to each FV CTE
+        select_columns = []
+        join_clauses = []
+
+        for i, cte_name in enumerate(cte_names):
+            feature_view = feature_views[i]
+            fv_join_keys = list({k for e in feature_view.entities for k in e.join_keys})
+            join_conditions = [f'SPINE."{col}" = {cte_name}."{col}"' for col in fv_join_keys]
+            # Only include spine timestamp in join condition if both spine and FV have timestamps
+            if spine_timestamp_col is not None and feature_view.timestamp_col is not None:
+                join_conditions.append(f'SPINE."{spine_timestamp_col}" = {cte_name}."{spine_timestamp_col}"')
+
+            if include_feature_view_timestamp_col and feature_view.timestamp_col is not None:
+                f_ts_col_alias = identifier.concat_names(
+                    [feature_view.name, "_", str(feature_view.version), "_", feature_view.timestamp_col]
+                )
+                f_ts_col_str = f"{cte_name}.{f_ts_col_alias} AS {f_ts_col_alias}"
+                select_columns.append(f_ts_col_str)
+
+            # Select features from the CTE
+            # feature_columns[i] is already a comma-separated string of column names
+            feature_cols_from_cte = []
+            for col in feature_columns[i].split(", "):
+                col_clean = col.strip()
+                feature_cols_from_cte.append(f"{cte_name}.{col_clean}")
+            select_columns.extend(feature_cols_from_cte)
+
+            # Create join condition using only this feature view's join keys
+            join_clauses.append(
+                f"""
+                LEFT JOIN {cte_name}
+                ON {" AND ".join(join_conditions)}"""
+            )
+
+        query = f"""WITH
+            {', '.join(ctes)}
+            SELECT
+                SPINE.*,
+                {', '.join(select_columns)}
+            FROM SPINE{' '.join(join_clauses)}
+        """
+
+        return query
+
     def _join_features(
         self,
         spine_df: DataFrame,
         features: list[Union[FeatureView, FeatureViewSlice]],
         spine_timestamp_col: Optional[SqlIdentifier],
         include_feature_view_timestamp_col: bool,
+        join_method: Literal["sequential", "cte"] = "sequential",
     ) -> tuple[DataFrame, list[SqlIdentifier]]:
-
-
-
+        # Validate join_method parameter
+        if join_method not in ["sequential", "cte"]:
+            raise ValueError(f"Invalid join_method '{join_method}'. Must be 'sequential' or 'cte'.")
+
+        feature_views: list[FeatureView] = []
+        # Extract column selections for each feature view
+        feature_columns: list[str] = []
+        for feature in features:
+            fv = feature.feature_view_ref if isinstance(feature, FeatureViewSlice) else feature
+            if fv.status == FeatureViewStatus.DRAFT:
                 raise snowml_exceptions.SnowflakeMLException(
                     error_code=error_codes.NOT_FOUND,
-                    original_exception=ValueError(f"FeatureView {
+                    original_exception=ValueError(f"FeatureView {fv.name} has not been registered."),
                 )
-            for e in
+            for e in fv.entities:
                 for k in e.join_keys:
                     if k not in to_sql_identifiers(spine_df.columns):
                         raise snowml_exceptions.SnowflakeMLException(
                             error_code=error_codes.INVALID_ARGUMENT,
                             original_exception=ValueError(
-                                f"join_key {k} from Entity {e.name} in FeatureView {
+                                f"join_key {k} from Entity {e.name} in FeatureView {fv.name} "
+                                "is not found in spine_df."
                             ),
                         )
-
+            feature_views.append(fv)
+            if isinstance(feature, FeatureViewSlice):
+                cols = feature.names
+            else:
+                cols = feature.feature_names
+            feature_columns.append(", ".join(col.resolved() for col in cols))
+        # TODO (SNOW-2396184): remove this check and the non-ASOF join path as ASOF join is enabled by default now.
         if self._asof_join_enabled is None:
             self._asof_join_enabled = self._is_asof_join_enabled()

         # TODO: leverage Snowpark dataframe for more concise syntax once it supports AsOfJoin
         query = spine_df.queries["queries"][-1]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        join_keys: list[SqlIdentifier] = []
+
+        if join_method == "cte":
+
+            logger.info(f"Using the CTE method with {len(features)} feature views")
+
+            query = self._build_cte_query(
+                feature_views,
+                feature_columns,
+                spine_df.queries["queries"][-1],
+                spine_timestamp_col,
+                include_feature_view_timestamp_col,
+            )
+        else:
+            # Use sequential joins layer by layer
+            logger.info(f"Using the sequential join method with {len(features)} feature views")
+            layer = 0
+            for feature in features:
+                if isinstance(feature, FeatureViewSlice):
+                    cols = feature.names
+                    feature = feature.feature_view_ref
+                else:
+                    cols = feature.feature_names
+
+                join_keys = list({k for e in feature.entities for k in e.join_keys})
+                join_keys_str = ", ".join(join_keys)
+                assert feature.version is not None
+                join_table_name = feature.fully_qualified_name()
+
+                if spine_timestamp_col is not None and feature.timestamp_col is not None:
+                    if self._asof_join_enabled:
+                        if include_feature_view_timestamp_col:
+                            f_ts_col_alias = identifier.concat_names(
+                                [feature.name, "_", feature.version, "_", feature.timestamp_col]
+                            )
+                            f_ts_col_str = f"r_{layer}.{feature.timestamp_col} AS {f_ts_col_alias},"
+                        else:
+                            f_ts_col_str = ""
+                        query = f"""
+                            SELECT
+                                l_{layer}.*,
+                                {f_ts_col_str}
+                                r_{layer}.* EXCLUDE ({join_keys_str}, {feature.timestamp_col})
+                            FROM ({query}) l_{layer}
+                            ASOF JOIN (
+                                SELECT {join_keys_str}, {feature.timestamp_col},
+                                    {', '.join(col.resolved() for col in cols)}
+                                FROM {join_table_name}
+                            ) r_{layer}
+                            MATCH_CONDITION (l_{layer}.{spine_timestamp_col} >= r_{layer}.{feature.timestamp_col})
+                            ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
+                        """
                     else:
-
+                        query = self._composed_union_window_join_query(
+                            layer=layer,
+                            s_query=query,
+                            s_ts_col=spine_timestamp_col,
+                            f_df=feature.feature_df,
+                            f_table_name=join_table_name,
+                            f_ts_col=feature.timestamp_col,
+                            join_keys=join_keys,
+                        )
+                else:
                     query = f"""
                         SELECT
                             l_{layer}.*,
-                            {
-                            r_{layer}.* EXCLUDE ({join_keys_str}, {f.timestamp_col})
+                            r_{layer}.* EXCLUDE ({join_keys_str})
                         FROM ({query}) l_{layer}
-
-                            SELECT {join_keys_str}, {
+                        LEFT JOIN (
+                            SELECT {join_keys_str}, {', '.join(col.resolved() for col in cols)}
                             FROM {join_table_name}
                         ) r_{layer}
-                        MATCH_CONDITION (l_{layer}.{spine_timestamp_col} >= r_{layer}.{f.timestamp_col})
                         ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
                     """
-
-                query = self._composed_union_window_join_query(
-                    layer=layer,
-                    s_query=query,
-                    s_ts_col=spine_timestamp_col,
-                    f_df=f.feature_df,
-                    f_table_name=join_table_name,
-                    f_ts_col=f.timestamp_col,
-                    join_keys=join_keys,
-                )
-            else:
-                query = f"""
-                    SELECT
-                        l_{layer}.*,
-                        r_{layer}.* EXCLUDE ({join_keys_str})
-                    FROM ({query}) l_{layer}
-                    LEFT JOIN (
-                        SELECT {join_keys_str}, {', '.join(cols)}
-                        FROM {join_table_name}
-                    ) r_{layer}
-                    ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
-                """
-            layer += 1
+                layer += 1

         # TODO: construct result dataframe with datframe APIs once ASOF join is supported natively.
         # Below code manually construct result dataframe from private members of spine dataframe, which
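For intuition, the SQL emitted by `_build_cte_query` for one timestamped and one timestamp-less feature view has roughly this shape (column and table names are invented for illustration; the `NAME$VERSION` table naming is assumed):

    WITH
    SPINE AS (
        SELECT * FROM (<spine query>)
    ),
    FV000 AS (
        SELECT SPINE_DEDUP.*, FEATURE.* EXCLUDE (USER_ID, TS)
        FROM (SELECT DISTINCT "USER_ID", "TS" FROM SPINE) SPINE_DEDUP
        ASOF JOIN (SELECT USER_ID, TS, F1 FROM DB.SCHEMA.FV_USER$V1) FEATURE
        MATCH_CONDITION (SPINE_DEDUP."TS" >= FEATURE."TS")
        ON SPINE_DEDUP."USER_ID" = FEATURE."USER_ID"
    ),
    FV001 AS (
        SELECT SPINE_DEDUP.*, FEATURE.* EXCLUDE (ITEM_ID)
        FROM (SELECT DISTINCT "ITEM_ID" FROM SPINE) SPINE_DEDUP
        LEFT JOIN (SELECT ITEM_ID, F2 FROM DB.SCHEMA.FV_ITEM$V1) FEATURE
        ON SPINE_DEDUP."ITEM_ID" = FEATURE."ITEM_ID"
    )
    SELECT SPINE.*, FV000.F1, FV001.F2
    FROM SPINE
    LEFT JOIN FV000 ON SPINE."USER_ID" = FV000."USER_ID" AND SPINE."TS" = FV000."TS"
    LEFT JOIN FV001 ON SPINE."ITEM_ID" = FV001."ITEM_ID"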
@@ -3213,7 +3443,7 @@ class FeatureStore:
         online_table_name = FeatureView._get_online_table_name(feature_view_name)

         fully_qualified_online_name = self._get_fully_qualified_name(online_table_name)
-        source_table_name = feature_view_name
+        source_table_name = self._get_fully_qualified_name(feature_view_name)

         # Extract join keys for PRIMARY KEY (preserve order and ensure unique)
         ordered_join_keys: list[str] = []
snowflake/ml/feature_store/feature_view.py
CHANGED

@@ -1,7 +1,7 @@
+"""Feature view module for Snowflake ML Feature Store."""
 from __future__ import annotations

 import json
-import logging
 import re
 import warnings
 from collections import OrderedDict
@@ -52,7 +52,7 @@ _RESULT_SCAN_QUERY_PATTERN = re.compile(
 class OnlineConfig:
     """Configuration for online feature storage."""

-    enable: bool =
+    enable: Optional[bool] = None
     target_lag: Optional[str] = None

     def __post_init__(self) -> None:
@@ -248,6 +248,7 @@ class FeatureView(lineage_node.LineageNode):
            - If `timestamp_col` is provided, it is added to the default clustering keys.
        online_config: Optional configuration for online storage. If provided with enable=True,
            online storage will be enabled. Defaults to None (no online storage).
+           NOTE: this feature is currently in Public Preview.
        _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.

    Example::
@@ -289,8 +290,6 @@ class FeatureView(lineage_node.LineageNode):

    # noqa: DAR401
    """
-        if online_config is not None:
-            logging.warning("'online_config' is in private preview since 1.12.0. Do not use it in production.")

        self._name: SqlIdentifier = SqlIdentifier(name)
        self._entities: list[Entity] = entities
@@ -533,8 +532,15 @@ class FeatureView(lineage_node.LineageNode):
         return self._feature_desc

     @property
-    def online(self) -> bool:
-
+    def online(self) -> bool:  # noqa: DAR101
+        """Check if online storage is enabled for this feature view.
+
+        Returns:
+            True if online storage is enabled, False otherwise.
+        """
+        if self._online_config and self._online_config.enable is True:
+            return True
+        return False

     @property
     def online_config(self) -> Optional[OnlineConfig]:
snowflake/ml/fileset/stage_fs.py
CHANGED
@@ -1,5 +1,6 @@
 import inspect
 import logging
+import re
 import time
 from dataclasses import dataclass
 from typing import Any, Optional, Union, cast
@@ -27,6 +28,8 @@ _PRESIGNED_URL_LIFETIME_SEC = 14400
 # The threshold of when the presigned url should get refreshed before its expiration.
 _PRESIGNED_URL_HEADROOM_SEC = 3600

+# Regex pattern to match cloud storage prefixes (s3://, gcs://, azure://) and bucket/container name at start of string
+_CLOUD_PATH_PREFIX_PATTERN = re.compile(r"^(s3|gcs|azure)://[^/]+/", re.IGNORECASE)

 _PROJECT = "FileSet"

@@ -355,8 +358,16 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):

         Returns:
             A string of the relative stage path.
+
+        Raises:
+            ValueError: If the stage path format is invalid.
         """
-
+        if stage_path.lower().startswith(self._stage.lower()):
+            return stage_path[len(self._stage) + 1 :]
+        elif match := _CLOUD_PATH_PREFIX_PATTERN.match(stage_path):
+            return stage_path[match.end() :]
+
+        raise ValueError(f"Invalid stage path: {stage_path}")

     def _add_file_info_helper(
         self,
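To illustrate the new fallback, a self-contained check of the prefix pattern (the bucket and file names are made up):

    import re

    _CLOUD_PATH_PREFIX_PATTERN = re.compile(r"^(s3|gcs|azure)://[^/]+/", re.IGNORECASE)

    path = "s3://my-bucket/data/part-000.parquet"
    match = _CLOUD_PATH_PREFIX_PATTERN.match(path)
    assert match is not None
    assert path[match.end():] == "data/part-000.parquet"  # scheme and bucket prefix stripped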
snowflake/ml/jobs/_utils/constants.py
CHANGED

@@ -56,8 +56,9 @@ ENABLE_HEALTH_CHECKS_ENV_VAR = "ENABLE_HEALTH_CHECKS"
 ENABLE_HEALTH_CHECKS = "false"

 # Job status polling constants
-JOB_POLL_INITIAL_DELAY_SECONDS =
+JOB_POLL_INITIAL_DELAY_SECONDS = 5
 JOB_POLL_MAX_DELAY_SECONDS = 30
+JOB_SPCS_TIMEOUT_SECONDS = 30

 # Log start and end messages
 LOG_START_MSG = "--------------------------------\nML job started\n--------------------------------"
@@ -73,6 +74,7 @@ COMMON_INSTANCE_FAMILIES = {
     "CPU_X64_XS": ComputeResources(cpu=1, memory=6),
     "CPU_X64_S": ComputeResources(cpu=3, memory=13),
     "CPU_X64_M": ComputeResources(cpu=6, memory=28),
+    "CPU_X64_SL": ComputeResources(cpu=14, memory=54),
     "CPU_X64_L": ComputeResources(cpu=28, memory=116),
     "HIGHMEM_X64_S": ComputeResources(cpu=6, memory=58),
 }
@@ -85,6 +87,7 @@ AWS_INSTANCE_FAMILIES = {
 }
 AZURE_INSTANCE_FAMILIES = {
     "HIGHMEM_X64_M": ComputeResources(cpu=28, memory=244),
+    "HIGHMEM_X64_SL": ComputeResources(cpu=92, memory=654),
     "HIGHMEM_X64_L": ComputeResources(cpu=92, memory=654),
     "GPU_NV_XS": ComputeResources(cpu=3, memory=26, gpu=1, gpu_type="T4"),
     "GPU_NV_SM": ComputeResources(cpu=32, memory=424, gpu=1, gpu_type="A10"),
@@ -92,7 +95,15 @@ AZURE_INSTANCE_FAMILIES = {
     "GPU_NV_3M": ComputeResources(cpu=44, memory=424, gpu=2, gpu_type="A100"),
     "GPU_NV_SL": ComputeResources(cpu=92, memory=858, gpu=4, gpu_type="A100"),
 }
+GCP_INSTANCE_FAMILIES = {
+    "HIGHMEM_X64_M": ComputeResources(cpu=28, memory=244),
+    "HIGHMEM_X64_SL": ComputeResources(cpu=92, memory=654),
+    "GPU_GCP_NV_L4_1_24G": ComputeResources(cpu=6, memory=28, gpu=1, gpu_type="L4"),
+    "GPU_GCP_NV_L4_4_24G": ComputeResources(cpu=44, memory=178, gpu=4, gpu_type="L4"),
+    "GPU_GCP_NV_A100_8_40G": ComputeResources(cpu=92, memory=654, gpu=8, gpu_type="A100"),
+}
 CLOUD_INSTANCE_FAMILIES = {
     SnowflakeCloudType.AWS: AWS_INSTANCE_FAMILIES,
     SnowflakeCloudType.AZURE: AZURE_INSTANCE_FAMILIES,
+    SnowflakeCloudType.GCP: GCP_INSTANCE_FAMILIES,
 }