snowflake-ml-python 1.17.0__py3-none-any.whl → 1.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. snowflake/ml/_internal/telemetry.py +3 -2
  2. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +18 -19
  3. snowflake/ml/experiment/callback/keras.py +3 -0
  4. snowflake/ml/experiment/callback/lightgbm.py +3 -0
  5. snowflake/ml/experiment/callback/xgboost.py +3 -0
  6. snowflake/ml/experiment/experiment_tracking.py +50 -70
  7. snowflake/ml/feature_store/feature_store.py +299 -69
  8. snowflake/ml/feature_store/feature_view.py +12 -6
  9. snowflake/ml/fileset/stage_fs.py +12 -1
  10. snowflake/ml/jobs/_utils/constants.py +12 -1
  11. snowflake/ml/jobs/_utils/payload_utils.py +7 -1
  12. snowflake/ml/jobs/_utils/stage_utils.py +4 -0
  13. snowflake/ml/jobs/_utils/types.py +5 -0
  14. snowflake/ml/jobs/job.py +19 -5
  15. snowflake/ml/jobs/manager.py +18 -7
  16. snowflake/ml/model/__init__.py +19 -0
  17. snowflake/ml/model/_client/model/batch_inference_specs.py +63 -0
  18. snowflake/ml/model/_client/model/inference_engine_utils.py +1 -5
  19. snowflake/ml/model/_client/model/model_version_impl.py +129 -11
  20. snowflake/ml/model/_client/ops/model_ops.py +11 -4
  21. snowflake/ml/model/_client/ops/service_ops.py +3 -0
  22. snowflake/ml/model/_client/service/model_deployment_spec.py +3 -0
  23. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
  24. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +3 -1
  25. snowflake/ml/model/_model_composer/model_method/model_method.py +4 -1
  26. snowflake/ml/model/_packager/model_handlers/_utils.py +70 -0
  27. snowflake/ml/model/_packager/model_handlers/prophet.py +566 -0
  28. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -0
  29. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  30. snowflake/ml/model/type_hints.py +16 -0
  31. snowflake/ml/modeling/metrics/metrics_utils.py +9 -2
  32. snowflake/ml/monitoring/explain_visualize.py +3 -1
  33. snowflake/ml/version.py +1 -1
  34. {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/METADATA +50 -4
  35. {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/RECORD +38 -37
  36. {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/WHEEL +0 -0
  37. {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/licenses/LICENSE.txt +0 -0
  38. {snowflake_ml_python-1.17.0.dist-info → snowflake_ml_python-1.19.0.dist-info}/top_level.txt +0 -0
snowflake/ml/feature_store/feature_store.py
@@ -474,8 +474,8 @@ class FeatureStore:
      feature_view: FeatureView instance to materialize.
      version: version of the registered FeatureView.
          NOTE: Version only accepts letters, numbers and underscore. Also version will be capitalized.
-     block: Specify whether the FeatureView backend materialization should be blocking or not. If blocking then
-         the API will wait until the initial FeatureView data is generated. Default to true.
+     block: Deprecated. To make the initial refresh asynchronous, set the `initialize`
+         argument on the `FeatureView` to `"ON_SCHEDULE"`. Default is true.
      overwrite: Overwrite the existing FeatureView with same version. This is the same as dropping the
          FeatureView first then recreate. NOTE: there will be backfill cost associated if the FeatureView is
          being continuously maintained.
@@ -521,6 +521,15 @@ class FeatureStore:
      """
      version = FeatureViewVersion(version)

+     if block is False:
+         raise snowml_exceptions.SnowflakeMLException(
+             error_code=error_codes.INVALID_ARGUMENT,
+             original_exception=ValueError(
+                 'block=False is deprecated. Use FeatureView(..., initialize="ON_SCHEDULE") '
+                 "for async initial refresh."
+             ),
+         )
+
      if feature_view.status != FeatureViewStatus.DRAFT:
          try:
              return self._get_feature_view_if_exists(feature_view.name, str(version))
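For callers migrating off `block=False`, a minimal sketch of the replacement (the entity, dataframe, and refresh settings here are illustrative; the point is the `initialize="ON_SCHEDULE"` argument named in the new docstring):

    # Before (now raises INVALID_ARGUMENT):
    #     fs.register_feature_view(feature_view=fv, version="V1", block=False)

    # After: request the asynchronous initial refresh on the FeatureView itself.
    fv = FeatureView(
        name="MY_FV",                # illustrative
        entities=[my_entity],        # illustrative
        feature_df=features_df,      # illustrative Snowpark DataFrame
        refresh_freq="1 hour",
        initialize="ON_SCHEDULE",    # defer the initial refresh to the schedule
    )
    fs.register_feature_view(feature_view=fv, version="V1")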
@@ -1199,10 +1208,10 @@ class FeatureStore:
      """Get refresh history for online feature table."""
      online_table_name = FeatureView._get_online_table_name(feature_view.name, feature_view.version)
      select_cols = "*" if verbose else "name, state, refresh_start_time, refresh_end_time, refresh_action"
-     prefix = (
-         f"{self._config.database.resolved()}."
-         f"{self._config.schema.resolved()}."
-         f"{online_table_name.resolved()}"
+     name = (
+         f"{self._config.database.identifier()}."
+         f"{self._config.schema.identifier()}."
+         f"{online_table_name.identifier()}"
      )
      return self._session.sql(
          f"""
@@ -1210,9 +1219,8 @@ class FeatureStore:
          {select_cols}
      FROM TABLE (
          {self._config.database}.INFORMATION_SCHEMA.ONLINE_FEATURE_TABLE_REFRESH_HISTORY (
-             NAME_PREFIX => '{prefix}'
+             NAME => '{name}'
          )
-
      )
      """
  )
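Rendered with illustrative identifiers, the rewritten lookup matches one exact table instead of a name prefix (assuming `identifier()` preserves quoting for case-sensitive names, where `resolved()` returns the bare resolved form):

    # SELECT name, state, refresh_start_time, refresh_end_time, refresh_action
    # FROM TABLE (
    #     MY_DB.INFORMATION_SCHEMA.ONLINE_FEATURE_TABLE_REFRESH_HISTORY (
    #         NAME => 'MY_DB.MY_SCHEMA.<online table identifier>'
    #     )
    # )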
@@ -1591,6 +1599,7 @@ class FeatureStore:
      spine_timestamp_col: Optional[str] = None,
      exclude_columns: Optional[list[str]] = None,
      include_feature_view_timestamp_col: bool = False,
+     join_method: Literal["sequential", "cte"] = "sequential",
  ) -> DataFrame:
      """
      Enrich spine dataframe with feature values. Mainly used to generate inference data input.
@@ -1604,6 +1613,8 @@ class FeatureStore:
      exclude_columns: Column names to exclude from the result dataframe.
      include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
          (if feature view has timestamp column) if set true. Default to false.
+     join_method: Method for feature joins. "sequential" for layer-by-layer joins (default),
+         "cte" for CTE method. (Internal use only - subject to change)

  Returns:
      Snowpark DataFrame containing the joined results.
@@ -1641,6 +1652,7 @@ class FeatureStore:
      cast(list[Union[FeatureView, FeatureViewSlice]], features),
      spine_timestamp_col,
      include_feature_view_timestamp_col,
+     join_method,
  )

  if exclude_columns is not None:
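A usage sketch for the new parameter (the feature store, spine dataframe, and feature views are assumed to exist; since `join_method` is flagged internal, the default is shown alongside the opt-in):

    # Default: one join layer per feature view.
    df_seq = fs.retrieve_feature_values(spine_df=spine_df, features=[fv1, fv2])

    # Opt in to the single-statement CTE plan (internal use only, subject to change).
    df_cte = fs.retrieve_feature_values(
        spine_df=spine_df,
        features=[fv1, fv2],
        join_method="cte",
    )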
@@ -1659,6 +1671,7 @@ class FeatureStore:
      spine_label_cols: Optional[list[str]] = None,
      exclude_columns: Optional[list[str]] = None,
      include_feature_view_timestamp_col: bool = False,
+     join_method: Literal["sequential", "cte"] = "sequential",
  ) -> DataFrame:
      """
      Generate a training set from the specified Spine DataFrame and Feature Views. Result is
@@ -1676,6 +1689,8 @@ class FeatureStore:
      exclude_columns: Name of column(s) to exclude from the resulting training set.
      include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
          (if feature view has timestamp column) if set true. Default to false.
+     join_method: Method for feature joins. "sequential" for layer-by-layer joins (default),
+         "cte" for CTE method. (Internal use only - subject to change)

  Returns:
      Returns a Snowpark DataFrame representing the training set.
@@ -1709,7 +1724,7 @@ class FeatureStore:
      spine_label_cols = to_sql_identifiers(spine_label_cols)  # type: ignore[assignment]

  result_df, join_keys = self._join_features(
-     spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
+     spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col, join_method
  )

  if exclude_columns is not None:
@@ -1757,6 +1772,7 @@ class FeatureStore:
      include_feature_view_timestamp_col: bool = False,
      desc: str = "",
      output_type: Literal["dataset"] = "dataset",
+     join_method: Literal["sequential", "cte"] = "sequential",
  ) -> dataset.Dataset:
      ...

@@ -1774,6 +1790,7 @@ class FeatureStore:
      exclude_columns: Optional[list[str]] = None,
      include_feature_view_timestamp_col: bool = False,
      desc: str = "",
+     join_method: Literal["sequential", "cte"] = "sequential",
  ) -> DataFrame:
      ...

@@ -1791,6 +1808,7 @@ class FeatureStore:
      include_feature_view_timestamp_col: bool = False,
      desc: str = "",
      output_type: Literal["dataset", "table"] = "dataset",
+     join_method: Literal["sequential", "cte"] = "sequential",
  ) -> Union[dataset.Dataset, DataFrame]:
      """
      Generate dataset by given source table and feature views.
@@ -1811,6 +1829,8 @@ class FeatureStore:
          (if feature view has timestamp column) if set true. Default to false.
      desc: A description about this dataset.
      output_type: (Deprecated) The type of Snowflake storage to use for the generated training data.
+     join_method: Method for feature joins. "sequential" for layer-by-layer joins (default),
+         "cte" for CTE method. (Internal use only - subject to change)

  Returns:
      If output_type is "dataset" (default), returns a Dataset object.
@@ -1874,6 +1894,7 @@ class FeatureStore:
      exclude_columns=exclude_columns,
      include_feature_view_timestamp_col=include_feature_view_timestamp_col,
      save_as=table_name,
+     join_method=join_method,
  )
  if output_type == "table":
      warnings.warn(
@@ -2082,26 +2103,48 @@ class FeatureStore:
  def _plan_online_update(
      self, feature_view: FeatureView, online_config: Optional[fv_mod.OnlineConfig]
  ) -> _OnlineUpdateStrategy:
-     """Plan online update operations based on current state and target config."""
+     """Plan online update operations based on current state and target config.
+
+     Handles three cases:
+     - enable is None: Preserve current online state, only update if currently online
+     - enable is True: Enable online storage (create if needed, update if exists)
+     - enable is False: Disable online storage (drop if exists)
+
+     Args:
+         feature_view: The FeatureView object to check current online state.
+         online_config: The OnlineConfig with target enable and lag settings.
+
+     Returns:
+         _OnlineUpdateStrategy containing operations and their rollbacks.
+     """
      if online_config is None:
          return self._OnlineUpdateStrategy([], [], None)

      current_online = feature_view.online
      target_online = online_config.enable

-     # Enable online (create table)
+     # Case 1: enable is None - preserve current online state, only update if currently online
+     if target_online is None:
+         if current_online and (online_config.target_lag is not None):
+             # Online is currently enabled and user wants to update lag
+             return self._plan_online_update_existing(feature_view, online_config)
+         else:
+             # No online changes needed (either not online, or lag not specified)
+             return self._OnlineUpdateStrategy([], [], None)
+
+     # Case 2: Enable online (create table)
      if target_online and not current_online:
          return self._plan_online_enable(feature_view, online_config)

-     # Disable online (drop table)
+     # Case 3: Disable online (drop table)
      elif not target_online and current_online:
          return self._plan_online_disable(feature_view)

-     # Update existing online table
+     # Case 4: Update existing online table
      elif target_online and current_online:
          return self._plan_online_update_existing(feature_view, online_config)

-     # No change needed
+     # Case 5: No change needed
      else:
          return self._OnlineUpdateStrategy([], [], online_config)
@@ -2596,91 +2639,278 @@ class FeatureStore:
      found_rows = self._find_object("TAGS", full_entity_tag_name)
      return len(found_rows) == 1

+ def _build_cte_query(
+     self,
+     feature_views: list[FeatureView],
+     feature_columns: list[str],
+     spine_ref: str,
+     spine_timestamp_col: Optional[SqlIdentifier],
+     include_feature_view_timestamp_col: bool = False,
+ ) -> str:
+     """
+     Build a CTE query with the spine query and the feature views.
+
+     This method supports feature views with different join keys by:
+     1. Creating a spine CTE that includes all possible join keys
+     2. For each feature view, creating a deduplicated spine subquery with only that FV's join keys
+     3. Performing ASOF JOINs on the deduplicated spine when timestamp columns exist
+     4. Performing LEFT JOINs on the deduplicated spine when timestamp columns are missing
+     5. Combining results by LEFT JOINing each FV CTE back to the original SPINE
+
+     Args:
+         feature_views: A list of feature views to join.
+         feature_columns: A list of feature column strings for each feature view.
+         spine_ref: The spine query.
+         spine_timestamp_col: The timestamp column from spine. Can be None if spine has no timestamp column.
+         include_feature_view_timestamp_col: Whether to include the timestamp column of
+             the feature view in the result. Default to false.
+
+     Returns:
+         A SQL query string with CTE structure for joining feature views.
+     """
+     if not feature_views:
+         return f"SELECT * FROM ({spine_ref})"
+
+     # Create spine CTE with the spine query for reuse
+     spine_cte = f"""SPINE AS (
+         SELECT * FROM ({spine_ref})
+     )"""
+
+     ctes = [spine_cte]
+     cte_names = []
+     for i, feature_view in enumerate(feature_views):
+         cte_name = f"FV{i:03d}"
+         cte_names.append(cte_name)
+
+         feature_timestamp_col = feature_view.timestamp_col
+
+         # Get the specific join keys for this feature view
+         fv_join_keys = list({k for e in feature_view.entities for k in e.join_keys})
+         join_keys_str = ", ".join(fv_join_keys)
+
+         # Use ASOF JOIN if both spine and feature view have timestamp columns, otherwise use LEFT JOIN
+         if spine_timestamp_col is not None and feature_timestamp_col is not None:
+             # Build the deduplicated spine columns set (join keys + timestamp)
+             spine_dedup_cols_set = set(fv_join_keys)
+             if spine_timestamp_col not in spine_dedup_cols_set:
+                 spine_dedup_cols_set.add(spine_timestamp_col)
+             spine_dedup_cols_str = ", ".join(f'"{col}"' for col in spine_dedup_cols_set)
+
+             # Build the JOIN condition using only this feature view's join keys
+             join_conditions_dedup = [f'SPINE_DEDUP."{col}" = FEATURE."{col}"' for col in fv_join_keys]
+
+             if include_feature_view_timestamp_col:
+                 f_ts_col_alias = identifier.concat_names(
+                     [feature_view.name, "_", str(feature_view.version), "_", feature_timestamp_col]
+                 )
+                 f_ts_col_str = f"FEATURE.{feature_timestamp_col} AS {f_ts_col_alias},"
+             else:
+                 f_ts_col_str = ""
+             ctes.append(
+                 f"""{cte_name} AS (
+                     SELECT
+                         SPINE_DEDUP.*,
+                         {f_ts_col_str}
+                         FEATURE.* EXCLUDE ({join_keys_str}, {feature_timestamp_col})
+                     FROM (
+                         SELECT DISTINCT {spine_dedup_cols_str}
+                         FROM SPINE
+                     ) SPINE_DEDUP
+                     ASOF JOIN (
+                         SELECT {join_keys_str}, {feature_timestamp_col}, {feature_columns[i]}
+                         FROM {feature_view.fully_qualified_name()}
+                     ) FEATURE
+                     MATCH_CONDITION (SPINE_DEDUP."{spine_timestamp_col}" >= FEATURE."{feature_timestamp_col}")
+                     ON {" AND ".join(join_conditions_dedup)}
+                 )"""
+             )
+         else:
+             # Build the deduplicated spine columns list (just join keys, no timestamp)
+             spine_dedup_cols_str = ", ".join(f'"{col}"' for col in fv_join_keys)
+
+             # Build the JOIN condition using only this feature view's join keys
+             join_conditions_dedup = [f'SPINE_DEDUP."{col}" = FEATURE."{col}"' for col in fv_join_keys]
+
+             ctes.append(
+                 f"""{cte_name} AS (
+                     SELECT
+                         SPINE_DEDUP.*,
+                         FEATURE.* EXCLUDE ({join_keys_str})
+                     FROM (
+                         SELECT DISTINCT {spine_dedup_cols_str}
+                         FROM SPINE
+                     ) SPINE_DEDUP
+                     LEFT JOIN (
+                         SELECT {join_keys_str}, {feature_columns[i]}
+                         FROM {feature_view.fully_qualified_name()}
+                     ) FEATURE
+                     ON {" AND ".join(join_conditions_dedup)}
+                 )"""
+             )
+
+     # Build final SELECT with LEFT joins to each FV CTE
+     select_columns = []
+     join_clauses = []
+
+     for i, cte_name in enumerate(cte_names):
+         feature_view = feature_views[i]
+         fv_join_keys = list({k for e in feature_view.entities for k in e.join_keys})
+         join_conditions = [f'SPINE."{col}" = {cte_name}."{col}"' for col in fv_join_keys]
+         # Only include spine timestamp in join condition if both spine and FV have timestamps
+         if spine_timestamp_col is not None and feature_view.timestamp_col is not None:
+             join_conditions.append(f'SPINE."{spine_timestamp_col}" = {cte_name}."{spine_timestamp_col}"')
+
+         if include_feature_view_timestamp_col and feature_view.timestamp_col is not None:
+             f_ts_col_alias = identifier.concat_names(
+                 [feature_view.name, "_", str(feature_view.version), "_", feature_view.timestamp_col]
+             )
+             f_ts_col_str = f"{cte_name}.{f_ts_col_alias} AS {f_ts_col_alias}"
+             select_columns.append(f_ts_col_str)
+
+         # Select features from the CTE
+         # feature_columns[i] is already a comma-separated string of column names
+         feature_cols_from_cte = []
+         for col in feature_columns[i].split(", "):
+             col_clean = col.strip()
+             feature_cols_from_cte.append(f"{cte_name}.{col_clean}")
+         select_columns.extend(feature_cols_from_cte)
+
+         # Create join condition using only this feature view's join keys
+         join_clauses.append(
+             f"""
+             LEFT JOIN {cte_name}
+                 ON {" AND ".join(join_conditions)}"""
+         )
+
+     query = f"""WITH
+         {', '.join(ctes)}
+     SELECT
+         SPINE.*,
+         {', '.join(select_columns)}
+     FROM SPINE{' '.join(join_clauses)}
+     """
+
+     return query
+
  def _join_features(
      self,
      spine_df: DataFrame,
      features: list[Union[FeatureView, FeatureViewSlice]],
      spine_timestamp_col: Optional[SqlIdentifier],
      include_feature_view_timestamp_col: bool,
+     join_method: Literal["sequential", "cte"] = "sequential",
  ) -> tuple[DataFrame, list[SqlIdentifier]]:
-     for f in features:
-         f = f.feature_view_ref if isinstance(f, FeatureViewSlice) else f
-         if f.status == FeatureViewStatus.DRAFT:
+     # Validate join_method parameter
+     if join_method not in ["sequential", "cte"]:
+         raise ValueError(f"Invalid join_method '{join_method}'. Must be 'sequential' or 'cte'.")
+
+     feature_views: list[FeatureView] = []
+     # Extract column selections for each feature view
+     feature_columns: list[str] = []
+     for feature in features:
+         fv = feature.feature_view_ref if isinstance(feature, FeatureViewSlice) else feature
+         if fv.status == FeatureViewStatus.DRAFT:
              raise snowml_exceptions.SnowflakeMLException(
                  error_code=error_codes.NOT_FOUND,
-                 original_exception=ValueError(f"FeatureView {f.name} has not been registered."),
+                 original_exception=ValueError(f"FeatureView {fv.name} has not been registered."),
              )
-         for e in f.entities:
+         for e in fv.entities:
              for k in e.join_keys:
                  if k not in to_sql_identifiers(spine_df.columns):
                      raise snowml_exceptions.SnowflakeMLException(
                          error_code=error_codes.INVALID_ARGUMENT,
                          original_exception=ValueError(
-                             f"join_key {k} from Entity {e.name} in FeatureView {f.name} is not found in spine_df."
+                             f"join_key {k} from Entity {e.name} in FeatureView {fv.name} "
+                             "is not found in spine_df."
                          ),
                      )
-
+         feature_views.append(fv)
+         if isinstance(feature, FeatureViewSlice):
+             cols = feature.names
+         else:
+             cols = feature.feature_names
+         feature_columns.append(", ".join(col.resolved() for col in cols))
+     # TODO (SNOW-2396184): remove this check and the non-ASOF join path as ASOF join is enabled by default now.
      if self._asof_join_enabled is None:
          self._asof_join_enabled = self._is_asof_join_enabled()

      # TODO: leverage Snowpark dataframe for more concise syntax once it supports AsOfJoin
      query = spine_df.queries["queries"][-1]
-     layer = 0
-     for f in features:
-         if isinstance(f, FeatureViewSlice):
-             cols = f.names
-             f = f.feature_view_ref
-         else:
-             cols = f.feature_names
-
-         join_keys = list({k for e in f.entities for k in e.join_keys})
-         join_keys_str = ", ".join(join_keys)
-         assert f.version is not None
-         join_table_name = f.fully_qualified_name()
-
-         if spine_timestamp_col is not None and f.timestamp_col is not None:
-             if self._asof_join_enabled:
-                 if include_feature_view_timestamp_col:
-                     f_ts_col_alias = identifier.concat_names([f.name, "_", f.version, "_", f.timestamp_col])
-                     f_ts_col_str = f"r_{layer}.{f.timestamp_col} AS {f_ts_col_alias},"
+     join_keys: list[SqlIdentifier] = []
+
+     if join_method == "cte":
+
+         logger.info(f"Using the CTE method with {len(features)} feature views")
+
+         query = self._build_cte_query(
+             feature_views,
+             feature_columns,
+             spine_df.queries["queries"][-1],
+             spine_timestamp_col,
+             include_feature_view_timestamp_col,
+         )
+     else:
+         # Use sequential joins layer by layer
+         logger.info(f"Using the sequential join method with {len(features)} feature views")
+         layer = 0
+         for feature in features:
+             if isinstance(feature, FeatureViewSlice):
+                 cols = feature.names
+                 feature = feature.feature_view_ref
+             else:
+                 cols = feature.feature_names
+
+             join_keys = list({k for e in feature.entities for k in e.join_keys})
+             join_keys_str = ", ".join(join_keys)
+             assert feature.version is not None
+             join_table_name = feature.fully_qualified_name()
+
+             if spine_timestamp_col is not None and feature.timestamp_col is not None:
+                 if self._asof_join_enabled:
+                     if include_feature_view_timestamp_col:
+                         f_ts_col_alias = identifier.concat_names(
+                             [feature.name, "_", feature.version, "_", feature.timestamp_col]
+                         )
+                         f_ts_col_str = f"r_{layer}.{feature.timestamp_col} AS {f_ts_col_alias},"
+                     else:
+                         f_ts_col_str = ""
+                     query = f"""
+                         SELECT
+                             l_{layer}.*,
+                             {f_ts_col_str}
+                             r_{layer}.* EXCLUDE ({join_keys_str}, {feature.timestamp_col})
+                         FROM ({query}) l_{layer}
+                         ASOF JOIN (
+                             SELECT {join_keys_str}, {feature.timestamp_col},
+                                 {', '.join(col.resolved() for col in cols)}
+                             FROM {join_table_name}
+                         ) r_{layer}
+                         MATCH_CONDITION (l_{layer}.{spine_timestamp_col} >= r_{layer}.{feature.timestamp_col})
+                         ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
+                     """
                  else:
-                     f_ts_col_str = ""
+                     query = self._composed_union_window_join_query(
+                         layer=layer,
+                         s_query=query,
+                         s_ts_col=spine_timestamp_col,
+                         f_df=feature.feature_df,
+                         f_table_name=join_table_name,
+                         f_ts_col=feature.timestamp_col,
+                         join_keys=join_keys,
+                     )
+             else:
                  query = f"""
                      SELECT
                          l_{layer}.*,
-                         {f_ts_col_str}
-                         r_{layer}.* EXCLUDE ({join_keys_str}, {f.timestamp_col})
+                         r_{layer}.* EXCLUDE ({join_keys_str})
                      FROM ({query}) l_{layer}
-                     ASOF JOIN (
-                         SELECT {join_keys_str}, {f.timestamp_col}, {', '.join(cols)}
+                     LEFT JOIN (
+                         SELECT {join_keys_str}, {', '.join(col.resolved() for col in cols)}
                          FROM {join_table_name}
                      ) r_{layer}
-                     MATCH_CONDITION (l_{layer}.{spine_timestamp_col} >= r_{layer}.{f.timestamp_col})
                      ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
                  """
-             else:
-                 query = self._composed_union_window_join_query(
-                     layer=layer,
-                     s_query=query,
-                     s_ts_col=spine_timestamp_col,
-                     f_df=f.feature_df,
-                     f_table_name=join_table_name,
-                     f_ts_col=f.timestamp_col,
-                     join_keys=join_keys,
-                 )
-         else:
-             query = f"""
-                 SELECT
-                     l_{layer}.*,
-                     r_{layer}.* EXCLUDE ({join_keys_str})
-                 FROM ({query}) l_{layer}
-                 LEFT JOIN (
-                     SELECT {join_keys_str}, {', '.join(cols)}
-                     FROM {join_table_name}
-                 ) r_{layer}
-                 ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
-             """
-         layer += 1
+             layer += 1

      # TODO: construct result dataframe with datframe APIs once ASOF join is supported natively.
      # Below code manually construct result dataframe from private members of spine dataframe, which
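For orientation, the statement _build_cte_query emits for two feature views takes roughly this shape (a sketch with illustrative identifiers; the real output substitutes the actual join keys, timestamp columns, and feature columns):

    example_cte_query = """
    WITH
    SPINE AS (SELECT * FROM (<spine query>)),
    FV000 AS (
        SELECT SPINE_DEDUP.*, FEATURE.* EXCLUDE (USER_ID, TS)
        FROM (SELECT DISTINCT "USER_ID", "TS" FROM SPINE) SPINE_DEDUP
        ASOF JOIN (SELECT USER_ID, TS, F1 FROM <fv0 table>) FEATURE
        MATCH_CONDITION (SPINE_DEDUP."TS" >= FEATURE."TS")
        ON SPINE_DEDUP."USER_ID" = FEATURE."USER_ID"
    ),
    FV001 AS (
        SELECT SPINE_DEDUP.*, FEATURE.* EXCLUDE (ITEM_ID)
        FROM (SELECT DISTINCT "ITEM_ID" FROM SPINE) SPINE_DEDUP
        LEFT JOIN (SELECT ITEM_ID, F2 FROM <fv1 table>) FEATURE
        ON SPINE_DEDUP."ITEM_ID" = FEATURE."ITEM_ID"
    )
    SELECT SPINE.*, FV000.F1, FV001.F2
    FROM SPINE
    LEFT JOIN FV000 ON SPINE."USER_ID" = FV000."USER_ID" AND SPINE."TS" = FV000."TS"
    LEFT JOIN FV001 ON SPINE."ITEM_ID" = FV001."ITEM_ID"
    """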
@@ -3213,7 +3443,7 @@ class FeatureStore:
      online_table_name = FeatureView._get_online_table_name(feature_view_name)

      fully_qualified_online_name = self._get_fully_qualified_name(online_table_name)
-     source_table_name = feature_view_name
+     source_table_name = self._get_fully_qualified_name(feature_view_name)

      # Extract join keys for PRIMARY KEY (preserve order and ensure unique)
      ordered_join_keys: list[str] = []
snowflake/ml/feature_store/feature_view.py
@@ -1,7 +1,7 @@
+ """Feature view module for Snowflake ML Feature Store."""
  from __future__ import annotations

  import json
- import logging
  import re
  import warnings
  from collections import OrderedDict
@@ -52,7 +52,7 @@ _RESULT_SCAN_QUERY_PATTERN = re.compile(
  class OnlineConfig:
      """Configuration for online feature storage."""

-     enable: bool = False
+     enable: Optional[bool] = None
      target_lag: Optional[str] = None

      def __post_init__(self) -> None:
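The widened default makes "not specified" distinguishable from "disable" (a sketch of the request states, matching the _plan_online_update cases earlier in this diff):

    OnlineConfig()                        # enable=None: preserve the current online state
    OnlineConfig(enable=True)             # turn online storage on
    OnlineConfig(enable=False)            # turn online storage off
    OnlineConfig(target_lag="5 minutes")  # enable=None: update lag only if already online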
@@ -248,6 +248,7 @@ class FeatureView(lineage_node.LineageNode):
          - If `timestamp_col` is provided, it is added to the default clustering keys.
      online_config: Optional configuration for online storage. If provided with enable=True,
          online storage will be enabled. Defaults to None (no online storage).
+         NOTE: this feature is currently in Public Preview.
      _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.

  Example::
@@ -289,8 +290,6 @@ class FeatureView(lineage_node.LineageNode):

      # noqa: DAR401
      """
-     if online_config is not None:
-         logging.warning("'online_config' is in private preview since 1.12.0. Do not use it in production.")

      self._name: SqlIdentifier = SqlIdentifier(name)
      self._entities: list[Entity] = entities
@@ -533,8 +532,15 @@ class FeatureView(lineage_node.LineageNode):
          return self._feature_desc

      @property
-     def online(self) -> bool:
-         return self._online_config.enable if self._online_config else False
+     def online(self) -> bool:  # noqa: DAR101
+         """Check if online storage is enabled for this feature view.
+
+         Returns:
+             True if online storage is enabled, False otherwise.
+         """
+         if self._online_config and self._online_config.enable is True:
+             return True
+         return False

      @property
      def online_config(self) -> Optional[OnlineConfig]:
snowflake/ml/fileset/stage_fs.py
@@ -1,5 +1,6 @@
  import inspect
  import logging
+ import re
  import time
  from dataclasses import dataclass
  from typing import Any, Optional, Union, cast
@@ -27,6 +28,8 @@ _PRESIGNED_URL_LIFETIME_SEC = 14400
  # The threshold of when the presigned url should get refreshed before its expiration.
  _PRESIGNED_URL_HEADROOM_SEC = 3600

+ # Regex pattern to match cloud storage prefixes (s3://, gcs://, azure://) and bucket/container name at start of string
+ _CLOUD_PATH_PREFIX_PATTERN = re.compile(r"^(s3|gcs|azure)://[^/]+/", re.IGNORECASE)

  _PROJECT = "FileSet"

@@ -355,8 +358,16 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):

      Returns:
          A string of the relative stage path.
+
+     Raises:
+         ValueError: If the stage path format is invalid.
      """
-     return stage_path[len(self._stage) + 1 :]
+     if stage_path.lower().startswith(self._stage.lower()):
+         return stage_path[len(self._stage) + 1 :]
+     elif match := _CLOUD_PATH_PREFIX_PATTERN.match(stage_path):
+         return stage_path[match.end() :]
+
+     raise ValueError(f"Invalid stage path: {stage_path}")

  def _add_file_info_helper(
      self,
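A quick standalone check of the new fallback (the pattern is copied verbatim from the hunk above):

    import re

    _CLOUD_PATH_PREFIX_PATTERN = re.compile(r"^(s3|gcs|azure)://[^/]+/", re.IGNORECASE)

    for path in ("s3://my-bucket/data/part-0.parquet", "AZURE://container/dir/file.csv"):
        match = _CLOUD_PATH_PREFIX_PATTERN.match(path)
        assert match is not None  # prefix and bucket/container name are matched
        print(path[match.end():])  # -> data/part-0.parquet, then dir/file.csv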
snowflake/ml/jobs/_utils/constants.py
@@ -56,8 +56,9 @@ ENABLE_HEALTH_CHECKS_ENV_VAR = "ENABLE_HEALTH_CHECKS"
  ENABLE_HEALTH_CHECKS = "false"

  # Job status polling constants
- JOB_POLL_INITIAL_DELAY_SECONDS = 0.1
+ JOB_POLL_INITIAL_DELAY_SECONDS = 5
  JOB_POLL_MAX_DELAY_SECONDS = 30
+ JOB_SPCS_TIMEOUT_SECONDS = 30

  # Log start and end messages
  LOG_START_MSG = "--------------------------------\nML job started\n--------------------------------"
@@ -73,6 +74,7 @@ COMMON_INSTANCE_FAMILIES = {
      "CPU_X64_XS": ComputeResources(cpu=1, memory=6),
      "CPU_X64_S": ComputeResources(cpu=3, memory=13),
      "CPU_X64_M": ComputeResources(cpu=6, memory=28),
+     "CPU_X64_SL": ComputeResources(cpu=14, memory=54),
      "CPU_X64_L": ComputeResources(cpu=28, memory=116),
      "HIGHMEM_X64_S": ComputeResources(cpu=6, memory=58),
  }
@@ -85,6 +87,7 @@ AWS_INSTANCE_FAMILIES = {
  }
  AZURE_INSTANCE_FAMILIES = {
      "HIGHMEM_X64_M": ComputeResources(cpu=28, memory=244),
+     "HIGHMEM_X64_SL": ComputeResources(cpu=92, memory=654),
      "HIGHMEM_X64_L": ComputeResources(cpu=92, memory=654),
      "GPU_NV_XS": ComputeResources(cpu=3, memory=26, gpu=1, gpu_type="T4"),
      "GPU_NV_SM": ComputeResources(cpu=32, memory=424, gpu=1, gpu_type="A10"),
@@ -92,7 +95,15 @@ AZURE_INSTANCE_FAMILIES = {
      "GPU_NV_3M": ComputeResources(cpu=44, memory=424, gpu=2, gpu_type="A100"),
      "GPU_NV_SL": ComputeResources(cpu=92, memory=858, gpu=4, gpu_type="A100"),
  }
+ GCP_INSTANCE_FAMILIES = {
+     "HIGHMEM_X64_M": ComputeResources(cpu=28, memory=244),
+     "HIGHMEM_X64_SL": ComputeResources(cpu=92, memory=654),
+     "GPU_GCP_NV_L4_1_24G": ComputeResources(cpu=6, memory=28, gpu=1, gpu_type="L4"),
+     "GPU_GCP_NV_L4_4_24G": ComputeResources(cpu=44, memory=178, gpu=4, gpu_type="L4"),
+     "GPU_GCP_NV_A100_8_40G": ComputeResources(cpu=92, memory=654, gpu=8, gpu_type="A100"),
+ }
  CLOUD_INSTANCE_FAMILIES = {
      SnowflakeCloudType.AWS: AWS_INSTANCE_FAMILIES,
      SnowflakeCloudType.AZURE: AZURE_INSTANCE_FAMILIES,
+     SnowflakeCloudType.GCP: GCP_INSTANCE_FAMILIES,
  }
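With GCP wired into CLOUD_INSTANCE_FAMILIES, lookups resolve the same way on all three clouds (a hypothetical lookup; the dicts and the SnowflakeCloudType enum are the ones shown above):

    resources = CLOUD_INSTANCE_FAMILIES[SnowflakeCloudType.GCP]["GPU_GCP_NV_L4_1_24G"]
    print(resources.cpu, resources.memory, resources.gpu, resources.gpu_type)  # 6 28 1 L4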