snowflake-ml-python 1.22.0__py3-none-any.whl → 1.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/platform_capabilities.py +0 -4
- snowflake/ml/feature_store/__init__.py +2 -0
- snowflake/ml/feature_store/aggregation.py +367 -0
- snowflake/ml/feature_store/feature.py +366 -0
- snowflake/ml/feature_store/feature_store.py +234 -20
- snowflake/ml/feature_store/feature_view.py +189 -4
- snowflake/ml/feature_store/metadata_manager.py +425 -0
- snowflake/ml/feature_store/tile_sql_generator.py +1079 -0
- snowflake/ml/jobs/__init__.py +2 -0
- snowflake/ml/jobs/_utils/constants.py +1 -0
- snowflake/ml/jobs/_utils/payload_utils.py +38 -18
- snowflake/ml/jobs/_utils/query_helper.py +8 -1
- snowflake/ml/jobs/_utils/runtime_env_utils.py +117 -0
- snowflake/ml/jobs/_utils/stage_utils.py +2 -2
- snowflake/ml/jobs/_utils/types.py +22 -2
- snowflake/ml/jobs/job_definition.py +232 -0
- snowflake/ml/jobs/manager.py +16 -177
- snowflake/ml/model/__init__.py +4 -0
- snowflake/ml/model/_client/model/batch_inference_specs.py +38 -2
- snowflake/ml/model/_client/model/model_version_impl.py +120 -89
- snowflake/ml/model/_client/ops/model_ops.py +4 -26
- snowflake/ml/model/_client/ops/param_utils.py +124 -0
- snowflake/ml/model/_client/ops/service_ops.py +63 -23
- snowflake/ml/model/_client/service/model_deployment_spec.py +12 -5
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
- snowflake/ml/model/_client/sql/service.py +25 -54
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +21 -3
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +21 -3
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +21 -3
- snowflake/ml/model/_model_composer/model_method/model_method.py +3 -1
- snowflake/ml/model/_packager/model_handlers/huggingface.py +74 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +121 -29
- snowflake/ml/model/_signatures/utils.py +130 -0
- snowflake/ml/model/openai_signatures.py +97 -0
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/METADATA +105 -1
- {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/RECORD +41 -35
- {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/WHEEL +1 -1
- snowflake/ml/experiment/callback/__init__.py +0 -0
- {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/top_level.txt +0 -0
|
@@ -41,6 +41,11 @@ from snowflake.ml.feature_store.feature_view import (
|
|
|
41
41
|
FeatureViewVersion,
|
|
42
42
|
_FeatureViewMetadata,
|
|
43
43
|
)
|
|
44
|
+
from snowflake.ml.feature_store.metadata_manager import (
|
|
45
|
+
AggregationMetadata,
|
|
46
|
+
FeatureStoreMetadataManager,
|
|
47
|
+
)
|
|
48
|
+
from snowflake.ml.feature_store.tile_sql_generator import MergingSqlGenerator
|
|
44
49
|
from snowflake.ml.utils import sql_client
|
|
45
50
|
from snowflake.snowpark import DataFrame, Row, Session, functions as F
|
|
46
51
|
from snowflake.snowpark.exceptions import SnowparkSQLException
|
|
@@ -91,6 +96,7 @@ class _FeatureStoreObjTypes(Enum):
|
|
|
91
96
|
FEATURE_VIEW_REFRESH_TASK = "FEATURE_VIEW_REFRESH_TASK"
|
|
92
97
|
TRAINING_DATA = "TRAINING_DATA"
|
|
93
98
|
ONLINE_FEATURE_TABLE = "ONLINE_FEATURE_TABLE"
|
|
99
|
+
INTERNAL_METADATA_TABLE = "INTERNAL_METADATA_TABLE"
|
|
94
100
|
|
|
95
101
|
@classmethod
|
|
96
102
|
def parse(cls, val: str) -> _FeatureStoreObjTypes:
|
|
@@ -262,6 +268,12 @@ class FeatureStore:
|
|
|
262
268
|
database=database,
|
|
263
269
|
schema=name,
|
|
264
270
|
)
|
|
271
|
+
self._metadata_manager = FeatureStoreMetadataManager(
|
|
272
|
+
session=session,
|
|
273
|
+
schema_path=self._config.full_schema_path,
|
|
274
|
+
fs_object_tag_path=self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG),
|
|
275
|
+
telemetry_stmp=self._telemetry_stmp,
|
|
276
|
+
)
|
|
265
277
|
self._asof_join_enabled = None
|
|
266
278
|
|
|
267
279
|
# A dict from object name to tuple of search space and object domain.
|
|
@@ -295,6 +307,7 @@ class FeatureStore:
|
|
|
295
307
|
self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
|
|
296
308
|
statement_params=self._telemetry_stmp
|
|
297
309
|
)
|
|
310
|
+
# Metadata table for aggregation configs is created lazily by metadata manager
|
|
298
311
|
except Exception as e:
|
|
299
312
|
raise snowml_exceptions.SnowflakeMLException(
|
|
300
313
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
|
@@ -583,11 +596,16 @@ class FeatureStore:
|
|
|
583
596
|
desc = "" if desc is None else f"COMMENT '{desc}'"
|
|
584
597
|
return f"{col.name} {desc}"
|
|
585
598
|
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
599
|
+
# For tiled feature views, skip column definitions since the tiling query
|
|
600
|
+
# produces different columns (TILE_START, partial aggregates)
|
|
601
|
+
if feature_view.is_tiled:
|
|
602
|
+
column_descs = ""
|
|
603
|
+
else:
|
|
604
|
+
column_descs = (
|
|
605
|
+
", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
|
|
606
|
+
if feature_view.feature_descs is not None
|
|
607
|
+
else ""
|
|
608
|
+
)
|
|
591
609
|
|
|
592
610
|
# Step 1: Create offline feature view (Dynamic Table or View)
|
|
593
611
|
created_resources.extend(
|
|
@@ -611,10 +629,26 @@ class FeatureStore:
|
|
|
611
629
|
(_FeatureStoreObjTypes.ONLINE_FEATURE_TABLE, self._get_fully_qualified_name(online_table_name))
|
|
612
630
|
)
|
|
613
631
|
|
|
632
|
+
# Step 3: Save aggregation metadata for tiled feature views (atomically)
|
|
633
|
+
if feature_view.is_tiled:
|
|
634
|
+
agg_metadata = AggregationMetadata(
|
|
635
|
+
feature_granularity=feature_view.feature_granularity, # type: ignore[arg-type]
|
|
636
|
+
features=feature_view.aggregation_specs, # type: ignore[arg-type]
|
|
637
|
+
)
|
|
638
|
+
# Convert SqlIdentifier keys to strings if descriptions exist
|
|
639
|
+
descs = None
|
|
640
|
+
if feature_view.feature_descs:
|
|
641
|
+
descs = {k.identifier(): v for k, v in feature_view.feature_descs.items()}
|
|
642
|
+
# Save specs and descs atomically in a single statement
|
|
643
|
+
self._metadata_manager.save_feature_view_metadata(feature_view.name, version, agg_metadata, descs)
|
|
644
|
+
|
|
614
645
|
except Exception as e:
|
|
615
646
|
# We can't rollback in case of overwrite.
|
|
616
647
|
if not overwrite:
|
|
617
648
|
self._rollback_created_resources(created_resources)
|
|
649
|
+
# Also cleanup metadata for tiled FVs (safe even if not saved yet)
|
|
650
|
+
if feature_view.is_tiled:
|
|
651
|
+
self._metadata_manager.delete_feature_view_metadata(str(feature_view.name), version)
|
|
618
652
|
|
|
619
653
|
if isinstance(e, snowml_exceptions.SnowflakeMLException):
|
|
620
654
|
raise
|
|
@@ -1437,6 +1471,9 @@ class FeatureStore:
|
|
|
1437
1471
|
),
|
|
1438
1472
|
)
|
|
1439
1473
|
|
|
1474
|
+
# Delete aggregation metadata and feature descriptions if exist
|
|
1475
|
+
self._metadata_manager.delete_feature_view_metadata(str(feature_view.name), str(feature_view.version))
|
|
1476
|
+
|
|
1440
1477
|
logger.info(f"Deleted FeatureView {feature_view.name}/{feature_view.version}.")
|
|
1441
1478
|
|
|
1442
1479
|
@dispatch_decorator()
|
|
@@ -1916,6 +1953,15 @@ class FeatureStore:
|
|
|
1916
1953
|
" to generate the data as a Snowflake Table."
|
|
1917
1954
|
),
|
|
1918
1955
|
)
|
|
1956
|
+
|
|
1957
|
+
# Cache the result to a temporary table before creating the dataset
|
|
1958
|
+
# to ensure single query evaluation:
|
|
1959
|
+
has_tiled_fv = any(
|
|
1960
|
+
(fv.feature_view_ref if isinstance(fv, FeatureViewSlice) else fv).is_tiled for fv in features
|
|
1961
|
+
)
|
|
1962
|
+
if has_tiled_fv:
|
|
1963
|
+
result_df = result_df.cache_result()
|
|
1964
|
+
|
|
1919
1965
|
# TODO: Add feature store tag once Dataset (version) supports tags
|
|
1920
1966
|
ds: dataset.Dataset = dataset.create_from_dataframe(
|
|
1921
1967
|
self._session,
|
|
@@ -2375,9 +2421,25 @@ class FeatureStore:
|
|
|
2375
2421
|
def _read_from_offline_store(
|
|
2376
2422
|
self, feature_view: FeatureView, keys: Optional[list[list[str]]], feature_names: Optional[list[str]]
|
|
2377
2423
|
) -> DataFrame:
|
|
2378
|
-
"""Read feature values from the offline store (main feature view table).
|
|
2424
|
+
"""Read feature values from the offline store (main feature view table).
|
|
2425
|
+
|
|
2426
|
+
For tiled feature views, this computes aggregated features at current time
|
|
2427
|
+
by creating a synthetic spine with unique entity combinations.
|
|
2428
|
+
|
|
2429
|
+
Args:
|
|
2430
|
+
feature_view: The feature view to read from.
|
|
2431
|
+
keys: Optional list of key values to filter by.
|
|
2432
|
+
feature_names: Optional list of feature names to return.
|
|
2433
|
+
|
|
2434
|
+
Returns:
|
|
2435
|
+
Snowpark DataFrame containing the feature values.
|
|
2436
|
+
"""
|
|
2379
2437
|
table_name = feature_view.fully_qualified_name()
|
|
2380
2438
|
|
|
2439
|
+
# For tiled FVs, compute features at current time
|
|
2440
|
+
if feature_view.is_tiled:
|
|
2441
|
+
return self._read_tiled_fv_at_current_time(feature_view, keys, feature_names)
|
|
2442
|
+
|
|
2381
2443
|
# Build SELECT and WHERE clauses using helper methods
|
|
2382
2444
|
select_clause = self._build_select_clause_and_validate(feature_view, feature_names, include_join_keys=True)
|
|
2383
2445
|
where_clause = self._build_where_clause_for_keys(feature_view, keys)
|
|
@@ -2385,6 +2447,78 @@ class FeatureStore:
|
|
|
2385
2447
|
query = f"SELECT {select_clause} FROM {table_name}{where_clause}"
|
|
2386
2448
|
return self._session.sql(query)
|
|
2387
2449
|
|
|
2450
|
+
def _read_tiled_fv_at_current_time(
|
|
2451
|
+
self, feature_view: FeatureView, keys: Optional[list[list[str]]], feature_names: Optional[list[str]]
|
|
2452
|
+
) -> DataFrame:
|
|
2453
|
+
"""Read tiled feature view by computing aggregated features at current time.
|
|
2454
|
+
|
|
2455
|
+
Creates a synthetic spine with unique entity combinations from the tile table,
|
|
2456
|
+
uses CURRENT_TIMESTAMP as the query time, and merges tiles to compute features.
|
|
2457
|
+
|
|
2458
|
+
Args:
|
|
2459
|
+
feature_view: The tiled feature view to read from.
|
|
2460
|
+
keys: Optional list of key values to filter by.
|
|
2461
|
+
feature_names: Optional list of feature names to return.
|
|
2462
|
+
|
|
2463
|
+
Returns:
|
|
2464
|
+
Snowpark DataFrame containing the computed feature values.
|
|
2465
|
+
"""
|
|
2466
|
+
table_name = feature_view.fully_qualified_name()
|
|
2467
|
+
|
|
2468
|
+
# Get join keys from entities
|
|
2469
|
+
join_keys: list[str] = []
|
|
2470
|
+
for entity in feature_view.entities:
|
|
2471
|
+
join_keys.extend([str(k) for k in entity.join_keys])
|
|
2472
|
+
|
|
2473
|
+
quoted_keys = [f'"{k}"' for k in join_keys]
|
|
2474
|
+
quoted_keys_str = ", ".join(quoted_keys)
|
|
2475
|
+
|
|
2476
|
+
# Build WHERE clause for key filtering (if any)
|
|
2477
|
+
where_clause = self._build_where_clause_for_keys(feature_view, keys)
|
|
2478
|
+
|
|
2479
|
+
# Step 1: Create spine CTE with unique entities + CURRENT_TIMESTAMP
|
|
2480
|
+
spine_cte = f"""
|
|
2481
|
+
SELECT DISTINCT {quoted_keys_str},
|
|
2482
|
+
CURRENT_TIMESTAMP() AS "_QUERY_TS"
|
|
2483
|
+
FROM {table_name}{where_clause}
|
|
2484
|
+
"""
|
|
2485
|
+
|
|
2486
|
+
# Step 2: Generate merge CTEs using MergingSqlGenerator
|
|
2487
|
+
assert feature_view.aggregation_specs is not None
|
|
2488
|
+
assert feature_view.feature_granularity is not None
|
|
2489
|
+
assert feature_view.timestamp_col is not None
|
|
2490
|
+
|
|
2491
|
+
generator = MergingSqlGenerator(
|
|
2492
|
+
tile_table=table_name,
|
|
2493
|
+
join_keys=join_keys,
|
|
2494
|
+
timestamp_col=str(feature_view.timestamp_col),
|
|
2495
|
+
feature_granularity=feature_view.feature_granularity,
|
|
2496
|
+
features=feature_view.aggregation_specs,
|
|
2497
|
+
spine_timestamp_col="_QUERY_TS",
|
|
2498
|
+
fv_index=0,
|
|
2499
|
+
)
|
|
2500
|
+
|
|
2501
|
+
merge_ctes = generator.generate_all_ctes()
|
|
2502
|
+
|
|
2503
|
+
# Step 3: Build full query
|
|
2504
|
+
cte_parts = [f"SPINE AS ({spine_cte})"]
|
|
2505
|
+
for cte_name, cte_body in merge_ctes:
|
|
2506
|
+
cte_parts.append(f"{cte_name} AS ({cte_body})")
|
|
2507
|
+
|
|
2508
|
+
# Get feature columns for final SELECT
|
|
2509
|
+
all_feature_cols = [spec.get_sql_column_name() for spec in feature_view.aggregation_specs]
|
|
2510
|
+
if feature_names:
|
|
2511
|
+
# Filter to requested features
|
|
2512
|
+
feature_names_upper = [f.upper() for f in feature_names]
|
|
2513
|
+
all_feature_cols = [c for c in all_feature_cols if c.strip('"').upper() in feature_names_upper]
|
|
2514
|
+
|
|
2515
|
+
feature_cols_str = ", ".join(all_feature_cols)
|
|
2516
|
+
# CTE name format matches generator: FV{index:03d}
|
|
2517
|
+
final_select = f"SELECT {quoted_keys_str}, {feature_cols_str} FROM FV000"
|
|
2518
|
+
|
|
2519
|
+
full_query = f"WITH {', '.join(cte_parts)} {final_select}"
|
|
2520
|
+
return self._session.sql(full_query)
|
|
2521
|
+
|
|
2388
2522
|
def _read_from_online_store(
|
|
2389
2523
|
self, feature_view: FeatureView, keys: Optional[list[list[str]]], feature_names: Optional[list[str]]
|
|
2390
2524
|
) -> DataFrame:
|
|
@@ -2488,7 +2622,17 @@ class FeatureStore:
|
|
|
2488
2622
|
query = ""
|
|
2489
2623
|
try:
|
|
2490
2624
|
override_clause = " OR REPLACE" if override else ""
|
|
2491
|
-
|
|
2625
|
+
|
|
2626
|
+
# Use tiling query for tiled feature views
|
|
2627
|
+
if feature_view.is_tiled:
|
|
2628
|
+
source_query = feature_view._get_tile_query()
|
|
2629
|
+
else:
|
|
2630
|
+
source_query = feature_view.query
|
|
2631
|
+
|
|
2632
|
+
# Include column definitions only if provided (skip for tiled feature views)
|
|
2633
|
+
column_clause = f" ({column_descs})" if column_descs else ""
|
|
2634
|
+
|
|
2635
|
+
query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name}{column_clause}
|
|
2492
2636
|
TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
|
|
2493
2637
|
COMMENT = '{feature_view.desc}'
|
|
2494
2638
|
TAG (
|
|
@@ -2499,11 +2643,19 @@ class FeatureStore:
|
|
|
2499
2643
|
INITIALIZE = {feature_view.initialize}
|
|
2500
2644
|
"""
|
|
2501
2645
|
if feature_view.cluster_by:
|
|
2502
|
-
|
|
2646
|
+
# For tiled FVs, replace timestamp column with TILE_START in cluster_by
|
|
2647
|
+
if feature_view.is_tiled and feature_view.timestamp_col:
|
|
2648
|
+
ts_col_upper = feature_view.timestamp_col.upper()
|
|
2649
|
+
cluster_by_cols = [
|
|
2650
|
+
"TILE_START" if col.upper() == ts_col_upper else col for col in feature_view.cluster_by
|
|
2651
|
+
]
|
|
2652
|
+
else:
|
|
2653
|
+
cluster_by_cols = [str(col) for col in feature_view.cluster_by]
|
|
2654
|
+
cluster_by_clause = f"CLUSTER BY ({', '.join(cluster_by_cols)})"
|
|
2503
2655
|
query += f"{cluster_by_clause}"
|
|
2504
2656
|
|
|
2505
2657
|
query += f"""
|
|
2506
|
-
AS {
|
|
2658
|
+
AS {source_query}
|
|
2507
2659
|
"""
|
|
2508
2660
|
self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
|
|
2509
2661
|
|
|
@@ -2653,9 +2805,10 @@ class FeatureStore:
|
|
|
2653
2805
|
This method supports feature views with different join keys by:
|
|
2654
2806
|
1. Creating a spine CTE that includes all possible join keys
|
|
2655
2807
|
2. For each feature view, creating a deduplicated spine subquery with only that FV's join keys
|
|
2656
|
-
3.
|
|
2657
|
-
4. Performing
|
|
2658
|
-
5.
|
|
2808
|
+
3. For tiled FVs: Using MergingSqlGenerator to generate tile merging CTEs
|
|
2809
|
+
4. For non-tiled FVs: Performing ASOF JOINs on the deduplicated spine when timestamp columns exist
|
|
2810
|
+
5. Performing LEFT JOINs on the deduplicated spine when timestamp columns are missing
|
|
2811
|
+
6. Combining results by LEFT JOINing each FV CTE back to the original SPINE
|
|
2659
2812
|
|
|
2660
2813
|
Args:
|
|
2661
2814
|
feature_views: A list of feature views to join.
|
|
@@ -2688,8 +2841,23 @@ class FeatureStore:
|
|
|
2688
2841
|
fv_join_keys = list({k for e in feature_view.entities for k in e.join_keys})
|
|
2689
2842
|
join_keys_str = ", ".join(fv_join_keys)
|
|
2690
2843
|
|
|
2844
|
+
# Handle tiled feature views using MergingSqlGenerator
|
|
2845
|
+
if feature_view.is_tiled and spine_timestamp_col is not None:
|
|
2846
|
+
generator = MergingSqlGenerator(
|
|
2847
|
+
tile_table=feature_view.fully_qualified_name(),
|
|
2848
|
+
join_keys=[str(k) for k in fv_join_keys],
|
|
2849
|
+
timestamp_col=str(feature_timestamp_col),
|
|
2850
|
+
feature_granularity=feature_view.feature_granularity, # type: ignore[arg-type]
|
|
2851
|
+
features=feature_view.aggregation_specs, # type: ignore[arg-type]
|
|
2852
|
+
spine_timestamp_col=str(spine_timestamp_col),
|
|
2853
|
+
fv_index=i,
|
|
2854
|
+
)
|
|
2855
|
+
# Add all CTEs from the merging generator
|
|
2856
|
+
for cte_tuple in generator.generate_all_ctes():
|
|
2857
|
+
ctes.append(f"{cte_tuple[0]} AS (\n{cte_tuple[1]}\n)")
|
|
2858
|
+
|
|
2691
2859
|
# Use ASOF JOIN if both spine and feature view have timestamp columns, otherwise use LEFT JOIN
|
|
2692
|
-
|
|
2860
|
+
elif spine_timestamp_col is not None and feature_timestamp_col is not None:
|
|
2693
2861
|
# Build the deduplicated spine columns set (join keys + timestamp)
|
|
2694
2862
|
spine_dedup_cols_set = set(fv_join_keys)
|
|
2695
2863
|
if spine_timestamp_col not in spine_dedup_cols_set:
|
|
@@ -2760,7 +2928,11 @@ class FeatureStore:
|
|
|
2760
2928
|
if spine_timestamp_col is not None and feature_view.timestamp_col is not None:
|
|
2761
2929
|
join_conditions.append(f'SPINE."{spine_timestamp_col}" = {cte_name}."{spine_timestamp_col}"')
|
|
2762
2930
|
|
|
2763
|
-
if
|
|
2931
|
+
if (
|
|
2932
|
+
include_feature_view_timestamp_col
|
|
2933
|
+
and feature_view.timestamp_col is not None
|
|
2934
|
+
and not feature_view.is_tiled
|
|
2935
|
+
):
|
|
2764
2936
|
f_ts_col_alias = identifier.concat_names(
|
|
2765
2937
|
[feature_view.name, "_", str(feature_view.version), "_", feature_view.timestamp_col]
|
|
2766
2938
|
)
|
|
@@ -2768,11 +2940,17 @@ class FeatureStore:
|
|
|
2768
2940
|
select_columns.append(f_ts_col_str)
|
|
2769
2941
|
|
|
2770
2942
|
# Select features from the CTE
|
|
2771
|
-
#
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
2943
|
+
# For tiled FVs, get output columns from aggregation specs
|
|
2944
|
+
if feature_view.is_tiled and feature_view.aggregation_specs:
|
|
2945
|
+
feature_cols_from_cte = [
|
|
2946
|
+
f"{cte_name}.{spec.get_sql_column_name()}" for spec in feature_view.aggregation_specs
|
|
2947
|
+
]
|
|
2948
|
+
else:
|
|
2949
|
+
# feature_columns[i] is already a comma-separated string of column names
|
|
2950
|
+
feature_cols_from_cte = []
|
|
2951
|
+
for col in feature_columns[i].split(", "):
|
|
2952
|
+
col_clean = col.strip()
|
|
2953
|
+
feature_cols_from_cte.append(f"{cte_name}.{col_clean}")
|
|
2776
2954
|
select_columns.extend(feature_cols_from_cte)
|
|
2777
2955
|
|
|
2778
2956
|
# Create join condition using only this feature view's join keys
|
|
@@ -2804,6 +2982,26 @@ FROM SPINE{' '.join(join_clauses)}
|
|
|
2804
2982
|
if join_method not in ["sequential", "cte"]:
|
|
2805
2983
|
raise ValueError(f"Invalid join_method '{join_method}'. Must be 'sequential' or 'cte'.")
|
|
2806
2984
|
|
|
2985
|
+
# Check if any feature view is tiled - tiled FVs require CTE method and timestamp column
|
|
2986
|
+
has_tiled_fv = False
|
|
2987
|
+
for feature in features:
|
|
2988
|
+
fv = feature.feature_view_ref if isinstance(feature, FeatureViewSlice) else feature
|
|
2989
|
+
if fv.is_tiled:
|
|
2990
|
+
has_tiled_fv = True
|
|
2991
|
+
break
|
|
2992
|
+
|
|
2993
|
+
if has_tiled_fv and join_method != "cte":
|
|
2994
|
+
raise ValueError(
|
|
2995
|
+
"Tiled feature views require join_method='cte'. "
|
|
2996
|
+
"Please set join_method='cte' when using feature views with tile-based aggregations."
|
|
2997
|
+
)
|
|
2998
|
+
|
|
2999
|
+
if has_tiled_fv and spine_timestamp_col is None:
|
|
3000
|
+
raise ValueError(
|
|
3001
|
+
"Tiled feature views require a spine_timestamp_col for point-in-time joins. "
|
|
3002
|
+
"Please provide spine_timestamp_col when using feature views with tile-based aggregations."
|
|
3003
|
+
)
|
|
3004
|
+
|
|
2807
3005
|
feature_views: list[FeatureView] = []
|
|
2808
3006
|
# Extract column selections for each feature view
|
|
2809
3007
|
feature_columns: list[str] = []
|
|
@@ -3340,6 +3538,12 @@ FROM SPINE{' '.join(join_clauses)}
|
|
|
3340
3538
|
online_config_json = self._determine_online_config_from_oft(name.identifier(), version)
|
|
3341
3539
|
online_config = fv_mod.OnlineConfig.from_json(online_config_json)
|
|
3342
3540
|
|
|
3541
|
+
# Load feature metadata if present (for tiled feature views)
|
|
3542
|
+
agg_metadata = self._metadata_manager.get_feature_specs(name.identifier(), version)
|
|
3543
|
+
feature_granularity = agg_metadata.feature_granularity if agg_metadata else None
|
|
3544
|
+
aggregation_specs = agg_metadata.features if agg_metadata else None
|
|
3545
|
+
is_tiled = agg_metadata is not None
|
|
3546
|
+
|
|
3343
3547
|
if obj_type == _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW:
|
|
3344
3548
|
df = self._session.sql(query)
|
|
3345
3549
|
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
|
@@ -3348,6 +3552,12 @@ FROM SPINE{' '.join(join_clauses)}
|
|
|
3348
3552
|
re_initialize = re.match(_DT_INITIALIZE_PATTERN, row["text"])
|
|
3349
3553
|
initialize = re_initialize.group("initialize") if re_initialize is not None else "ON_CREATE"
|
|
3350
3554
|
|
|
3555
|
+
# For tiled FVs, get descriptions from metadata table; otherwise from DT columns
|
|
3556
|
+
if is_tiled:
|
|
3557
|
+
feature_descs = self._metadata_manager.get_feature_descs(name.identifier(), version) or {}
|
|
3558
|
+
else:
|
|
3559
|
+
feature_descs = self._fetch_column_descs("DYNAMIC TABLE", fv_name)
|
|
3560
|
+
|
|
3351
3561
|
fv = FeatureView._construct_feature_view(
|
|
3352
3562
|
name=name,
|
|
3353
3563
|
entities=entities,
|
|
@@ -3360,7 +3570,7 @@ FROM SPINE{' '.join(join_clauses)}
|
|
|
3360
3570
|
if len(row["scheduling_state"]) > 0
|
|
3361
3571
|
else FeatureViewStatus.MASKED
|
|
3362
3572
|
),
|
|
3363
|
-
feature_descs=
|
|
3573
|
+
feature_descs=feature_descs,
|
|
3364
3574
|
refresh_freq=row["target_lag"],
|
|
3365
3575
|
database=self._config.database.identifier(),
|
|
3366
3576
|
schema=self._config.schema.identifier(),
|
|
@@ -3377,6 +3587,8 @@ FROM SPINE{' '.join(join_clauses)}
|
|
|
3377
3587
|
session=self._session,
|
|
3378
3588
|
cluster_by=self._extract_cluster_by_columns(row["cluster_by"]),
|
|
3379
3589
|
online_config=online_config,
|
|
3590
|
+
feature_granularity=feature_granularity,
|
|
3591
|
+
aggregation_specs=aggregation_specs,
|
|
3380
3592
|
)
|
|
3381
3593
|
return fv
|
|
3382
3594
|
else:
|
|
@@ -3405,6 +3617,8 @@ FROM SPINE{' '.join(join_clauses)}
|
|
|
3405
3617
|
infer_schema_df=infer_schema_df,
|
|
3406
3618
|
session=self._session,
|
|
3407
3619
|
online_config=online_config,
|
|
3620
|
+
feature_granularity=feature_granularity,
|
|
3621
|
+
aggregation_specs=aggregation_specs,
|
|
3408
3622
|
)
|
|
3409
3623
|
return fv
|
|
3410
3624
|
|