snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. snowflake/cortex/__init__.py +4 -1
  2. snowflake/cortex/_classify_text.py +36 -0
  3. snowflake/cortex/_complete.py +281 -21
  4. snowflake/cortex/_extract_answer.py +0 -1
  5. snowflake/cortex/_sentiment.py +0 -1
  6. snowflake/cortex/_summarize.py +0 -1
  7. snowflake/cortex/_translate.py +0 -1
  8. snowflake/cortex/_util.py +12 -85
  9. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  10. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  11. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  12. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  13. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  14. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  15. snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
  16. snowflake/ml/_internal/telemetry.py +38 -2
  17. snowflake/ml/_internal/utils/identifier.py +14 -0
  18. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  19. snowflake/ml/data/_internal/arrow_ingestor.py +228 -0
  20. snowflake/ml/data/_internal/ingestor_utils.py +58 -0
  21. snowflake/ml/data/data_connector.py +133 -0
  22. snowflake/ml/data/data_ingestor.py +28 -0
  23. snowflake/ml/data/data_source.py +23 -0
  24. snowflake/ml/dataset/dataset.py +39 -32
  25. snowflake/ml/dataset/dataset_reader.py +18 -118
  26. snowflake/ml/feature_store/access_manager.py +7 -1
  27. snowflake/ml/feature_store/entity.py +19 -2
  28. snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
  29. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +31 -0
  30. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +24 -0
  31. snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +4 -0
  32. snowflake/ml/feature_store/examples/example_helper.py +240 -0
  33. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
  34. snowflake/ml/feature_store/examples/new_york_taxi_features/features/dropoff_features.py +39 -0
  35. snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +58 -0
  36. snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -0
  37. snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
  38. snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
  39. snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
  40. snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
  41. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
  42. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +29 -0
  43. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +21 -0
  44. snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +5 -0
  45. snowflake/ml/feature_store/feature_store.py +987 -264
  46. snowflake/ml/feature_store/feature_view.py +228 -13
  47. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  48. snowflake/ml/fileset/fileset.py +2 -2
  49. snowflake/ml/fileset/snowfs.py +4 -15
  50. snowflake/ml/fileset/stage_fs.py +24 -18
  51. snowflake/ml/lineage/__init__.py +3 -0
  52. snowflake/ml/lineage/lineage_node.py +139 -0
  53. snowflake/ml/model/_client/model/model_impl.py +47 -14
  54. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  55. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  56. snowflake/ml/model/_client/sql/model.py +1 -0
  57. snowflake/ml/model/_client/sql/model_version.py +45 -2
  58. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
  59. snowflake/ml/model/_model_composer/model_composer.py +15 -17
  60. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -17
  61. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
  62. snowflake/ml/model/_model_composer/model_method/function_generator.py +20 -4
  63. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
  64. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +55 -0
  65. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -34
  66. snowflake/ml/model/_model_composer/model_method/model_method.py +10 -7
  67. snowflake/ml/model/_packager/model_handlers/_base.py +13 -3
  68. snowflake/ml/model/_packager/model_handlers/_utils.py +59 -1
  69. snowflake/ml/model/_packager/model_handlers/catboost.py +44 -2
  70. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  71. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  72. snowflake/ml/model/_packager/model_handlers/lightgbm.py +70 -2
  73. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  74. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  75. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  76. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  77. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  78. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  79. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  80. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  81. snowflake/ml/model/_packager/model_handlers/xgboost.py +61 -2
  82. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  83. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  84. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  85. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  86. snowflake/ml/model/_packager/model_packager.py +9 -4
  87. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  88. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -5
  89. snowflake/ml/model/custom_model.py +22 -2
  90. snowflake/ml/model/model_signature.py +4 -4
  91. snowflake/ml/model/type_hints.py +77 -4
  92. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +3 -1
  93. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
  94. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
  95. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
  96. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
  97. snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
  98. snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
  99. snowflake/ml/modeling/cluster/birch.py +4 -2
  100. snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
  101. snowflake/ml/modeling/cluster/dbscan.py +4 -2
  102. snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
  103. snowflake/ml/modeling/cluster/k_means.py +4 -2
  104. snowflake/ml/modeling/cluster/mean_shift.py +4 -2
  105. snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
  106. snowflake/ml/modeling/cluster/optics.py +4 -2
  107. snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
  108. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
  109. snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
  110. snowflake/ml/modeling/compose/column_transformer.py +4 -2
  111. snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
  112. snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
  113. snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
  114. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
  115. snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
  116. snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
  117. snowflake/ml/modeling/covariance/oas.py +4 -2
  118. snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
  119. snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
  120. snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
  121. snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
  122. snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
  123. snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
  124. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
  125. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
  126. snowflake/ml/modeling/decomposition/pca.py +4 -2
  127. snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
  128. snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
  129. snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
  130. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
  131. snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
  132. snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
  133. snowflake/ml/modeling/impute/knn_imputer.py +4 -2
  134. snowflake/ml/modeling/impute/missing_indicator.py +4 -2
  135. snowflake/ml/modeling/impute/simple_imputer.py +26 -0
  136. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
  137. snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
  138. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
  139. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
  140. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
  141. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
  142. snowflake/ml/modeling/manifold/isomap.py +4 -2
  143. snowflake/ml/modeling/manifold/mds.py +4 -2
  144. snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
  145. snowflake/ml/modeling/manifold/tsne.py +4 -2
  146. snowflake/ml/modeling/metrics/ranking.py +3 -0
  147. snowflake/ml/modeling/metrics/regression.py +3 -0
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
  150. snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
  151. snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
  152. snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
  153. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
  154. snowflake/ml/modeling/pipeline/pipeline.py +5 -4
  155. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
  156. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
  157. snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
  158. snowflake/ml/registry/_manager/model_manager.py +16 -3
  159. snowflake/ml/registry/registry.py +100 -13
  160. snowflake/ml/version.py +1 -1
  161. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/METADATA +81 -7
  162. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/RECORD +165 -139
  163. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/WHEEL +1 -1
  164. snowflake/ml/_internal/lineage/data_source.py +0 -10
  165. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/LICENSE.txt +0 -0
  166. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,7 @@ from snowflake.ml._internal.exceptions import (
33
33
  dataset_errors,
34
34
  error_codes,
35
35
  exceptions as snowml_exceptions,
36
+ sql_error_codes,
36
37
  )
37
38
  from snowflake.ml._internal.utils import identifier
38
39
  from snowflake.ml._internal.utils.sql_identifier import (
@@ -131,6 +132,10 @@ _LIST_FEATURE_VIEW_SCHEMA = StructType(
131
132
  StructField("owner", StringType()),
132
133
  StructField("desc", StringType()),
133
134
  StructField("entities", ArrayType(StringType())),
135
+ StructField("refresh_freq", StringType()),
136
+ StructField("refresh_mode", StringType()),
137
+ StructField("scheduling_state", StringType()),
138
+ StructField("warehouse", StringType()),
134
139
  ]
135
140
  )
136
141
 
@@ -201,6 +206,7 @@ class FeatureStore:
201
206
  database: str,
202
207
  name: str,
203
208
  default_warehouse: str,
209
+ *,
204
210
  creation_mode: CreationMode = CreationMode.FAIL_IF_NOT_EXIST,
205
211
  ) -> None:
206
212
  """
@@ -220,6 +226,32 @@ class FeatureStore:
220
226
  SnowflakeMLException: [ValueError] Required resources do not exist when mode is FAIL_IF_NOT_EXIST.
221
227
  SnowflakeMLException: [RuntimeError] Failed to find resources.
222
228
  SnowflakeMLException: [RuntimeError] Failed to create feature store.
229
+
230
+ Example::
231
+
232
+ >>> from snowflake.ml.feature_store import (
233
+ ... FeatureStore,
234
+ ... CreationMode,
235
+ ... )
236
+ <BLANKLINE>
237
+ >>> # Create a new Feature Store:
238
+ >>> fs = FeatureStore(
239
+ ... session=session,
240
+ ... database="MYDB",
241
+ ... name="MYSCHEMA",
242
+ ... default_warehouse="MYWH",
243
+ ... creation_mode=CreationMode.CREATE_IF_NOT_EXIST
244
+ ... )
245
+ <BLANKLINE>
246
+ >>> # Connect to an existing Feature Store:
247
+ >>> fs = FeatureStore(
248
+ ... session=session,
249
+ ... database="MYDB",
250
+ ... name="MYSCHEMA",
251
+ ... default_warehouse="MYWH",
252
+ ... creation_mode=CreationMode.FAIL_IF_NOT_EXIST
253
+ ... )
254
+
223
255
  """
224
256
 
225
257
  database = SqlIdentifier(database)
@@ -267,10 +299,7 @@ class FeatureStore:
267
299
  raise snowml_exceptions.SnowflakeMLException(
268
300
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
269
301
  original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
270
- )
271
-
272
- # TODO: remove this after tag_ref_internal rollout
273
- self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
302
+ ) from e
274
303
  self._check_feature_store_object_versions()
275
304
  logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
276
305
 
@@ -283,6 +312,16 @@ class FeatureStore:
283
312
 
284
313
  Raises:
285
314
  SnowflakeMLException: If warehouse does not exist.
315
+
316
+ Example::
317
+
318
+ >>> fs = FeatureStore(...)
319
+ >>> fs.update_default_warehouse("MYWH_2")
320
+ >>> draft_fv = FeatureView("my_fv", ...)
321
+ >>> registered_fv = fs.register_feature_view(draft_fv, '2.0')
322
+ >>> print(registered_fv.warehouse)
323
+ MYWH_2
324
+
286
325
  """
287
326
  warehouse = SqlIdentifier(warehouse_name)
288
327
  warehouse_result = self._find_object("WAREHOUSES", warehouse)
@@ -300,15 +339,27 @@ class FeatureStore:
300
339
  Register Entity in the FeatureStore.
301
340
 
302
341
  Args:
303
- entity: Entity object to register.
342
+ entity: Entity object to be registered.
304
343
 
305
344
  Returns:
306
345
  A registered entity object.
307
346
 
308
347
  Raises:
309
348
  SnowflakeMLException: [RuntimeError] Failed to find resources.
310
- """
311
349
 
350
+ Example::
351
+
352
+ >>> fs = FeatureStore(...)
353
+ >>> e = Entity('BAR', ['A'], desc='entity bar')
354
+ >>> fs.register_entity(e)
355
+ >>> fs.list_entities().show()
356
+ --------------------------------------------------
357
+ |"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
358
+ --------------------------------------------------
359
+ |BAR |["A"] |entity bar |REGTEST_RL |
360
+ --------------------------------------------------
361
+
362
+ """
312
363
  tag_name = self._get_entity_name(entity.name)
313
364
  found_rows = self._find_object("TAGS", tag_name)
314
365
  if len(found_rows) > 0:
@@ -340,12 +391,74 @@ class FeatureStore:
340
391
 
341
392
  return self.get_entity(entity.name)
342
393
 
394
+ def update_entity(self, name: str, *, desc: Optional[str] = None) -> Optional[Entity]:
395
+ """Update a registered entity with provided information.
396
+
397
+ Args:
398
+ name: Name of entity to update.
399
+ desc: Optional new description to apply. Defaults to None.
400
+
401
+ Raises:
402
+ SnowflakeMLException: An error occurred while updating the entity.
403
+
404
+ Returns:
405
+ A new entity with updated information or None if the entity doesn't exist.
406
+
407
+ Example::
408
+
409
+ >>> fs = FeatureStore(...)
410
+ <BLANKLINE>
411
+ >>> e = Entity(name='foo', join_keys=['COL_1'], desc='old desc')
412
+ >>> fs.list_entities().show()
413
+ ------------------------------------------------
414
+ |"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
415
+ ------------------------------------------------
416
+ |FOO |["COL_1"] |old desc |REGTEST_RL |
417
+ ------------------------------------------------
418
+ <BLANKLINE>
419
+ >>> fs.update_entity('foo', desc='NEW DESC')
420
+ >>> fs.list_entities().show()
421
+ ------------------------------------------------
422
+ |"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
423
+ ------------------------------------------------
424
+ |FOO |["COL_1"] |NEW DESC |REGTEST_RL |
425
+ ------------------------------------------------
426
+
427
+ """
428
+ name = SqlIdentifier(name)
429
+ found_rows = self.list_entities().filter(F.col("NAME") == name.resolved()).collect()
430
+
431
+ if len(found_rows) == 0:
432
+ warnings.warn(
433
+ f"Entity {name} does not exist.",
434
+ stacklevel=2,
435
+ category=UserWarning,
436
+ )
437
+ return None
438
+
439
+ new_desc = desc if desc is not None else found_rows[0]["DESC"]
440
+
441
+ try:
442
+ full_name = f"{self._config.full_schema_path}.{self._get_entity_name(name)}"
443
+ self._session.sql(f"ALTER TAG {full_name} SET COMMENT = '{new_desc}'").collect(
444
+ statement_params=self._telemetry_stmp
445
+ )
446
+ except Exception as e:
447
+ raise snowml_exceptions.SnowflakeMLException(
448
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
449
+ original_exception=RuntimeError(f"Failed to update entity `{name}`: {e}."),
450
+ ) from e
451
+
452
+ logger.info(f"Successfully updated Entity {name}.")
453
+ return self.get_entity(name)
454
+
343
455
  # TODO: add support to update column desc once SNOW-894249 is fixed
344
456
  @dispatch_decorator()
345
457
  def register_feature_view(
346
458
  self,
347
459
  feature_view: FeatureView,
348
460
  version: str,
461
+ *,
349
462
  block: bool = True,
350
463
  overwrite: bool = False,
351
464
  ) -> FeatureView:
@@ -355,12 +468,6 @@ class FeatureStore:
355
468
  NOTE: Each new materialization will trigger a full FeatureView history refresh for the data included in the
356
469
  FeatureView.
357
470
 
358
- Examples:
359
- ...
360
- draft_fv = FeatureView(name="my_fv", entities=[entities], feature_df)
361
- registered_fv = fs.register_feature_view(feature_view=draft_fv, version="v1")
362
- ...
363
-
364
471
  Args:
365
472
  feature_view: FeatureView instance to materialize.
366
473
  version: version of the registered FeatureView.
@@ -379,6 +486,35 @@ class FeatureStore:
379
486
  SnowflakeMLException: [ValueError] Warehouse or default warehouse is not specified.
380
487
  SnowflakeMLException: [RuntimeError] Failed to create dynamic table, task, or view.
381
488
  SnowflakeMLException: [RuntimeError] Failed to find resources.
489
+
490
+ Example::
491
+
492
+ >>> fs = FeatureStore(...)
493
+ >>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
494
+ >>> feature_df = session.sql("select f_1, f_2 from source_table")
495
+ >>> draft_fv = FeatureView("my_fv", [entities], feature_df)
496
+ >>> print(draft_fv.status)
497
+ FeatureViewStatus.DRAFT
498
+ <BLANKLINE>
499
+ >>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
500
+ -------------------------------------------
501
+ |"NAME" |"VERSION" |"SCHEDULING_STATE" |
502
+ -------------------------------------------
503
+ | | | |
504
+ -------------------------------------------
505
+ <BLANKLINE>
506
+ >>> # registered_fv is a local object that maps to a Snowflake backend object.
507
+ >>> registered_fv = fs.register_feature_view(draft_fv, "v1")
508
+ >>> print(registered_fv.status)
509
+ FeatureViewStatus.ACTIVE
510
+ <BLANKLINE>
511
+ >>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
512
+ -------------------------------------------
513
+ |"NAME" |"VERSION" |"SCHEDULING_STATE" |
514
+ -------------------------------------------
515
+ |MY_FV |v1 |ACTIVE |
516
+ -------------------------------------------
517
+
382
518
  """
383
519
  version = FeatureViewVersion(version)
384
520
 
@@ -443,7 +579,7 @@ class FeatureStore:
443
579
  column_descs,
444
580
  tagging_clause_str,
445
581
  schedule_task,
446
- self._default_warehouse,
582
+ feature_view.warehouse if feature_view.warehouse is not None else self._default_warehouse,
447
583
  block,
448
584
  overwrite,
449
585
  )
@@ -469,7 +605,13 @@ class FeatureStore:
469
605
 
470
606
  @dispatch_decorator()
471
607
  def update_feature_view(
472
- self, name: str, version: str, refresh_freq: Optional[str] = None, warehouse: Optional[str] = None
608
+ self,
609
+ name: str,
610
+ version: str,
611
+ *,
612
+ refresh_freq: Optional[str] = None,
613
+ warehouse: Optional[str] = None,
614
+ desc: Optional[str] = None,
473
615
  ) -> FeatureView:
474
616
  """Update a registered feature view.
475
617
  Check feature_view.py for which fields are allowed to be updated after registration.
@@ -479,32 +621,73 @@ class FeatureStore:
479
621
  version: version of the FeatureView to be updated.
480
622
  refresh_freq: updated refresh frequency.
481
623
  warehouse: updated warehouse.
624
+ desc: description of feature view.
482
625
 
483
626
  Returns:
484
627
  Updated FeatureView.
485
628
 
629
+ Example::
630
+
631
+ >>> fs = FeatureStore(...)
632
+ >>> fv = FeatureView(
633
+ ... name='foo',
634
+ ... entities=[e1, e2],
635
+ ... feature_df=session.sql('...'),
636
+ ... desc='this is old description',
637
+ ... )
638
+ >>> fv = fs.register_feature_view(feature_view=fv, version='v1')
639
+ >>> fs.list_feature_views().select("name", "version", "desc").show()
640
+ ------------------------------------------------
641
+ |"NAME" |"VERSION" |"DESC" |
642
+ ------------------------------------------------
643
+ |FOO |v1 |this is old description |
644
+ ------------------------------------------------
645
+ <BLANKLINE>
646
+ >>> # update_feature_view will apply new arguments to the registered feature view.
647
+ >>> new_fv = fs.update_feature_view(
648
+ ... name='foo',
649
+ ... version='v1',
650
+ ... desc='that is new description',
651
+ ... )
652
+ >>> fs.list_feature_views().select("name", "version", "desc").show()
653
+ ------------------------------------------------
654
+ |"NAME" |"VERSION" |"DESC" |
655
+ ------------------------------------------------
656
+ |FOO |v1 |that is new description |
657
+ ------------------------------------------------
658
+
486
659
  Raises:
487
660
  SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
488
661
  SnowflakeMLException: [RuntimeError] Failed to update feature view.
489
662
  """
490
663
  feature_view = self.get_feature_view(name=name, version=version)
491
- if refresh_freq is not None and feature_view.status == FeatureViewStatus.STATIC:
492
- full_name = f"{feature_view.name}/{feature_view.version}"
493
- raise snowml_exceptions.SnowflakeMLException(
494
- error_code=error_codes.INVALID_ARGUMENT,
495
- original_exception=RuntimeError(f"Feature view {full_name} must be non-static so that can be updated."),
496
- )
664
+ new_desc = desc if desc is not None else feature_view.desc
497
665
 
498
- warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
666
+ if feature_view.status == FeatureViewStatus.STATIC:
667
+ if refresh_freq is not None or warehouse is not None:
668
+ full_name = f"{feature_view.name}/{feature_view.version}"
669
+ raise snowml_exceptions.SnowflakeMLException(
670
+ error_code=error_codes.INVALID_ARGUMENT,
671
+ original_exception=RuntimeError(
672
+ f"Static feature view '{full_name}' does not support refresh_freq and warehouse."
673
+ ),
674
+ )
675
+ new_query = f"""
676
+ ALTER VIEW {feature_view.fully_qualified_name()} SET
677
+ COMMENT = '{new_desc}'
678
+ """
679
+ else:
680
+ warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
681
+ # TODO(@wezhou): we need to properly handle cron expr
682
+ new_query = f"""
683
+ ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
684
+ TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
685
+ WAREHOUSE = {warehouse}
686
+ COMMENT = '{new_desc}'
687
+ """
499
688
 
500
- # TODO(@wezhou): we need to properly handle cron expr
501
689
  try:
502
- self._session.sql(
503
- f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
504
- TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
505
- WAREHOUSE = {warehouse}
506
- """
507
- ).collect(statement_params=self._telemetry_stmp)
690
+ self._session.sql(new_query).collect(statement_params=self._telemetry_stmp)
508
691
  except Exception as e:
509
692
  raise snowml_exceptions.SnowflakeMLException(
510
693
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
@@ -514,20 +697,56 @@ class FeatureStore:
514
697
  ) from e
515
698
  return self.get_feature_view(name=name, version=version)
516
699
 
517
- @dispatch_decorator()
700
+ @overload
701
+ def read_feature_view(self, feature_view: str, version: str) -> DataFrame:
702
+ ...
703
+
704
+ @overload
518
705
  def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
706
+ ...
707
+
708
+ @dispatch_decorator() # type: ignore[misc]
709
+ def read_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> DataFrame:
519
710
  """
520
- Read FeatureView data.
711
+ Read values from a FeatureView.
521
712
 
522
713
  Args:
523
- feature_view: FeatureView to retrieve data from.
714
+ feature_view: A FeatureView object to read from, or the name of feature view.
715
+ If name is provided then version also must be provided.
716
+ version: Optional version of feature view. Must be set when argument feature_view is a str.
524
717
 
525
718
  Returns:
526
719
  Snowpark DataFrame(lazy mode) containing the FeatureView data.
527
720
 
528
721
  Raises:
722
+ SnowflakeMLException: [ValueError] version argument is missing when argument feature_view is a str.
529
723
  SnowflakeMLException: [ValueError] FeatureView is not registered.
724
+
725
+ Example::
726
+
727
+ >>> fs = FeatureStore(...)
728
+ >>> # Read from feature view name and version.
729
+ >>> fs.read_feature_view('foo', 'v1').show()
730
+ ------------------------------------------
731
+ |"NAME" |"ID" |"TITLE" |"AGE" |"TS" |
732
+ ------------------------------------------
733
+ |jonh |1 |boss |20 |100 |
734
+ |porter |2 |manager |30 |200 |
735
+ ------------------------------------------
736
+ <BLANKLINE>
737
+ >>> # Read from feature view object.
738
+ >>> fv = fs.get_feature_view('foo', 'v1')
739
+ >>> fs.read_feature_view(fv).show()
740
+ ------------------------------------------
741
+ |"NAME" |"ID" |"TITLE" |"AGE" |"TS" |
742
+ ------------------------------------------
743
+ |jonh |1 |boss |20 |100 |
744
+ |porter |2 |manager |30 |200 |
745
+ ------------------------------------------
746
+
530
747
  """
748
+ feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
749
+
531
750
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
532
751
  raise snowml_exceptions.SnowflakeMLException(
533
752
  error_code=error_codes.NOT_FOUND,
@@ -539,6 +758,7 @@ class FeatureStore:
539
758
  @dispatch_decorator()
540
759
  def list_feature_views(
541
760
  self,
761
+ *,
542
762
  entity_name: Optional[str] = None,
543
763
  feature_view_name: Optional[str] = None,
544
764
  ) -> DataFrame:
@@ -553,19 +773,34 @@ class FeatureStore:
553
773
 
554
774
  Returns:
555
775
  FeatureViews information as a Snowpark DataFrame.
776
+
777
+ Example::
778
+
779
+ >>> fs = FeatureStore(...)
780
+ >>> draft_fv = FeatureView(
781
+ ... name='foo',
782
+ ... entities=[e1, e2],
783
+ ... feature_df=session.sql('...'),
784
+ ... desc='this is description',
785
+ ... )
786
+ >>> fs.register_feature_view(feature_view=draft_fv, version='v1')
787
+ >>> fs.list_feature_views().select("name", "version", "desc").show()
788
+ --------------------------------------------
789
+ |"NAME" |"VERSION" |"DESC" |
790
+ --------------------------------------------
791
+ |FOO |v1 |this is description |
792
+ --------------------------------------------
793
+
556
794
  """
557
795
  if feature_view_name is not None:
558
796
  feature_view_name = SqlIdentifier(feature_view_name)
559
797
 
560
798
  if entity_name is not None:
561
799
  entity_name = SqlIdentifier(entity_name)
562
- if self._use_optimized_tag_ref:
563
- return self._optimized_find_feature_views(entity_name, feature_view_name)
564
- else:
565
- return self._find_feature_views(entity_name, feature_view_name)
800
+ return self._optimized_find_feature_views(entity_name, feature_view_name)
566
801
  else:
567
802
  output_values: List[List[Any]] = []
568
- for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
803
+ for row, _ in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
569
804
  self._extract_feature_view_info(row, output_values)
570
805
  return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
571
806
 
@@ -584,6 +819,28 @@ class FeatureStore:
584
819
  Raises:
585
820
  SnowflakeMLException: [ValueError] FeatureView with name and version is not found,
586
821
  or incurred exception when reconstructing the FeatureView object.
822
+
823
+ Example::
824
+
825
+ >>> fs = FeatureStore(...)
826
+ >>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
827
+ >>> draft_fv = FeatureView(
828
+ ... name='foo',
829
+ ... entities=[e1],
830
+ ... feature_df=session.sql('...'),
831
+ ... desc='this is description',
832
+ ... )
833
+ >>> fs.register_feature_view(feature_view=draft_fv, version='v1')
834
+ <BLANKLINE>
835
+ >>> # fv is a local object that maps to a Snowflake backend object.
836
+ >>> fv = fs.get_feature_view('foo', 'v1')
837
+ >>> print(f"name: {fv.name}")
838
+ >>> print(f"version:{fv.version}")
839
+ >>> print(f"desc:{fv.desc}")
840
+ name: FOO
841
+ version:v1
842
+ desc:this is description
843
+
587
844
  """
588
845
  name = SqlIdentifier(name)
589
846
  version = FeatureViewVersion(version)
@@ -596,45 +853,287 @@ class FeatureStore:
596
853
  original_exception=ValueError(f"Failed to find FeatureView {name}/{version}: {results}"),
597
854
  )
598
855
 
599
- return self._compose_feature_view(results[0], self.list_entities().collect())
856
+ return self._compose_feature_view(results[0][0], results[0][1], self.list_entities().collect())
600
857
 
601
- @dispatch_decorator()
858
+ @overload
859
+ def refresh_feature_view(self, feature_view: FeatureView) -> None:
860
+ ...
861
+
862
+ @overload
863
+ def refresh_feature_view(self, feature_view: str, version: str) -> None:
864
+ ...
865
+
866
+ @dispatch_decorator() # type: ignore[misc]
867
+ def refresh_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> None:
868
+ """Manually refresh a feature view.
869
+
870
+ Args:
871
+ feature_view: A registered feature view object, or the name of feature view.
872
+ version: Optional version of feature view. Must be set when argument feature_view is a str.
873
+
874
+ Example::
875
+
876
+ >>> fs = FeatureStore(...)
877
+ >>> fv = fs.get_feature_view(name='MY_FV', version='v1')
878
+ <BLANKLINE>
879
+ >>> # refresh with name and version
880
+ >>> fs.refresh_feature_view('MY_FV', 'v1')
881
+ >>> fs.get_refresh_history('MY_FV', 'v1').show()
882
+ -----------------------------------------------------------------------------------------------------
883
+ |"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
884
+ -----------------------------------------------------------------------------------------------------
885
+ |MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
886
+ -----------------------------------------------------------------------------------------------------
887
+ <BLANKLINE>
888
+ >>> # refresh with feature view object
889
+ >>> fs.refresh_feature_view(fv)
890
+ >>> fs.get_refresh_history(fv).show()
891
+ -----------------------------------------------------------------------------------------------------
892
+ |"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
893
+ -----------------------------------------------------------------------------------------------------
894
+ |MY_FV$v1 |SUCCEEDED |2024-07-10 14:54:06.680000 |2024-07-10 14:54:07.226000 |INCREMENTAL |
895
+ |MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
896
+ -----------------------------------------------------------------------------------------------------
897
+
898
+ """
899
+ feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
900
+
901
+ if feature_view.status == FeatureViewStatus.STATIC:
902
+ warnings.warn(
903
+ "Static feature view can't be refreshed. You must set refresh_freq when register_feature_view().",
904
+ stacklevel=2,
905
+ category=UserWarning,
906
+ )
907
+ return
908
+ self._update_feature_view_status(feature_view, "REFRESH")
909
+
910
+ @overload
911
+ def get_refresh_history(
912
+ self, feature_view: FeatureView, version: Optional[str] = None, *, verbose: bool = False
913
+ ) -> DataFrame:
914
+ ...
915
+
916
+ @overload
917
+ def get_refresh_history(self, feature_view: str, version: str, *, verbose: bool = False) -> DataFrame:
918
+ ...
919
+
920
+ def get_refresh_history(
921
+ self, feature_view: Union[FeatureView, str], version: Optional[str] = None, *, verbose: bool = False
922
+ ) -> DataFrame:
923
+ """Get refresh history statistics about a feature view.
924
+
925
+ Args:
926
+ feature_view: A registered feature view object, or the name of feature view.
927
+ version: Optional version of feature view. Must be set when argument feature_view is a str.
928
+ verbose: Return more detailed history when set true.
929
+
930
+ Returns:
931
+ A dataframe containing the refresh history information.
932
+
933
+ Example::
934
+
935
+ >>> fs = FeatureStore(...)
936
+ >>> fv = fs.get_feature_view(name='MY_FV', version='v1')
937
+ >>> # refresh with name and version
938
+ >>> fs.refresh_feature_view('MY_FV', 'v1')
939
+ >>> fs.get_refresh_history('MY_FV', 'v1').show()
940
+ -----------------------------------------------------------------------------------------------------
941
+ |"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
942
+ -----------------------------------------------------------------------------------------------------
943
+ |MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
944
+ -----------------------------------------------------------------------------------------------------
945
+ <BLANKLINE>
946
+ >>> # refresh with feature view object
947
+ >>> fs.refresh_feature_view(fv)
948
+ >>> fs.get_refresh_history(fv).show()
949
+ -----------------------------------------------------------------------------------------------------
950
+ |"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
951
+ -----------------------------------------------------------------------------------------------------
952
+ |MY_FV$v1 |SUCCEEDED |2024-07-10 14:54:06.680000 |2024-07-10 14:54:07.226000 |INCREMENTAL |
953
+ |MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
954
+ -----------------------------------------------------------------------------------------------------
955
+
956
+ """
957
+ feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
958
+
959
+ if feature_view.status == FeatureViewStatus.STATIC:
960
+ warnings.warn(
961
+ "Static feature view never refreshes.",
962
+ stacklevel=2,
963
+ category=UserWarning,
964
+ )
965
+ return self._session.create_dataframe([Row()])
966
+
967
+ if feature_view.status == FeatureViewStatus.DRAFT:
968
+ warnings.warn(
969
+ "This feature view has not been registered thus has no refresh history.",
970
+ stacklevel=2,
971
+ category=UserWarning,
972
+ )
973
+ return self._session.create_dataframe([Row()])
974
+
975
+ fv_resolved_name = FeatureView._get_physical_name(
976
+ feature_view.name,
977
+ feature_view.version, # type: ignore[arg-type]
978
+ ).resolved()
979
+ select_cols = "*" if verbose else "name, state, refresh_start_time, refresh_end_time, refresh_action"
980
+ return self._session.sql(
981
+ f"""
982
+ SELECT
983
+ {select_cols}
984
+ FROM TABLE (
985
+ {self._config.database}.INFORMATION_SCHEMA.DYNAMIC_TABLE_REFRESH_HISTORY ()
986
+ )
987
+ WHERE NAME = '{fv_resolved_name}'
988
+ AND SCHEMA_NAME = '{self._config.schema}'
989
+ """
990
+ )
991
+
992
+ @overload
602
993
  def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
994
+ ...
995
+
996
+ @overload
997
+ def resume_feature_view(self, feature_view: str, version: str) -> FeatureView:
998
+ ...
999
+
1000
+ @dispatch_decorator() # type: ignore[misc]
1001
+ def resume_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> FeatureView:
603
1002
  """
604
1003
  Resume a previously suspended FeatureView.
605
1004
 
606
1005
  Args:
607
- feature_view: FeatureView to resume.
1006
+ feature_view: FeatureView object or name to resume.
1007
+ version: Optional version of feature view. Must be set when argument feature_view is a str.
608
1008
 
609
1009
  Returns:
610
1010
  A new feature view with updated status.
1011
+
1012
+ Example::
1013
+
1014
+ >>> fs = FeatureStore(...)
1015
+ >>> # you must already have feature views registered
1016
+ >>> fv = fs.get_feature_view(name='MY_FV', version='v1')
1017
+ >>> fs.suspend_feature_view('MY_FV', 'v1')
1018
+ >>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
1019
+ -------------------------------------------
1020
+ |"NAME" |"VERSION" |"SCHEDULING_STATE" |
1021
+ -------------------------------------------
1022
+ |MY_FV |v1 |SUSPENDED |
1023
+ -------------------------------------------
1024
+ <BLANKLINE>
1025
+ >>> fs.resume_feature_view('MY_FV', 'v1')
1026
+ >>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
1027
+ -------------------------------------------
1028
+ |"NAME" |"VERSION" |"SCHEDULING_STATE" |
1029
+ -------------------------------------------
1030
+ |MY_FV |v1 |ACTIVE |
1031
+ -------------------------------------------
1032
+
611
1033
  """
1034
+ feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
612
1035
  return self._update_feature_view_status(feature_view, "RESUME")
613
1036
 
614
- @dispatch_decorator()
1037
+ @overload
615
1038
  def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
1039
+ ...
1040
+
1041
+ @overload
1042
+ def suspend_feature_view(self, feature_view: str, version: str) -> FeatureView:
1043
+ ...
1044
+
1045
+ @dispatch_decorator() # type: ignore[misc]
1046
+ def suspend_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> FeatureView:
616
1047
  """
617
1048
  Suspend an active FeatureView.
618
1049
 
619
1050
  Args:
620
- feature_view: FeatureView to suspend.
1051
+ feature_view: FeatureView object or name to suspend.
1052
+ version: Optional version of feature view. Must be set when argument feature_view is a str.
621
1053
 
622
1054
  Returns:
623
1055
  A new feature view with updated status.
1056
+
1057
+ Example::
1058
+
1059
+ >>> fs = FeatureStore(...)
1060
+ >>> # assume you already have feature views registered
1061
+ >>> fv = fs.get_feature_view(name='MY_FV', version='v1')
1062
+ >>> fs.suspend_feature_view('MY_FV', 'v1')
1063
+ >>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
1064
+ -------------------------------------------
1065
+ |"NAME" |"VERSION" |"SCHEDULING_STATE" |
1066
+ -------------------------------------------
1067
+ |MY_FV |v1 |SUSPENDED |
1068
+ -------------------------------------------
1069
+ <BLANKLINE>
1070
+ >>> fs.resume_feature_view('MY_FV', 'v1')
1071
+ >>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
1072
+ -------------------------------------------
1073
+ |"NAME" |"VERSION" |"SCHEDULING_STATE" |
1074
+ -------------------------------------------
1075
+ |MY_FV |v1 |ACTIVE |
1076
+ -------------------------------------------
1077
+
624
1078
  """
1079
+ feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
625
1080
  return self._update_feature_view_status(feature_view, "SUSPEND")
626
1081
 
627
- @dispatch_decorator()
1082
+ @overload
628
1083
  def delete_feature_view(self, feature_view: FeatureView) -> None:
1084
+ ...
1085
+
1086
+ @overload
1087
+ def delete_feature_view(self, feature_view: str, version: str) -> None:
1088
+ ...
1089
+
1090
+ @dispatch_decorator() # type: ignore[misc]
1091
+ def delete_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> None:
629
1092
  """
630
1093
  Delete a FeatureView.
631
1094
 
632
1095
  Args:
633
- feature_view: FeatureView to delete.
1096
+ feature_view: FeatureView object or name to delete.
1097
+ version: Optional version of feature view. Must be set when argument feature_view is a str.
634
1098
 
635
1099
  Raises:
636
1100
  SnowflakeMLException: [ValueError] FeatureView is not registered.
1101
+
1102
+ Example::
1103
+
1104
+ >>> fs = FeatureStore(...)
1105
+ >>> fv = FeatureView('FV0', ...)
1106
+ >>> fv1 = fs.register_feature_view(fv, 'FIRST')
1107
+ >>> fv2 = fs.register_feature_view(fv, 'SECOND')
1108
+ >>> fs.list_feature_views().select('NAME', 'VERSION').show()
1109
+ ----------------------
1110
+ |"NAME" |"VERSION" |
1111
+ ----------------------
1112
+ |FV0 |SECOND |
1113
+ |FV0 |FIRST |
1114
+ ----------------------
1115
+ <BLANKLINE>
1116
+ >>> # delete with name and version
1117
+ >>> fs.delete_feature_view('FV0', 'FIRST')
1118
+ >>> fs.list_feature_views().select('NAME', 'VERSION').show()
1119
+ ----------------------
1120
+ |"NAME" |"VERSION" |
1121
+ ----------------------
1122
+ |FV0 |SECOND |
1123
+ ----------------------
1124
+ <BLANKLINE>
1125
+ >>> # delete with feature view object
1126
+ >>> fs.delete_feature_view(fv2)
1127
+ >>> fs.list_feature_views().select('NAME', 'VERSION').show()
1128
+ ----------------------
1129
+ |"NAME" |"VERSION" |
1130
+ ----------------------
1131
+ | | |
1132
+ ----------------------
1133
+
637
1134
  """
1135
+ feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
1136
+
638
1137
  # TODO: we should leverage lineage graph to check downstream deps, and block the deletion
639
1138
  # if there're other FVs depending on this
640
1139
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
@@ -666,6 +1165,19 @@ class FeatureStore:
666
1165
 
667
1166
  Returns:
668
1167
  Snowpark DataFrame containing the results.
1168
+
1169
+ Example::
1170
+
1171
+ >>> fs = FeatureStore(...)
1172
+ >>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
1173
+ >>> fs.register_entity(e_1)
1174
+ >>> fs.list_entities().show()
1175
+ -----------------------------------------------------------
1176
+ |"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
1177
+ -----------------------------------------------------------
1178
+ |MY_ENTITY |["COL_1"] |My first entity. |REGTEST_RL |
1179
+ -----------------------------------------------------------
1180
+
669
1181
  """
670
1182
  prefix_len = len(_ENTITY_TAG_PREFIX) + 1
671
1183
  return cast(
@@ -695,6 +1207,19 @@ class FeatureStore:
695
1207
  SnowflakeMLException: [ValueError] Entity is not found.
696
1208
  SnowflakeMLException: [RuntimeError] Failed to retrieve tag reference information.
697
1209
  SnowflakeMLException: [RuntimeError] Failed to find resources.
1210
+
1211
+ Example::
1212
+
1213
+ >>> fs = FeatureStore(...)
1214
+ >>> # e_1 is a local object that hasn't registered to Snowflake backend yet.
1215
+ >>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
1216
+ >>> fs.register_entity(e_1)
1217
+ <BLANKLINE>
1218
+ >>> # e_2 is a local object that points to a backend object in Snowflake.
1219
+ >>> e_2 = fs.get_entity("my_entity")
1220
+ >>> print(e_2)
1221
+ Entity(name=MY_ENTITY, join_keys=['COL_1'], owner=REGTEST_RL, desc=My first entity.)
1222
+
698
1223
  """
699
1224
  name = SqlIdentifier(name)
700
1225
  try:
@@ -725,12 +1250,33 @@ class FeatureStore:
725
1250
  Delete a previously registered Entity.
726
1251
 
727
1252
  Args:
728
- name: Entity name.
1253
+ name: Name of entity to be deleted.
729
1254
 
730
1255
  Raises:
731
1256
  SnowflakeMLException: [ValueError] Entity with given name not exists.
732
1257
  SnowflakeMLException: [RuntimeError] Failed to alter schema or drop tag.
733
1258
  SnowflakeMLException: [RuntimeError] Failed to find resources.
1259
+
1260
+ Example::
1261
+
1262
+ >>> fs = FeatureStore(...)
1263
+ >>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
1264
+ >>> fs.register_entity(e_1)
1265
+ >>> fs.list_entities().show()
1266
+ -----------------------------------------------------------
1267
+ |"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
1268
+ -----------------------------------------------------------
1269
+ |MY_ENTITY |["COL_1"] |My first entity. |REGTEST_RL |
1270
+ -----------------------------------------------------------
1271
+ <BLANKLINE>
1272
+ >>> fs.delete_entity("my_entity")
1273
+ >>> fs.list_entities().show()
1274
+ -------------------------------------------
1275
+ |"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
1276
+ -------------------------------------------
1277
+ | | | | |
1278
+ -------------------------------------------
1279
+
734
1280
  """
735
1281
  name = SqlIdentifier(name)
736
1282
 
@@ -764,6 +1310,7 @@ class FeatureStore:
764
1310
  self,
765
1311
  spine_df: DataFrame,
766
1312
  features: Union[List[Union[FeatureView, FeatureViewSlice]], List[str]],
1313
+ *,
767
1314
  spine_timestamp_col: Optional[str] = None,
768
1315
  exclude_columns: Optional[List[str]] = None,
769
1316
  include_feature_view_timestamp_col: bool = False,
@@ -786,6 +1333,23 @@ class FeatureStore:
786
1333
 
787
1334
  Raises:
788
1335
  ValueError: if features is empty.
1336
+
1337
+ Example::
1338
+
1339
+ >>> fs = FeatureStore(...)
1340
+ >>> # Assume you already have feature view registered.
1341
+ >>> fv = fs.get_feature_view('my_fv', 'v1')
1342
+ >>> # Spine dataframe has same join keys as the entity of fv.
1343
+ >>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
1344
+ >>> fs.retrieve_feature_values(spine_df, [fv]).show()
1345
+ --------------------
1346
+ |"END_STATION_ID" |
1347
+ --------------------
1348
+ |505 |
1349
+ |347 |
1350
+ |466 |
1351
+ --------------------
1352
+
789
1353
  """
790
1354
  if spine_timestamp_col is not None:
791
1355
  spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
@@ -807,12 +1371,97 @@ class FeatureStore:
807
1371
 
808
1372
  return df
809
1373
 
1374
+ @dispatch_decorator()
1375
+ def generate_training_set(
1376
+ self,
1377
+ spine_df: DataFrame,
1378
+ features: List[Union[FeatureView, FeatureViewSlice]],
1379
+ *,
1380
+ save_as: Optional[str] = None,
1381
+ spine_timestamp_col: Optional[str] = None,
1382
+ spine_label_cols: Optional[List[str]] = None,
1383
+ exclude_columns: Optional[List[str]] = None,
1384
+ include_feature_view_timestamp_col: bool = False,
1385
+ ) -> DataFrame:
1386
+ """
1387
+ Generate a training set from the specified Spine DataFrame and Feature Views. Result is
1388
+ materialized to a Snowflake Table if `save_as` is specified.
1389
+
1390
+ Args:
1391
+ spine_df: Snowpark DataFrame to join features into.
1392
+ features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
1393
+ save_as: If specified, a new table containing the produced result will be created. Name can be a fully
1394
+ qualified name or an unqualified name. If unqualified, defaults to the Feature Store database and schema
1395
+ spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
1396
+ time-series features. If spine_timestamp_col is not none, the input features also must have
1397
+ timestamp_col.
1398
+ spine_label_cols: Name of column(s) in spine_df that contains labels.
1399
+ exclude_columns: Name of column(s) to exclude from the resulting training set.
1400
+ include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
1401
+ (if feature view has timestamp column) if set true. Default to false.
1402
+
1403
+ Returns:
1404
+ Returns a Snowpark DataFrame representing the training set.
1405
+
1406
+ Raises:
1407
+ SnowflakeMLException: [RuntimeError] Materialized table name already exists
1408
+ SnowflakeMLException: [RuntimeError] Failed to create materialized table.
1409
+
1410
+ Example::
1411
+
1412
+ >>> fs = FeatureStore(session, ...)
1413
+ >>> # Assume you already have feature view registered.
1414
+ >>> fv = fs.get_feature_view("MY_FV", "1")
1415
+ >>> # Spine dataframe has same join keys as the entity of fv.
1416
+ >>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
1417
+ >>> training_set = fs.generate_training_set(
1418
+ ... spine_df,
1419
+ ... [fv],
1420
+ ... save_as="my_training_set",
1421
+ ... )
1422
+ >>> print(type(training_set))
1423
+ <class 'snowflake.snowpark.table.Table'>
1424
+ <BLANKLINE>
1425
+ >>> print(training_set.queries)
1426
+ {'queries': ['SELECT * FROM (my_training_set)'], 'post_actions': []}
1427
+
1428
+ """
1429
+ if spine_timestamp_col is not None:
1430
+ spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
1431
+ if spine_label_cols is not None:
1432
+ spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
1433
+
1434
+ result_df, join_keys = self._join_features(
1435
+ spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
1436
+ )
1437
+
1438
+ if exclude_columns is not None:
1439
+ result_df = self._exclude_columns(result_df, exclude_columns)
1440
+
1441
+ if save_as is not None:
1442
+ try:
1443
+ save_as = self._get_fully_qualified_name(save_as)
1444
+ result_df.write.mode("errorifexists").save_as_table(save_as)
1445
+ return self._session.table(save_as)
1446
+ except SnowparkSQLException as e:
1447
+ if e.sql_error_code == sql_error_codes.OBJECT_ALREADY_EXISTS:
1448
+ raise snowml_exceptions.SnowflakeMLException(
1449
+ error_code=error_codes.OBJECT_ALREADY_EXISTS,
1450
+ original_exception=RuntimeError(str(e)),
1451
+ ) from e
1452
+ raise snowml_exceptions.SnowflakeMLException(
1453
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1454
+ original_exception=RuntimeError(f"An error occurred during training set materialization: {e}."),
1455
+ ) from e
1456
+ return result_df
1457
+
810
1458
  @overload
811
1459
  def generate_dataset(
812
1460
  self,
813
1461
  name: str,
814
1462
  spine_df: DataFrame,
815
1463
  features: List[Union[FeatureView, FeatureViewSlice]],
1464
+ *,
816
1465
  version: Optional[str] = None,
817
1466
  spine_timestamp_col: Optional[str] = None,
818
1467
  spine_label_cols: Optional[List[str]] = None,
@@ -829,6 +1478,7 @@ class FeatureStore:
829
1478
  name: str,
830
1479
  spine_df: DataFrame,
831
1480
  features: List[Union[FeatureView, FeatureViewSlice]],
1481
+ *,
832
1482
  output_type: Literal["table"],
833
1483
  version: Optional[str] = None,
834
1484
  spine_timestamp_col: Optional[str] = None,
@@ -845,6 +1495,7 @@ class FeatureStore:
845
1495
  name: str,
846
1496
  spine_df: DataFrame,
847
1497
  features: List[Union[FeatureView, FeatureViewSlice]],
1498
+ *,
848
1499
  version: Optional[str] = None,
849
1500
  spine_timestamp_col: Optional[str] = None,
850
1501
  spine_label_cols: Optional[List[str]] = None,
@@ -859,7 +1510,7 @@ class FeatureStore:
859
1510
  Args:
860
1511
  name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
861
1512
  by their name and version.
862
- spine_df: The fact table contains the raw dataset.
1513
+ spine_df: Snowpark DataFrame to join features into.
863
1514
  features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
864
1515
  version: The version of the Dataset to be generated. If none specified, the current timestamp
865
1516
  will be used instead.
@@ -867,84 +1518,106 @@ class FeatureStore:
867
1518
  time-series features. If spine_timestamp_col is not none, the input features also must have
868
1519
  timestamp_col.
869
1520
  spine_label_cols: Name of column(s) in spine_df that contains labels.
870
- exclude_columns: Column names to exclude from the result dataframe.
871
- The underlying storage will still contain the columns.
1521
+ exclude_columns: Name of column(s) to exclude from the resulting training set.
872
1522
  include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
873
1523
  (if feature view has timestamp column) if set true. Default to false.
874
1524
  desc: A description about this dataset.
875
- output_type: The type of Snowflake storage to use for the generated training data.
1525
+ output_type: (Deprecated) The type of Snowflake storage to use for the generated training data.
876
1526
 
877
1527
  Returns:
878
1528
  If output_type is "dataset" (default), returns a Dataset object.
879
1529
  If output_type is "table", returns a Snowpark DataFrame representing the table.
880
1530
 
881
1531
  Raises:
882
- SnowflakeMLException: [ValueError] Dataset name/version already exists
883
- SnowflakeMLException: [ValueError] Snapshot creation failed.
884
1532
  SnowflakeMLException: [ValueError] Invalid output_type specified.
885
- SnowflakeMLException: [RuntimeError] Failed to create clone from table.
1533
+ SnowflakeMLException: [RuntimeError] Dataset name/version already exists.
886
1534
  SnowflakeMLException: [RuntimeError] Failed to find resources.
1535
+
1536
+ Example::
1537
+
1538
+ >>> fs = FeatureStore(session, ...)
1539
+ >>> # Assume you already have feature view registered.
1540
+ >>> fv = fs.get_feature_view("MY_FV", "1")
1541
+ >>> # Spine dataframe has same join keys as the entity of fv.
1542
+ >>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
1543
+ >>> my_dataset = fs.generate_dataset(
1544
+ ... "my_dataset",
1545
+ ... spine_df,
1546
+ ... [fv],
1547
+ ... )
1548
+ >>> # Current timestamp will be used as default version name.
1549
+ >>> # You can explicitly overwrite by setting a version.
1550
+ >>> my_dataset.list_versions()
1551
+ ['2024_07_12_11_26_22']
1552
+ <BLANKLINE>
1553
+ >>> my_dataset.read.to_snowpark_dataframe().show(n=3)
1554
+ -------------------------------------------------------
1555
+ |"QUALITY" |"FIXED_ACIDITY" |"VOLATILE_ACIDITY" |
1556
+ -------------------------------------------------------
1557
+ |3 |11.600000381469727 |0.5799999833106995 |
1558
+ |3 |8.300000190734863 |1.0199999809265137 |
1559
+ |3 |7.400000095367432 |1.184999942779541 |
1560
+ -------------------------------------------------------
1561
+
887
1562
  """
888
1563
  if output_type not in {"table", "dataset"}:
889
1564
  raise snowml_exceptions.SnowflakeMLException(
890
1565
  error_code=error_codes.INVALID_ARGUMENT,
891
1566
  original_exception=ValueError(f"Invalid output_type: {output_type}."),
892
1567
  )
893
- if spine_timestamp_col is not None:
894
- spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
895
- if spine_label_cols is not None:
896
- spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
897
-
898
- result_df, join_keys = self._join_features(
899
- spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
900
- )
901
1568
 
902
1569
  # Convert name to fully qualified name if not already fully qualified
903
- db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
904
- name = "{}.{}.{}".format(
905
- db_name or self._config.database,
906
- schema_name or self._config.schema,
907
- object_name,
908
- )
1570
+ name = self._get_fully_qualified_name(name)
909
1571
  version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
910
1572
 
911
- if exclude_columns is not None:
912
- result_df = self._exclude_columns(result_df, exclude_columns)
913
-
914
1573
  fs_meta = FeatureStoreMetadata(
915
1574
  spine_query=spine_df.queries["queries"][-1],
916
1575
  serialized_feature_views=[fv.to_json() for fv in features],
917
1576
  spine_timestamp_col=spine_timestamp_col,
918
1577
  )
919
1578
 
1579
+ # Only set a save_as name if output_type is table
1580
+ table_name = f"{name}_{version}" if output_type == "table" else None
1581
+ result_df = self.generate_training_set(
1582
+ spine_df,
1583
+ features,
1584
+ spine_timestamp_col=spine_timestamp_col,
1585
+ spine_label_cols=spine_label_cols,
1586
+ exclude_columns=exclude_columns,
1587
+ include_feature_view_timestamp_col=include_feature_view_timestamp_col,
1588
+ save_as=table_name,
1589
+ )
1590
+ if output_type == "table":
1591
+ warnings.warn(
1592
+ "Generating a table from generate_dataset() is deprecated and will be removed in a future release,"
1593
+ " use generate_training_set() instead.",
1594
+ DeprecationWarning,
1595
+ stacklevel=2,
1596
+ )
1597
+ return result_df
1598
+
920
1599
  try:
921
- if output_type == "table":
922
- table_name = f"{name}_{version}"
923
- result_df.write.mode("errorifexists").save_as_table(table_name)
924
- ds_df = self._session.table(table_name)
925
- return ds_df
926
- else:
927
- assert output_type == "dataset"
928
- if not self._is_dataset_enabled():
929
- raise snowml_exceptions.SnowflakeMLException(
930
- error_code=error_codes.SNOWML_CREATE_FAILED,
931
- original_exception=RuntimeError(
932
- "Dataset is not enabled in your account. Ask your account admin to set"
933
- ' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
934
- " as a Snowflake Table instead."
935
- ),
936
- )
937
- ds: dataset.Dataset = dataset.create_from_dataframe(
938
- self._session,
939
- name,
940
- version,
941
- input_dataframe=result_df,
942
- exclude_cols=[spine_timestamp_col],
943
- label_cols=spine_label_cols,
944
- properties=fs_meta,
945
- comment=desc,
1600
+ assert output_type == "dataset"
1601
+ if not self._is_dataset_enabled():
1602
+ raise snowml_exceptions.SnowflakeMLException(
1603
+ error_code=error_codes.SNOWML_CREATE_FAILED,
1604
+ original_exception=RuntimeError(
1605
+ "Dataset is not enabled in your account. Ask your account admin to set"
1606
+ " FEATURE_DATASET=ENABLED or use generate_training_set() instead"
1607
+ " to generate the data as a Snowflake Table."
1608
+ ),
946
1609
  )
947
- return ds
1610
+ ds: dataset.Dataset = dataset.create_from_dataframe(
1611
+ self._session,
1612
+ name,
1613
+ version,
1614
+ input_dataframe=result_df,
1615
+ exclude_cols=[spine_timestamp_col] if spine_timestamp_col is not None else [],
1616
+ label_cols=spine_label_cols,
1617
+ properties=fs_meta,
1618
+ comment=desc,
1619
+ )
1620
+ return ds
948
1621
 
949
1622
  except dataset_errors.DatasetExistError as e:
950
1623
  raise snowml_exceptions.SnowflakeMLException(
@@ -970,6 +1643,32 @@ class FeatureStore:
970
1643
 
971
1644
  Raises:
972
1645
  ValueError: if dataset object is not generated from feature store.
1646
+
1647
+ Example::
1648
+
1649
+ >>> fs = FeatureStore(session, ...)
1650
+ >>> # Assume you already have feature view registered.
1651
+ >>> fv = fs.get_feature_view("MY_FV", "1.0")
1652
+ >>> # Spine dataframe has same join keys as the entity of fv.
1653
+ >>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
1654
+ >>> my_dataset = fs.generate_dataset(
1655
+ ... "my_dataset",
1656
+ ... spine_df,
1657
+ ... [fv],
1658
+ ... )
1659
+ >>> fvs = fs.load_feature_views_from_dataset(my_dataset)
1660
+ >>> print(len(fvs))
1661
+ 1
1662
+ <BLANKLINE>
1663
+ >>> print(type(fvs[0]))
1664
+ <class 'snowflake.ml.feature_store.feature_view.FeatureView'>
1665
+ <BLANKLINE>
1666
+ >>> print(fvs[0].name)
1667
+ MY_FV
1668
+ <BLANKLINE>
1669
+ >>> print(fvs[0].version)
1670
+ 1.0
1671
+
973
1672
  """
974
1673
  assert ds.selected_version is not None
975
1674
  source_meta = ds.selected_version._get_metadata()
@@ -1007,11 +1706,11 @@ class FeatureStore:
1007
1706
  if dryrun:
1008
1707
  logger.info(
1009
1708
  "Following feature views and entities will be deleted."
1010
- + " Set 'dryrun=False' to perform the actual deletion."
1709
+ + " Set 'dryrun=False' to perform the actual deletion.",
1011
1710
  )
1012
1711
  logger.info(f"Total {len(all_fvs_rows)} Feature views to be deleted:")
1013
1712
  all_fvs_df.show(n=len(all_fvs_rows))
1014
- logger.info(f"\nTotal {len(all_entities_rows)} entities to be deleted:")
1713
+ logger.info(f"\nTotal {len(all_entities_rows)} Entities to be deleted:")
1015
1714
  all_entities_df.show(n=len(all_entities_rows))
1016
1715
  return
1017
1716
 
@@ -1391,20 +2090,36 @@ class FeatureStore:
1391
2090
  return SqlIdentifier(identifier.concat_names([_ENTITY_TAG_PREFIX, raw_name]))
1392
2091
 
1393
2092
  def _get_fully_qualified_name(self, name: Union[SqlIdentifier, str]) -> str:
1394
- return f"{self._config.full_schema_path}.{name}"
2093
+ # Do a quick check to see if we can skip regex operations
2094
+ if "." not in name:
2095
+ return f"{self._config.full_schema_path}.{name}"
2096
+
2097
+ db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
2098
+ return "{}.{}.{}".format(
2099
+ db_name or self._config.database,
2100
+ schema_name or self._config.schema,
2101
+ object_name,
2102
+ )
1395
2103
 
1396
2104
  # TODO: SHOW DYNAMIC TABLES is very slow while other show objects are fast, investigate with DT in SNOW-902804.
1397
2105
  def _get_fv_backend_representations(
1398
2106
  self, object_name: Optional[SqlIdentifier], prefix_match: bool = False
1399
- ) -> List[Row]:
1400
- dynamic_table_results = self._find_object("DYNAMIC TABLES", object_name, prefix_match)
1401
- view_results = self._find_object("VIEWS", object_name, prefix_match)
2107
+ ) -> List[Tuple[Row, _FeatureStoreObjTypes]]:
2108
+ dynamic_table_results = [
2109
+ (d, _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW)
2110
+ for d in self._find_object("DYNAMIC TABLES", object_name, prefix_match)
2111
+ ]
2112
+ view_results = [
2113
+ (d, _FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW)
2114
+ for d in self._find_object("VIEWS", object_name, prefix_match)
2115
+ ]
1402
2116
  return dynamic_table_results + view_results
1403
2117
 
1404
2118
  def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
1405
2119
  assert operation in [
1406
2120
  "RESUME",
1407
2121
  "SUSPEND",
2122
+ "REFRESH",
1408
2123
  ], f"Operation: {operation} not supported"
1409
2124
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
1410
2125
  raise snowml_exceptions.SnowflakeMLException(
@@ -1417,9 +2132,10 @@ class FeatureStore:
1417
2132
  self._session.sql(f"ALTER DYNAMIC TABLE {fully_qualified_name} {operation}").collect(
1418
2133
  statement_params=self._telemetry_stmp
1419
2134
  )
1420
- self._session.sql(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}").collect(
1421
- statement_params=self._telemetry_stmp
1422
- )
2135
+ if operation != "REFRESH":
2136
+ self._session.sql(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}").collect(
2137
+ statement_params=self._telemetry_stmp
2138
+ )
1423
2139
  except Exception as e:
1424
2140
  raise snowml_exceptions.SnowflakeMLException(
1425
2141
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
@@ -1438,46 +2154,28 @@ class FeatureStore:
1438
2154
  # TODO: this can be optimized further by directly getting all possible FVs and filter by tag
1439
2155
  # it's easier to rewrite the code once we can remove the tag_reference path
1440
2156
  all_fvs = self._get_fv_backend_representations(object_name=None)
1441
- fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
2157
+ fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r, _ in all_fvs}
1442
2158
 
1443
2159
  if len(fv_maps.keys()) == 0:
1444
2160
  return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1445
2161
 
1446
- filter_clause = f"WHERE OBJECT_NAME LIKE '{feature_view_name.resolved()}%'" if feature_view_name else ""
1447
- try:
1448
- res = self._session.sql(
1449
- f"""
1450
- SELECT
1451
- OBJECT_NAME
1452
- FROM TABLE(
1453
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1454
- TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
1455
- )
1456
- ) {filter_clause}"""
1457
- ).collect(statement_params=self._telemetry_stmp)
1458
- except Exception as e:
1459
- raise snowml_exceptions.SnowflakeMLException(
1460
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1461
- original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
1462
- ) from e
2162
+ filters = (
2163
+ [lambda d: d["entityName"].startswith(feature_view_name.resolved())] # type: ignore[union-attr]
2164
+ if feature_view_name
2165
+ else None
2166
+ )
2167
+ res = self._lookup_tagged_objects(self._get_entity_name(entity_name), filters)
1463
2168
 
1464
2169
  output_values: List[List[Any]] = []
1465
2170
  for r in res:
1466
- row = fv_maps[SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)]
2171
+ row = fv_maps[SqlIdentifier(r["entityName"], case_sensitive=True)]
1467
2172
  self._extract_feature_view_info(row, output_values)
1468
2173
 
1469
2174
  return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1470
2175
 
1471
2176
  def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
1472
2177
  name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
1473
- m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
1474
- if m is None:
1475
- raise snowml_exceptions.SnowflakeMLException(
1476
- error_code=error_codes.INTERNAL_SNOWML_ERROR,
1477
- original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1478
- )
1479
-
1480
- fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
2178
+ fv_metadata, _ = self._lookup_feature_view_metadata(row, FeatureView._get_physical_name(name, version))
1481
2179
 
1482
2180
  values: List[Any] = []
1483
2181
  values.append(name)
@@ -1488,63 +2186,42 @@ class FeatureStore:
1488
2186
  values.append(row["owner"])
1489
2187
  values.append(row["comment"])
1490
2188
  values.append(fv_metadata.entities)
2189
+ values.append(row["target_lag"] if "target_lag" in row else None)
2190
+ values.append(row["refresh_mode"] if "refresh_mode" in row else None)
2191
+ values.append(row["scheduling_state"] if "scheduling_state" in row else None)
2192
+ values.append(row["warehouse"] if "warehouse" in row else None)
1491
2193
  output_values.append(values)
1492
2194
 
1493
- def _find_feature_views(self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]) -> DataFrame:
1494
- if not self._validate_entity_exists(entity_name):
1495
- return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1496
-
1497
- all_fvs = self._get_fv_backend_representations(object_name=None)
1498
- fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1499
-
1500
- if len(fv_maps.keys()) == 0:
1501
- return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1502
-
1503
- # NOTE: querying INFORMATION_SCHEMA for Entity lineage can be expensive depending on how many active
1504
- # FeatureViews there are. If this ever become an issue, consider exploring improvements.
1505
- try:
1506
- queries = [
1507
- f"""
1508
- SELECT
1509
- TAG_VALUE,
1510
- OBJECT_NAME
1511
- FROM TABLE(
1512
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1513
- '{self._get_fully_qualified_name(fv_name)}',
1514
- 'table'
1515
- )
1516
- )
1517
- WHERE LEVEL = 'TABLE'
1518
- AND TAG_NAME = '{_FEATURE_VIEW_METADATA_TAG}'
1519
- """
1520
- for fv_name in fv_maps.keys()
1521
- ]
1522
-
1523
- results = self._session.sql("\nUNION\n".join(queries)).collect(statement_params=self._telemetry_stmp)
1524
- except Exception as e:
1525
- raise snowml_exceptions.SnowflakeMLException(
1526
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1527
- original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
1528
- ) from e
1529
-
1530
- output_values: List[List[Any]] = []
1531
- for r in results:
1532
- fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
1533
- for retrieved_entity in fv_metadata.entities:
1534
- if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
1535
- fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
1536
- fv_name = SqlIdentifier(fv_name, case_sensitive=True)
1537
- obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
1538
- if feature_view_name is not None:
1539
- if fv_name == feature_view_name:
1540
- self._extract_feature_view_info(fv_maps[obj_name], output_values)
1541
- else:
1542
- continue
1543
- else:
1544
- self._extract_feature_view_info(fv_maps[obj_name], output_values)
1545
- return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
2195
+ def _lookup_feature_view_metadata(self, row: Row, fv_name: str) -> Tuple[_FeatureViewMetadata, str]:
2196
+ if len(row["text"]) == 0:
2197
+ # NOTE: if this is a shared feature view, then text column will be empty due to privacy constraints.
2198
+ # So instead of looking at original query text, we will obtain metadata by querying the tag value.
2199
+ # For query body, we will just use a simple select instead of original DDL query since shared feature views
2200
+ # are read-only.
2201
+ try:
2202
+ res = self._lookup_tags(
2203
+ domain="table", obj_name=fv_name, filter_fns=[lambda d: d["tagName"] == _FEATURE_VIEW_METADATA_TAG]
2204
+ )
2205
+ fv_metadata = _FeatureViewMetadata.from_json(res[0]["tagValue"])
2206
+ query = f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}"
2207
+ return (fv_metadata, query)
2208
+ except Exception as e:
2209
+ raise snowml_exceptions.SnowflakeMLException(
2210
+ error_code=error_codes.INTERNAL_SNOWML_ERROR,
2211
+ original_exception=RuntimeError(f"Failed to extract feature_view metadata for {fv_name}: {e}."),
2212
+ )
2213
+ else:
2214
+ m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
2215
+ if m is None:
2216
+ raise snowml_exceptions.SnowflakeMLException(
2217
+ error_code=error_codes.INTERNAL_SNOWML_ERROR,
2218
+ original_exception=RuntimeError(f"Failed to parse query text for FeatureView {fv_name}: {row}."),
2219
+ )
2220
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
2221
+ query = m.group("query")
2222
+ return (fv_metadata, query)
1546
2223
 
1547
- def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
2224
+ def _compose_feature_view(self, row: Row, obj_type: _FeatureStoreObjTypes, entity_list: List[Row]) -> FeatureView:
1548
2225
  def find_and_compose_entity(name: str) -> Entity:
1549
2226
  name = SqlIdentifier(name).resolved()
1550
2227
  for e in entity_list:
@@ -1558,21 +2235,14 @@ class FeatureStore:
1558
2235
 
1559
2236
  name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
1560
2237
  name = SqlIdentifier(name, case_sensitive=True)
1561
- m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
1562
- if m is None:
1563
- raise snowml_exceptions.SnowflakeMLException(
1564
- error_code=error_codes.INTERNAL_SNOWML_ERROR,
1565
- original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1566
- )
1567
-
1568
2238
  fv_name = FeatureView._get_physical_name(name, version)
2239
+ fv_metadata, query = self._lookup_feature_view_metadata(row, fv_name)
2240
+
1569
2241
  infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
2242
+ desc = row["comment"]
1570
2243
 
1571
- if m.group("obj_type") == "DYNAMIC TABLE":
1572
- query = m.group("query")
2244
+ if obj_type == _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW:
1573
2245
  df = self._session.sql(query)
1574
- desc = m.group("comment")
1575
- fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1576
2246
  entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1577
2247
  ts_col = fv_metadata.timestamp_col
1578
2248
  timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
@@ -1584,23 +2254,25 @@ class FeatureStore:
1584
2254
  timestamp_col=timestamp_col,
1585
2255
  desc=desc,
1586
2256
  version=version,
1587
- status=FeatureViewStatus(row["scheduling_state"]),
2257
+ status=FeatureViewStatus(row["scheduling_state"])
2258
+ if len(row["scheduling_state"]) > 0
2259
+ else FeatureViewStatus.MASKED,
1588
2260
  feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
1589
2261
  refresh_freq=row["target_lag"],
1590
2262
  database=self._config.database.identifier(),
1591
2263
  schema=self._config.schema.identifier(),
1592
- warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier(),
2264
+ warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
2265
+ if len(row["warehouse"]) > 0
2266
+ else None,
1593
2267
  refresh_mode=row["refresh_mode"],
1594
2268
  refresh_mode_reason=row["refresh_mode_reason"],
1595
2269
  owner=row["owner"],
1596
2270
  infer_schema_df=infer_schema_df,
2271
+ session=self._session,
1597
2272
  )
1598
2273
  return fv
1599
2274
  else:
1600
- query = m.group("query")
1601
2275
  df = self._session.sql(query)
1602
- desc = m.group("comment")
1603
- fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1604
2276
  entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1605
2277
  ts_col = fv_metadata.timestamp_col
1606
2278
  timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
@@ -1622,6 +2294,7 @@ class FeatureStore:
1622
2294
  refresh_mode_reason=None,
1623
2295
  owner=row["owner"],
1624
2296
  infer_schema_df=infer_schema_df,
2297
+ session=self._session,
1625
2298
  )
1626
2299
  return fv
1627
2300
 
@@ -1675,42 +2348,10 @@ class FeatureStore:
1675
2348
  )
1676
2349
  # There could be none-FS objects under FS schema, thus filter on objects with FS special tag.
1677
2350
  if object_type not in tag_free_object_types and len(all_rows) > 0:
1678
- if self._use_optimized_tag_ref:
1679
- fs_obj_rows = self._session.sql(
1680
- f"""
1681
- SELECT
1682
- OBJECT_NAME
1683
- FROM TABLE(
1684
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1685
- TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1686
- )
1687
- )
1688
- WHERE DOMAIN='{obj_domain}'
1689
- """
1690
- ).collect(statement_params=self._telemetry_stmp)
1691
- else:
1692
- # TODO: remove this after tag_ref_internal rollout
1693
- # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
1694
- # use double quotes to make it case-sensitive.
1695
- queries = [
1696
- f"""
1697
- SELECT OBJECT_NAME
1698
- FROM TABLE(
1699
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1700
- '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
1701
- '{obj_domain}'
1702
- )
1703
- )
1704
- WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
1705
- AND TAG_SCHEMA = '{self._config.schema.resolved()}'
1706
- """
1707
- for row in all_rows
1708
- ]
1709
- fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
1710
- statement_params=self._telemetry_stmp
1711
- )
1712
-
1713
- fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
2351
+ fs_obj_rows = self._lookup_tagged_objects(
2352
+ _FEATURE_STORE_OBJECT_TAG, [lambda d: d["domain"] == obj_domain]
2353
+ )
2354
+ fs_tag_objects = [row["entityName"] for row in fs_obj_rows]
1714
2355
  except Exception as e:
1715
2356
  raise snowml_exceptions.SnowflakeMLException(
1716
2357
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
@@ -1756,21 +2397,6 @@ class FeatureStore:
1756
2397
  )
1757
2398
  return cast(DataFrame, df.drop(exclude_columns))
1758
2399
 
1759
- def _tag_ref_internal_enabled(self) -> bool:
1760
- try:
1761
- self._session.sql(
1762
- f"""
1763
- SELECT * FROM TABLE(
1764
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1765
- TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1766
- )
1767
- ) LIMIT 1;
1768
- """
1769
- ).collect()
1770
- return True
1771
- except Exception:
1772
- return False
1773
-
1774
2400
  def _is_dataset_enabled(self) -> bool:
1775
2401
  try:
1776
2402
  self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
@@ -1790,21 +2416,98 @@ class FeatureStore:
1790
2416
  category=UserWarning,
1791
2417
  )
1792
2418
 
1793
- def _collapse_object_versions(self) -> List[pkg_version.Version]:
1794
- if not self._use_optimized_tag_ref:
1795
- return []
2419
+ def _filter_results(
2420
+ self, results: List[Dict[str, str]], filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
2421
+ ) -> List[Dict[str, str]]:
2422
+ if filter_fns is None:
2423
+ return results
1796
2424
 
1797
- query = f"""
1798
- SELECT
1799
- TAG_VALUE
1800
- FROM TABLE(
1801
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1802
- TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1803
- )
1804
- )
2425
+ filtered_results = []
2426
+ for r in results:
2427
+ if all([fn(r) for fn in filter_fns]):
2428
+ filtered_results.append(r)
2429
+ return filtered_results
2430
+
2431
+ def _lookup_tags(
2432
+ self, domain: str, obj_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
2433
+ ) -> List[Dict[str, str]]:
1805
2434
  """
2435
+ Lookup tag values for a given object, optionally apply filters on the results.
2436
+
2437
+ Args:
2438
+ domain: Domain of the obj to look for tag. E.g. table
2439
+ obj_name: Name of the obj.
2440
+ filter_fns: List of filter functions applied on the results.
2441
+
2442
+ Returns:
2443
+ List of tag values in dictionary format.
2444
+
2445
+ Raises:
2446
+ SnowflakeMLException: [RuntimeError] Failed to lookup tags.
2447
+
2448
+ Example::
2449
+
2450
+ self._lookup_tags("TABLE", "MY_FV", [lambda d: d["tagName"] == "TARGET_TAG_NAME"])
2451
+
2452
+ """
2453
+ # NOTE: use ENTITY_DETAIL system fn to query tags for given object for it to work in
2454
+ # processes using owner's right. e.g. Streamlit, or stored procedure
1806
2455
  try:
1807
- res = self._session.sql(query).collect(statement_params=self._telemetry_stmp)
2456
+ res = self._session.sql(
2457
+ f"""
2458
+ SELECT ENTITY_DETAIL('{domain}','{self._get_fully_qualified_name(obj_name)}', '["TAG_REFERENCES"]');
2459
+ """
2460
+ ).collect(statement_params=self._telemetry_stmp)
2461
+ entity_detail = json.loads(res[0][0])
2462
+ results = entity_detail["tagReferencesInfo"]["tagReferenceList"]
2463
+ return self._filter_results(results, filter_fns)
2464
+ except Exception as e:
2465
+ raise snowml_exceptions.SnowflakeMLException(
2466
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
2467
+ original_exception=RuntimeError(f"Failed to lookup tags for object for {obj_name}: {e}"),
2468
+ ) from e
2469
+
2470
+ def _lookup_tagged_objects(
2471
+ self, tag_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
2472
+ ) -> List[Dict[str, str]]:
2473
+ """
2474
+ Lookup objects based on specified tag name, optionally apply filters on the results.
2475
+
2476
+ Args:
2477
+ tag_name: Name of the tag.
2478
+ filter_fns: List of filter functions applied on the results.
2479
+
2480
+ Returns:
2481
+ List of objects in dictionary format.
2482
+
2483
+ Raises:
2484
+ SnowflakeMLException: [RuntimeError] Failed to lookup tagged objects.
2485
+
2486
+ Example::
2487
+
2488
+ self._lookup_tagged_objects("TARGET_TAG_NAME", [lambda d: d["entityName"] == "MY_FV"])
2489
+
2490
+ """
2491
+ # NOTE: use ENTITY_DETAIL system fn to query objects from tag for it to work in
2492
+ # processes using owner's right. e.g. Streamlit, or stored procedure
2493
+ try:
2494
+ res = self._session.sql(
2495
+ f"""
2496
+ SELECT ENTITY_DETAIL('TAG','{self._get_fully_qualified_name(tag_name)}', '["TAG_REFERENCES_INTERNAL"]');
2497
+ """
2498
+ ).collect(statement_params=self._telemetry_stmp)
2499
+ entity_detail = json.loads(res[0][0])
2500
+ results = entity_detail["referencedEntities"]["tagReferenceList"]
2501
+ return self._filter_results(results, filter_fns)
2502
+ except Exception as e:
2503
+ raise snowml_exceptions.SnowflakeMLException(
2504
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
2505
+ original_exception=RuntimeError(f"Failed to lookup tagged objects for {tag_name}: {e}"),
2506
+ ) from e
2507
+
2508
+ def _collapse_object_versions(self) -> List[pkg_version.Version]:
2509
+ try:
2510
+ res = self._lookup_tagged_objects(_FEATURE_STORE_OBJECT_TAG)
1808
2511
  except Exception:
1809
2512
  # since this is a best effort user warning to upgrade pkg versions
1810
2513
  # we are treating failures as benign error
@@ -1812,7 +2515,7 @@ class FeatureStore:
1812
2515
  versions = set()
1813
2516
  compatibility_breakage_detected = False
1814
2517
  for r in res:
1815
- info = _FeatureStoreObjInfo.from_json(r["TAG_VALUE"])
2518
+ info = _FeatureStoreObjInfo.from_json(r["tagValue"])
1816
2519
  if info.type == _FeatureStoreObjTypes.UNKNOWN:
1817
2520
  compatibility_breakage_detected = True
1818
2521
  versions.add(pkg_version.parse(info.pkg_version))
@@ -1827,3 +2530,23 @@ class FeatureStore:
1827
2530
  ),
1828
2531
  )
1829
2532
  return sorted_versions
2533
+
2534
+ def _validate_feature_view_name_and_version_input(
2535
+ self, feature_view: Union[FeatureView, str], version: Optional[str] = None
2536
+ ) -> FeatureView:
2537
+ if isinstance(feature_view, str):
2538
+ if version is None:
2539
+ raise snowml_exceptions.SnowflakeMLException(
2540
+ error_code=error_codes.INVALID_ARGUMENT,
2541
+ original_exception=ValueError("Version must be provided when argument feature_view is a str."),
2542
+ )
2543
+ feature_view = self.get_feature_view(feature_view, version)
2544
+ elif not isinstance(feature_view, FeatureView):
2545
+ raise snowml_exceptions.SnowflakeMLException(
2546
+ error_code=error_codes.INVALID_ARGUMENT,
2547
+ original_exception=ValueError(
2548
+ "Invalid type of argument feature_view. It must be either str or FeatureView type."
2549
+ ),
2550
+ )
2551
+
2552
+ return feature_view