snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +4 -1
- snowflake/cortex/_classify_text.py +36 -0
- snowflake/cortex/_complete.py +281 -21
- snowflake/cortex/_extract_answer.py +0 -1
- snowflake/cortex/_sentiment.py +0 -1
- snowflake/cortex/_summarize.py +0 -1
- snowflake/cortex/_translate.py +0 -1
- snowflake/cortex/_util.py +12 -85
- snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
- snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
- snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
- snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
- snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
- snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
- snowflake/ml/_internal/telemetry.py +38 -2
- snowflake/ml/_internal/utils/identifier.py +14 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
- snowflake/ml/data/_internal/arrow_ingestor.py +228 -0
- snowflake/ml/data/_internal/ingestor_utils.py +58 -0
- snowflake/ml/data/data_connector.py +133 -0
- snowflake/ml/data/data_ingestor.py +28 -0
- snowflake/ml/data/data_source.py +23 -0
- snowflake/ml/dataset/dataset.py +39 -32
- snowflake/ml/dataset/dataset_reader.py +18 -118
- snowflake/ml/feature_store/access_manager.py +7 -1
- snowflake/ml/feature_store/entity.py +19 -2
- snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +31 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +24 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +4 -0
- snowflake/ml/feature_store/examples/example_helper.py +240 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/dropoff_features.py +39 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +58 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
- snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
- snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +29 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +21 -0
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +5 -0
- snowflake/ml/feature_store/feature_store.py +987 -264
- snowflake/ml/feature_store/feature_view.py +228 -13
- snowflake/ml/fileset/embedded_stage_fs.py +25 -21
- snowflake/ml/fileset/fileset.py +2 -2
- snowflake/ml/fileset/snowfs.py +4 -15
- snowflake/ml/fileset/stage_fs.py +24 -18
- snowflake/ml/lineage/__init__.py +3 -0
- snowflake/ml/lineage/lineage_node.py +139 -0
- snowflake/ml/model/_client/model/model_impl.py +47 -14
- snowflake/ml/model/_client/model/model_version_impl.py +82 -2
- snowflake/ml/model/_client/ops/model_ops.py +77 -5
- snowflake/ml/model/_client/sql/model.py +1 -0
- snowflake/ml/model/_client/sql/model_version.py +45 -2
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
- snowflake/ml/model/_model_composer/model_composer.py +15 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +20 -4
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +55 -0
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -34
- snowflake/ml/model/_model_composer/model_method/model_method.py +10 -7
- snowflake/ml/model/_packager/model_handlers/_base.py +13 -3
- snowflake/ml/model/_packager/model_handlers/_utils.py +59 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +44 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +70 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +61 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_packager.py +9 -4
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -5
- snowflake/ml/model/custom_model.py +22 -2
- snowflake/ml/model/model_signature.py +4 -4
- snowflake/ml/model/type_hints.py +77 -4
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +3 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
- snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
- snowflake/ml/modeling/cluster/birch.py +4 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
- snowflake/ml/modeling/cluster/dbscan.py +4 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
- snowflake/ml/modeling/cluster/k_means.py +4 -2
- snowflake/ml/modeling/cluster/mean_shift.py +4 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
- snowflake/ml/modeling/cluster/optics.py +4 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
- snowflake/ml/modeling/compose/column_transformer.py +4 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
- snowflake/ml/modeling/covariance/oas.py +4 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/pca.py +4 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
- snowflake/ml/modeling/impute/knn_imputer.py +4 -2
- snowflake/ml/modeling/impute/missing_indicator.py +4 -2
- snowflake/ml/modeling/impute/simple_imputer.py +26 -0
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
- snowflake/ml/modeling/manifold/isomap.py +4 -2
- snowflake/ml/modeling/manifold/mds.py +4 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
- snowflake/ml/modeling/manifold/tsne.py +4 -2
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
- snowflake/ml/modeling/pipeline/pipeline.py +5 -4
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
- snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
- snowflake/ml/registry/_manager/model_manager.py +16 -3
- snowflake/ml/registry/registry.py +100 -13
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/METADATA +81 -7
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/RECORD +165 -139
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/lineage/data_source.py +0 -10
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,7 @@ from snowflake.ml._internal.exceptions import (
|
|
33
33
|
dataset_errors,
|
34
34
|
error_codes,
|
35
35
|
exceptions as snowml_exceptions,
|
36
|
+
sql_error_codes,
|
36
37
|
)
|
37
38
|
from snowflake.ml._internal.utils import identifier
|
38
39
|
from snowflake.ml._internal.utils.sql_identifier import (
|
@@ -131,6 +132,10 @@ _LIST_FEATURE_VIEW_SCHEMA = StructType(
|
|
131
132
|
StructField("owner", StringType()),
|
132
133
|
StructField("desc", StringType()),
|
133
134
|
StructField("entities", ArrayType(StringType())),
|
135
|
+
StructField("refresh_freq", StringType()),
|
136
|
+
StructField("refresh_mode", StringType()),
|
137
|
+
StructField("scheduling_state", StringType()),
|
138
|
+
StructField("warehouse", StringType()),
|
134
139
|
]
|
135
140
|
)
|
136
141
|
|
@@ -201,6 +206,7 @@ class FeatureStore:
|
|
201
206
|
database: str,
|
202
207
|
name: str,
|
203
208
|
default_warehouse: str,
|
209
|
+
*,
|
204
210
|
creation_mode: CreationMode = CreationMode.FAIL_IF_NOT_EXIST,
|
205
211
|
) -> None:
|
206
212
|
"""
|
@@ -220,6 +226,32 @@ class FeatureStore:
|
|
220
226
|
SnowflakeMLException: [ValueError] Required resources do not exist when mode is FAIL_IF_NOT_EXIST.
|
221
227
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
222
228
|
SnowflakeMLException: [RuntimeError] Failed to create feature store.
|
229
|
+
|
230
|
+
Example::
|
231
|
+
|
232
|
+
>>> from snowflake.ml.feature_store import (
|
233
|
+
... FeatureStore,
|
234
|
+
... CreationMode,
|
235
|
+
... )
|
236
|
+
<BLANKLINE>
|
237
|
+
>>> # Create a new Feature Store:
|
238
|
+
>>> fs = FeatureStore(
|
239
|
+
... session=session,
|
240
|
+
... database="MYDB",
|
241
|
+
... name="MYSCHEMA",
|
242
|
+
... default_warehouse="MYWH",
|
243
|
+
... creation_mode=CreationMode.CREATE_IF_NOT_EXIST
|
244
|
+
... )
|
245
|
+
<BLANKLINE>
|
246
|
+
>>> # Connect to an existing Feature Store:
|
247
|
+
>>> fs = FeatureStore(
|
248
|
+
... session=session,
|
249
|
+
... database="MYDB",
|
250
|
+
... name="MYSCHEMA",
|
251
|
+
... default_warehouse="MYWH",
|
252
|
+
... creation_mode=CreationMode.FAIL_IF_NOT_EXIST
|
253
|
+
... )
|
254
|
+
|
223
255
|
"""
|
224
256
|
|
225
257
|
database = SqlIdentifier(database)
|
@@ -267,10 +299,7 @@ class FeatureStore:
|
|
267
299
|
raise snowml_exceptions.SnowflakeMLException(
|
268
300
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
269
301
|
original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
|
270
|
-
)
|
271
|
-
|
272
|
-
# TODO: remove this after tag_ref_internal rollout
|
273
|
-
self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
|
302
|
+
) from e
|
274
303
|
self._check_feature_store_object_versions()
|
275
304
|
logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
|
276
305
|
|
@@ -283,6 +312,16 @@ class FeatureStore:
|
|
283
312
|
|
284
313
|
Raises:
|
285
314
|
SnowflakeMLException: If warehouse does not exist.
|
315
|
+
|
316
|
+
Example::
|
317
|
+
|
318
|
+
>>> fs = FeatureStore(...)
|
319
|
+
>>> fs.update_default_warehouse("MYWH_2")
|
320
|
+
>>> draft_fv = FeatureView("my_fv", ...)
|
321
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, '2.0')
|
322
|
+
>>> print(registered_fv.warehouse)
|
323
|
+
MYWH_2
|
324
|
+
|
286
325
|
"""
|
287
326
|
warehouse = SqlIdentifier(warehouse_name)
|
288
327
|
warehouse_result = self._find_object("WAREHOUSES", warehouse)
|
@@ -300,15 +339,27 @@ class FeatureStore:
|
|
300
339
|
Register Entity in the FeatureStore.
|
301
340
|
|
302
341
|
Args:
|
303
|
-
entity: Entity object to
|
342
|
+
entity: Entity object to be registered.
|
304
343
|
|
305
344
|
Returns:
|
306
345
|
A registered entity object.
|
307
346
|
|
308
347
|
Raises:
|
309
348
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
310
|
-
"""
|
311
349
|
|
350
|
+
Example::
|
351
|
+
|
352
|
+
>>> fs = FeatureStore(...)
|
353
|
+
>>> e = Entity('BAR', ['A'], desc='entity bar')
|
354
|
+
>>> fs.register_entity(e)
|
355
|
+
>>> fs.list_entities().show()
|
356
|
+
--------------------------------------------------
|
357
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
358
|
+
--------------------------------------------------
|
359
|
+
|BAR |["A"] |entity bar |REGTEST_RL |
|
360
|
+
--------------------------------------------------
|
361
|
+
|
362
|
+
"""
|
312
363
|
tag_name = self._get_entity_name(entity.name)
|
313
364
|
found_rows = self._find_object("TAGS", tag_name)
|
314
365
|
if len(found_rows) > 0:
|
@@ -340,12 +391,74 @@ class FeatureStore:
|
|
340
391
|
|
341
392
|
return self.get_entity(entity.name)
|
342
393
|
|
394
|
+
def update_entity(self, name: str, *, desc: Optional[str] = None) -> Optional[Entity]:
|
395
|
+
"""Update a registered entity with provided information.
|
396
|
+
|
397
|
+
Args:
|
398
|
+
name: Name of entity to update.
|
399
|
+
desc: Optional new description to apply. Defaults to None.
|
400
|
+
|
401
|
+
Raises:
|
402
|
+
SnowflakeMLException: An error occurred while updating.
|
403
|
+
|
404
|
+
Returns:
|
405
|
+
A new entity with updated information or None if the entity doesn't exist.
|
406
|
+
|
407
|
+
Example::
|
408
|
+
|
409
|
+
>>> fs = FeatureStore(...)
|
410
|
+
<BLANKLINE>
|
411
|
+
>>> e = Entity(name='foo', join_keys=['COL_1'], desc='old desc')
|
412
|
+
>>> fs.list_entities().show()
|
413
|
+
------------------------------------------------
|
414
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
415
|
+
------------------------------------------------
|
416
|
+
|FOO |["COL_1"] |old desc |REGTEST_RL |
|
417
|
+
------------------------------------------------
|
418
|
+
<BLANKLINE>
|
419
|
+
>>> fs.update_entity('foo', desc='NEW DESC')
|
420
|
+
>>> fs.list_entities().show()
|
421
|
+
------------------------------------------------
|
422
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
423
|
+
------------------------------------------------
|
424
|
+
|FOO |["COL_1"] |NEW DESC |REGTEST_RL |
|
425
|
+
------------------------------------------------
|
426
|
+
|
427
|
+
"""
|
428
|
+
name = SqlIdentifier(name)
|
429
|
+
found_rows = self.list_entities().filter(F.col("NAME") == name.resolved()).collect()
|
430
|
+
|
431
|
+
if len(found_rows) == 0:
|
432
|
+
warnings.warn(
|
433
|
+
f"Entity {name} does not exist.",
|
434
|
+
stacklevel=2,
|
435
|
+
category=UserWarning,
|
436
|
+
)
|
437
|
+
return None
|
438
|
+
|
439
|
+
new_desc = desc if desc is not None else found_rows[0]["DESC"]
|
440
|
+
|
441
|
+
try:
|
442
|
+
full_name = f"{self._config.full_schema_path}.{self._get_entity_name(name)}"
|
443
|
+
self._session.sql(f"ALTER TAG {full_name} SET COMMENT = '{new_desc}'").collect(
|
444
|
+
statement_params=self._telemetry_stmp
|
445
|
+
)
|
446
|
+
except Exception as e:
|
447
|
+
raise snowml_exceptions.SnowflakeMLException(
|
448
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
449
|
+
original_exception=RuntimeError(f"Failed to update entity `{name}`: {e}."),
|
450
|
+
) from e
|
451
|
+
|
452
|
+
logger.info(f"Successfully updated Entity {name}.")
|
453
|
+
return self.get_entity(name)
|
454
|
+
|
343
455
|
# TODO: add support to update column desc once SNOW-894249 is fixed
|
344
456
|
@dispatch_decorator()
|
345
457
|
def register_feature_view(
|
346
458
|
self,
|
347
459
|
feature_view: FeatureView,
|
348
460
|
version: str,
|
461
|
+
*,
|
349
462
|
block: bool = True,
|
350
463
|
overwrite: bool = False,
|
351
464
|
) -> FeatureView:
|
@@ -355,12 +468,6 @@ class FeatureStore:
|
|
355
468
|
NOTE: Each new materialization will trigger a full FeatureView history refresh for the data included in the
|
356
469
|
FeatureView.
|
357
470
|
|
358
|
-
Examples:
|
359
|
-
...
|
360
|
-
draft_fv = FeatureView(name="my_fv", entities=[entities], feature_df)
|
361
|
-
registered_fv = fs.register_feature_view(feature_view=draft_fv, version="v1")
|
362
|
-
...
|
363
|
-
|
364
471
|
Args:
|
365
472
|
feature_view: FeatureView instance to materialize.
|
366
473
|
version: version of the registered FeatureView.
|
@@ -379,6 +486,35 @@ class FeatureStore:
|
|
379
486
|
SnowflakeMLException: [ValueError] Warehouse or default warehouse is not specified.
|
380
487
|
SnowflakeMLException: [RuntimeError] Failed to create dynamic table, task, or view.
|
381
488
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
489
|
+
|
490
|
+
Example::
|
491
|
+
|
492
|
+
>>> fs = FeatureStore(...)
|
493
|
+
>>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
|
494
|
+
>>> feature_df = session.sql("select f_1, f_2 from source_table")
|
495
|
+
>>> draft_fv = FeatureView("my_fv", [entities], feature_df)
|
496
|
+
>>> print(draft_fv.status)
|
497
|
+
FeatureViewStatus.DRAFT
|
498
|
+
<BLANKLINE>
|
499
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
500
|
+
-------------------------------------------
|
501
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
502
|
+
-------------------------------------------
|
503
|
+
| | | |
|
504
|
+
-------------------------------------------
|
505
|
+
<BLANKLINE>
|
506
|
+
>>> # registered_fv is a local object that maps to a Snowflake backend object.
|
507
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, "v1")
|
508
|
+
>>> print(registered_fv.status)
|
509
|
+
FeatureViewStatus.ACTIVE
|
510
|
+
<BLANKLINE>
|
511
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
512
|
+
-------------------------------------------
|
513
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
514
|
+
-------------------------------------------
|
515
|
+
|MY_FV |v1 |ACTIVE |
|
516
|
+
-------------------------------------------
|
517
|
+
|
382
518
|
"""
|
383
519
|
version = FeatureViewVersion(version)
|
384
520
|
|
@@ -443,7 +579,7 @@ class FeatureStore:
|
|
443
579
|
column_descs,
|
444
580
|
tagging_clause_str,
|
445
581
|
schedule_task,
|
446
|
-
self._default_warehouse,
|
582
|
+
feature_view.warehouse if feature_view.warehouse is not None else self._default_warehouse,
|
447
583
|
block,
|
448
584
|
overwrite,
|
449
585
|
)
|
@@ -469,7 +605,13 @@ class FeatureStore:
|
|
469
605
|
|
470
606
|
@dispatch_decorator()
|
471
607
|
def update_feature_view(
|
472
|
-
self,
|
608
|
+
self,
|
609
|
+
name: str,
|
610
|
+
version: str,
|
611
|
+
*,
|
612
|
+
refresh_freq: Optional[str] = None,
|
613
|
+
warehouse: Optional[str] = None,
|
614
|
+
desc: Optional[str] = None,
|
473
615
|
) -> FeatureView:
|
474
616
|
"""Update a registered feature view.
|
475
617
|
Check feature_view.py for which fields are allowed to be updated after registration.
|
@@ -479,32 +621,73 @@ class FeatureStore:
|
|
479
621
|
version: version of the FeatureView to be updated.
|
480
622
|
refresh_freq: updated refresh frequency.
|
481
623
|
warehouse: updated warehouse.
|
624
|
+
desc: description of feature view.
|
482
625
|
|
483
626
|
Returns:
|
484
627
|
Updated FeatureView.
|
485
628
|
|
629
|
+
Example::
|
630
|
+
|
631
|
+
>>> fs = FeatureStore(...)
|
632
|
+
>>> fv = FeatureView(
|
633
|
+
... name='foo',
|
634
|
+
... entities=[e1, e2],
|
635
|
+
... feature_df=session.sql('...'),
|
636
|
+
... desc='this is old description',
|
637
|
+
... )
|
638
|
+
>>> fv = fs.register_feature_view(feature_view=fv, version='v1')
|
639
|
+
>>> fs.list_feature_views().select("name", "version", "desc").show()
|
640
|
+
------------------------------------------------
|
641
|
+
|"NAME" |"VERSION" |"DESC" |
|
642
|
+
------------------------------------------------
|
643
|
+
|FOO |v1 |this is old description |
|
644
|
+
------------------------------------------------
|
645
|
+
<BLANKLINE>
|
646
|
+
>>> # update_feature_view will apply new arguments to the registered feature view.
|
647
|
+
>>> new_fv = fs.update_feature_view(
|
648
|
+
... name='foo',
|
649
|
+
... version='v1',
|
650
|
+
... desc='that is new description',
|
651
|
+
... )
|
652
|
+
>>> fs.list_feature_views().select("name", "version", "desc").show()
|
653
|
+
------------------------------------------------
|
654
|
+
|"NAME" |"VERSION" |"DESC" |
|
655
|
+
------------------------------------------------
|
656
|
+
|FOO |v1 |THAT IS NEW DESCRIPTION |
|
657
|
+
------------------------------------------------
|
658
|
+
|
486
659
|
Raises:
|
487
660
|
SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
|
488
661
|
SnowflakeMLException: [RuntimeError] Failed to update feature view.
|
489
662
|
"""
|
490
663
|
feature_view = self.get_feature_view(name=name, version=version)
|
491
|
-
if
|
492
|
-
full_name = f"{feature_view.name}/{feature_view.version}"
|
493
|
-
raise snowml_exceptions.SnowflakeMLException(
|
494
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
495
|
-
original_exception=RuntimeError(f"Feature view {full_name} must be non-static so that can be updated."),
|
496
|
-
)
|
664
|
+
new_desc = desc if desc is not None else feature_view.desc
|
497
665
|
|
498
|
-
|
666
|
+
if feature_view.status == FeatureViewStatus.STATIC:
|
667
|
+
if refresh_freq is not None or warehouse is not None:
|
668
|
+
full_name = f"{feature_view.name}/{feature_view.version}"
|
669
|
+
raise snowml_exceptions.SnowflakeMLException(
|
670
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
671
|
+
original_exception=RuntimeError(
|
672
|
+
f"Static feature view '{full_name}' does not support refresh_freq and warehouse."
|
673
|
+
),
|
674
|
+
)
|
675
|
+
new_query = f"""
|
676
|
+
ALTER VIEW {feature_view.fully_qualified_name()} SET
|
677
|
+
COMMENT = '{new_desc}'
|
678
|
+
"""
|
679
|
+
else:
|
680
|
+
warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
|
681
|
+
# TODO(@wezhou): we need to properly handle cron expr
|
682
|
+
new_query = f"""
|
683
|
+
ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
|
684
|
+
TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
|
685
|
+
WAREHOUSE = {warehouse}
|
686
|
+
COMMENT = '{new_desc}'
|
687
|
+
"""
|
499
688
|
|
500
|
-
# TODO(@wezhou): we need to properly handle cron expr
|
501
689
|
try:
|
502
|
-
self._session.sql(
|
503
|
-
f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
|
504
|
-
TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
|
505
|
-
WAREHOUSE = {warehouse}
|
506
|
-
"""
|
507
|
-
).collect(statement_params=self._telemetry_stmp)
|
690
|
+
self._session.sql(new_query).collect(statement_params=self._telemetry_stmp)
|
508
691
|
except Exception as e:
|
509
692
|
raise snowml_exceptions.SnowflakeMLException(
|
510
693
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
@@ -514,20 +697,56 @@ class FeatureStore:
|
|
514
697
|
) from e
|
515
698
|
return self.get_feature_view(name=name, version=version)
|
516
699
|
|
517
|
-
@
|
700
|
+
@overload
|
701
|
+
def read_feature_view(self, feature_view: str, version: str) -> DataFrame:
|
702
|
+
...
|
703
|
+
|
704
|
+
@overload
|
518
705
|
def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
|
706
|
+
...
|
707
|
+
|
708
|
+
@dispatch_decorator() # type: ignore[misc]
|
709
|
+
def read_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> DataFrame:
|
519
710
|
"""
|
520
|
-
Read FeatureView
|
711
|
+
Read values from a FeatureView.
|
521
712
|
|
522
713
|
Args:
|
523
|
-
feature_view: FeatureView to
|
714
|
+
feature_view: A FeatureView object to read from, or the name of feature view.
|
715
|
+
If name is provided then version also must be provided.
|
716
|
+
version: Optional version of feature view. Must be set when argument feature_view is a str.
|
524
717
|
|
525
718
|
Returns:
|
526
719
|
Snowpark DataFrame(lazy mode) containing the FeatureView data.
|
527
720
|
|
528
721
|
Raises:
|
722
|
+
SnowflakeMLException: [ValueError] version argument is missing when argument feature_view is a str.
|
529
723
|
SnowflakeMLException: [ValueError] FeatureView is not registered.
|
724
|
+
|
725
|
+
Example::
|
726
|
+
|
727
|
+
>>> fs = FeatureStore(...)
|
728
|
+
>>> # Read from feature view name and version.
|
729
|
+
>>> fs.read_feature_view('foo', 'v1').show()
|
730
|
+
------------------------------------------
|
731
|
+
|"NAME" |"ID" |"TITLE" |"AGE" |"TS" |
|
732
|
+
------------------------------------------
|
733
|
+
|jonh |1 |boss |20 |100 |
|
734
|
+
|porter |2 |manager |30 |200 |
|
735
|
+
------------------------------------------
|
736
|
+
<BLANKLINE>
|
737
|
+
>>> # Read from feature view object.
|
738
|
+
>>> fv = fs.get_feature_view('foo', 'v1')
|
739
|
+
>>> fs.read_feature_view(fv).show()
|
740
|
+
------------------------------------------
|
741
|
+
|"NAME" |"ID" |"TITLE" |"AGE" |"TS" |
|
742
|
+
------------------------------------------
|
743
|
+
|jonh |1 |boss |20 |100 |
|
744
|
+
|porter |2 |manager |30 |200 |
|
745
|
+
------------------------------------------
|
746
|
+
|
530
747
|
"""
|
748
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
749
|
+
|
531
750
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
532
751
|
raise snowml_exceptions.SnowflakeMLException(
|
533
752
|
error_code=error_codes.NOT_FOUND,
|
@@ -539,6 +758,7 @@ class FeatureStore:
|
|
539
758
|
@dispatch_decorator()
|
540
759
|
def list_feature_views(
|
541
760
|
self,
|
761
|
+
*,
|
542
762
|
entity_name: Optional[str] = None,
|
543
763
|
feature_view_name: Optional[str] = None,
|
544
764
|
) -> DataFrame:
|
@@ -553,19 +773,34 @@ class FeatureStore:
|
|
553
773
|
|
554
774
|
Returns:
|
555
775
|
FeatureViews information as a Snowpark DataFrame.
|
776
|
+
|
777
|
+
Example::
|
778
|
+
|
779
|
+
>>> fs = FeatureStore(...)
|
780
|
+
>>> draft_fv = FeatureView(
|
781
|
+
... name='foo',
|
782
|
+
... entities=[e1, e2],
|
783
|
+
... feature_df=session.sql('...'),
|
784
|
+
... desc='this is description',
|
785
|
+
... )
|
786
|
+
>>> fs.register_feature_view(feature_view=draft_fv, version='v1')
|
787
|
+
>>> fs.list_feature_views().select("name", "version", "desc").show()
|
788
|
+
--------------------------------------------
|
789
|
+
|"NAME" |"VERSION" |"DESC" |
|
790
|
+
--------------------------------------------
|
791
|
+
|FOO |v1 |this is description |
|
792
|
+
--------------------------------------------
|
793
|
+
|
556
794
|
"""
|
557
795
|
if feature_view_name is not None:
|
558
796
|
feature_view_name = SqlIdentifier(feature_view_name)
|
559
797
|
|
560
798
|
if entity_name is not None:
|
561
799
|
entity_name = SqlIdentifier(entity_name)
|
562
|
-
|
563
|
-
return self._optimized_find_feature_views(entity_name, feature_view_name)
|
564
|
-
else:
|
565
|
-
return self._find_feature_views(entity_name, feature_view_name)
|
800
|
+
return self._optimized_find_feature_views(entity_name, feature_view_name)
|
566
801
|
else:
|
567
802
|
output_values: List[List[Any]] = []
|
568
|
-
for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
|
803
|
+
for row, _ in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
|
569
804
|
self._extract_feature_view_info(row, output_values)
|
570
805
|
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
571
806
|
|
@@ -584,6 +819,28 @@ class FeatureStore:
|
|
584
819
|
Raises:
|
585
820
|
SnowflakeMLException: [ValueError] FeatureView with name and version is not found,
|
586
821
|
or incurred exception when reconstructing the FeatureView object.
|
822
|
+
|
823
|
+
Example::
|
824
|
+
|
825
|
+
>>> fs = FeatureStore(...)
|
826
|
+
>>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
|
827
|
+
>>> draft_fv = FeatureView(
|
828
|
+
... name='foo',
|
829
|
+
... entities=[e1],
|
830
|
+
... feature_df=session.sql('...'),
|
831
|
+
... desc='this is description',
|
832
|
+
... )
|
833
|
+
>>> fs.register_feature_view(feature_view=draft_fv, version='v1')
|
834
|
+
<BLANKLINE>
|
835
|
+
>>> # fv is a local object that maps to a Snowflake backend object.
|
836
|
+
>>> fv = fs.get_feature_view('foo', 'v1')
|
837
|
+
>>> print(f"name: {fv.name}")
|
838
|
+
>>> print(f"version:{fv.version}")
|
839
|
+
>>> print(f"desc:{fv.desc}")
|
840
|
+
name: FOO
|
841
|
+
version:v1
|
842
|
+
desc:this is description
|
843
|
+
|
587
844
|
"""
|
588
845
|
name = SqlIdentifier(name)
|
589
846
|
version = FeatureViewVersion(version)
|
@@ -596,45 +853,287 @@ class FeatureStore:
|
|
596
853
|
original_exception=ValueError(f"Failed to find FeatureView {name}/{version}: {results}"),
|
597
854
|
)
|
598
855
|
|
599
|
-
return self._compose_feature_view(results[0], self.list_entities().collect())
|
856
|
+
return self._compose_feature_view(results[0][0], results[0][1], self.list_entities().collect())
|
600
857
|
|
601
|
-
@
|
858
|
+
@overload
|
859
|
+
def refresh_feature_view(self, feature_view: FeatureView) -> None:
|
860
|
+
...
|
861
|
+
|
862
|
+
@overload
|
863
|
+
def refresh_feature_view(self, feature_view: str, version: str) -> None:
|
864
|
+
...
|
865
|
+
|
866
|
+
@dispatch_decorator() # type: ignore[misc]
|
867
|
+
def refresh_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> None:
|
868
|
+
"""Manually refresh a feature view.
|
869
|
+
|
870
|
+
Args:
|
871
|
+
feature_view: A registered feature view object, or the name of feature view.
|
872
|
+
version: Optional version of feature view. Must set when argument feature_view is a str.
|
873
|
+
|
874
|
+
Example::
|
875
|
+
|
876
|
+
>>> fs = FeatureStore(...)
|
877
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
878
|
+
<BLANKLINE>
|
879
|
+
>>> # refresh with name and version
|
880
|
+
>>> fs.refresh_feature_view('MY_FV', 'v1')
|
881
|
+
>>> fs.get_refresh_history('MY_FV', 'v1').show()
|
882
|
+
-----------------------------------------------------------------------------------------------------
|
883
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
884
|
+
-----------------------------------------------------------------------------------------------------
|
885
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
886
|
+
-----------------------------------------------------------------------------------------------------
|
887
|
+
<BLANKLINE>
|
888
|
+
>>> # refresh with feature view object
|
889
|
+
>>> fs.refresh_feature_view(fv)
|
890
|
+
>>> fs.get_refresh_history(fv).show()
|
891
|
+
-----------------------------------------------------------------------------------------------------
|
892
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
893
|
+
-----------------------------------------------------------------------------------------------------
|
894
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:54:06.680000 |2024-07-10 14:54:07.226000 |INCREMENTAL |
|
895
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
896
|
+
-----------------------------------------------------------------------------------------------------
|
897
|
+
|
898
|
+
"""
|
899
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
900
|
+
|
901
|
+
if feature_view.status == FeatureViewStatus.STATIC:
|
902
|
+
warnings.warn(
|
903
|
+
"Static feature view can't be refreshed. You must set refresh_freq when register_feature_view().",
|
904
|
+
stacklevel=2,
|
905
|
+
category=UserWarning,
|
906
|
+
)
|
907
|
+
return
|
908
|
+
self._update_feature_view_status(feature_view, "REFRESH")
|
909
|
+
|
910
|
+
@overload
|
911
|
+
def get_refresh_history(
|
912
|
+
self, feature_view: FeatureView, version: Optional[str] = None, *, verbose: bool = False
|
913
|
+
) -> DataFrame:
|
914
|
+
...
|
915
|
+
|
916
|
+
@overload
|
917
|
+
def get_refresh_history(self, feature_view: str, version: str, *, verbose: bool = False) -> DataFrame:
|
918
|
+
...
|
919
|
+
|
920
|
+
def get_refresh_history(
|
921
|
+
self, feature_view: Union[FeatureView, str], version: Optional[str] = None, *, verbose: bool = False
|
922
|
+
) -> DataFrame:
|
923
|
+
"""Get refresh history statistics about a feature view.
|
924
|
+
|
925
|
+
Args:
|
926
|
+
feature_view: A registered feature view object, or the name of feature view.
|
927
|
+
version: Optional version of feature view. Must set when argument feature_view is a str.
|
928
|
+
verbose: Return more detailed history when set true.
|
929
|
+
|
930
|
+
Returns:
|
931
|
+
A dataframe containing the refresh history information.
|
932
|
+
|
933
|
+
Example::
|
934
|
+
|
935
|
+
>>> fs = FeatureStore(...)
|
936
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
937
|
+
>>> # refresh with name and version
|
938
|
+
>>> fs.refresh_feature_view('MY_FV', 'v1')
|
939
|
+
>>> fs.get_refresh_history('MY_FV', 'v1').show()
|
940
|
+
-----------------------------------------------------------------------------------------------------
|
941
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
942
|
+
-----------------------------------------------------------------------------------------------------
|
943
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
944
|
+
-----------------------------------------------------------------------------------------------------
|
945
|
+
<BLANKLINE>
|
946
|
+
>>> # refresh with feature view object
|
947
|
+
>>> fs.refresh_feature_view(fv)
|
948
|
+
>>> fs.get_refresh_history(fv).show()
|
949
|
+
-----------------------------------------------------------------------------------------------------
|
950
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
951
|
+
-----------------------------------------------------------------------------------------------------
|
952
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:54:06.680000 |2024-07-10 14:54:07.226000 |INCREMENTAL |
|
953
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
954
|
+
-----------------------------------------------------------------------------------------------------
|
955
|
+
|
956
|
+
"""
|
957
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
958
|
+
|
959
|
+
if feature_view.status == FeatureViewStatus.STATIC:
|
960
|
+
warnings.warn(
|
961
|
+
"Static feature view never refreshes.",
|
962
|
+
stacklevel=2,
|
963
|
+
category=UserWarning,
|
964
|
+
)
|
965
|
+
return self._session.create_dataframe([Row()])
|
966
|
+
|
967
|
+
if feature_view.status == FeatureViewStatus.DRAFT:
|
968
|
+
warnings.warn(
|
969
|
+
"This feature view has not been registered thus has no refresh history.",
|
970
|
+
stacklevel=2,
|
971
|
+
category=UserWarning,
|
972
|
+
)
|
973
|
+
return self._session.create_dataframe([Row()])
|
974
|
+
|
975
|
+
fv_resolved_name = FeatureView._get_physical_name(
|
976
|
+
feature_view.name,
|
977
|
+
feature_view.version, # type: ignore[arg-type]
|
978
|
+
).resolved()
|
979
|
+
select_cols = "*" if verbose else "name, state, refresh_start_time, refresh_end_time, refresh_action"
|
980
|
+
return self._session.sql(
|
981
|
+
f"""
|
982
|
+
SELECT
|
983
|
+
{select_cols}
|
984
|
+
FROM TABLE (
|
985
|
+
{self._config.database}.INFORMATION_SCHEMA.DYNAMIC_TABLE_REFRESH_HISTORY ()
|
986
|
+
)
|
987
|
+
WHERE NAME = '{fv_resolved_name}'
|
988
|
+
AND SCHEMA_NAME = '{self._config.schema}'
|
989
|
+
"""
|
990
|
+
)
|
991
|
+
|
992
|
+
@overload
|
602
993
|
def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
994
|
+
...
|
995
|
+
|
996
|
+
@overload
|
997
|
+
def resume_feature_view(self, feature_view: str, version: str) -> FeatureView:
|
998
|
+
...
|
999
|
+
|
1000
|
+
@dispatch_decorator() # type: ignore[misc]
|
1001
|
+
def resume_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> FeatureView:
|
603
1002
|
"""
|
604
1003
|
Resume a previously suspended FeatureView.
|
605
1004
|
|
606
1005
|
Args:
|
607
|
-
feature_view: FeatureView to resume.
|
1006
|
+
feature_view: FeatureView object or name to resume.
|
1007
|
+
version: Optional version of feature view. Must set when argument feature_view is a str.
|
608
1008
|
|
609
1009
|
Returns:
|
610
1010
|
A new feature view with updated status.
|
1011
|
+
|
1012
|
+
Example::
|
1013
|
+
|
1014
|
+
>>> fs = FeatureStore(...)
|
1015
|
+
>>> # you must already have feature views registered
|
1016
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
1017
|
+
>>> fs.suspend_feature_view('MY_FV', 'v1')
|
1018
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1019
|
+
-------------------------------------------
|
1020
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1021
|
+
-------------------------------------------
|
1022
|
+
|MY_FV |v1 |SUSPENDED |
|
1023
|
+
-------------------------------------------
|
1024
|
+
<BLANKLINE>
|
1025
|
+
>>> fs.resume_feature_view('MY_FV', 'v1')
|
1026
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1027
|
+
-------------------------------------------
|
1028
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1029
|
+
-------------------------------------------
|
1030
|
+
|MY_FV |v1 |ACTIVE |
|
1031
|
+
-------------------------------------------
|
1032
|
+
|
611
1033
|
"""
|
1034
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
612
1035
|
return self._update_feature_view_status(feature_view, "RESUME")
|
613
1036
|
|
614
|
-
@
|
1037
|
+
@overload
|
615
1038
|
def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
1039
|
+
...
|
1040
|
+
|
1041
|
+
@overload
|
1042
|
+
def suspend_feature_view(self, feature_view: str, version: str) -> FeatureView:
|
1043
|
+
...
|
1044
|
+
|
1045
|
+
@dispatch_decorator() # type: ignore[misc]
|
1046
|
+
def suspend_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> FeatureView:
|
616
1047
|
"""
|
617
1048
|
Suspend an active FeatureView.
|
618
1049
|
|
619
1050
|
Args:
|
620
|
-
feature_view: FeatureView to suspend.
|
1051
|
+
feature_view: FeatureView object or name to suspend.
|
1052
|
+
version: Optional version of feature view. Must set when argument feature_view is a str.
|
621
1053
|
|
622
1054
|
Returns:
|
623
1055
|
A new feature view with updated status.
|
1056
|
+
|
1057
|
+
Example::
|
1058
|
+
|
1059
|
+
>>> fs = FeatureStore(...)
|
1060
|
+
>>> # assume you already have feature views registered
|
1061
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
1062
|
+
>>> fs.suspend_feature_view('MY_FV', 'v1')
|
1063
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1064
|
+
-------------------------------------------
|
1065
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1066
|
+
-------------------------------------------
|
1067
|
+
|MY_FV |v1 |SUSPENDED |
|
1068
|
+
-------------------------------------------
|
1069
|
+
<BLANKLINE>
|
1070
|
+
>>> fs.resume_feature_view('MY_FV', 'v1')
|
1071
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1072
|
+
-------------------------------------------
|
1073
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1074
|
+
-------------------------------------------
|
1075
|
+
|MY_FV |v1 |ACTIVE |
|
1076
|
+
-------------------------------------------
|
1077
|
+
|
624
1078
|
"""
|
1079
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
625
1080
|
return self._update_feature_view_status(feature_view, "SUSPEND")
|
626
1081
|
|
627
|
-
@
|
1082
|
+
@overload
|
628
1083
|
def delete_feature_view(self, feature_view: FeatureView) -> None:
|
1084
|
+
...
|
1085
|
+
|
1086
|
+
@overload
|
1087
|
+
def delete_feature_view(self, feature_view: str, version: str) -> None:
|
1088
|
+
...
|
1089
|
+
|
1090
|
+
@dispatch_decorator() # type: ignore[misc]
|
1091
|
+
def delete_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> None:
|
629
1092
|
"""
|
630
1093
|
Delete a FeatureView.
|
631
1094
|
|
632
1095
|
Args:
|
633
|
-
feature_view: FeatureView to delete.
|
1096
|
+
feature_view: FeatureView object or name to delete.
|
1097
|
+
version: Optional version of feature view. Must set when argument feature_view is a str.
|
634
1098
|
|
635
1099
|
Raises:
|
636
1100
|
SnowflakeMLException: [ValueError] FeatureView is not registered.
|
1101
|
+
|
1102
|
+
Example::
|
1103
|
+
|
1104
|
+
>>> fs = FeatureStore(...)
|
1105
|
+
>>> fv = FeatureView('FV0', ...)
|
1106
|
+
>>> fv1 = fs.register_feature_view(fv, 'FIRST')
|
1107
|
+
>>> fv2 = fs.register_feature_view(fv, 'SECOND')
|
1108
|
+
>>> fs.list_feature_views().select('NAME', 'VERSION').show()
|
1109
|
+
----------------------
|
1110
|
+
|"NAME" |"VERSION" |
|
1111
|
+
----------------------
|
1112
|
+
|FV0 |SECOND |
|
1113
|
+
|FV0 |FIRST |
|
1114
|
+
----------------------
|
1115
|
+
<BLANKLINE>
|
1116
|
+
>>> # delete with name and version
|
1117
|
+
>>> fs.delete_feature_view('FV0', 'FIRST')
|
1118
|
+
>>> fs.list_feature_views().select('NAME', 'VERSION').show()
|
1119
|
+
----------------------
|
1120
|
+
|"NAME" |"VERSION" |
|
1121
|
+
----------------------
|
1122
|
+
|FV0 |SECOND |
|
1123
|
+
----------------------
|
1124
|
+
<BLANKLINE>
|
1125
|
+
>>> # delete with feature view object
|
1126
|
+
>>> fs.delete_feature_view(fv2)
|
1127
|
+
>>> fs.list_feature_views().select('NAME', 'VERSION').show()
|
1128
|
+
----------------------
|
1129
|
+
|"NAME" |"VERSION" |
|
1130
|
+
----------------------
|
1131
|
+
| | |
|
1132
|
+
----------------------
|
1133
|
+
|
637
1134
|
"""
|
1135
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
1136
|
+
|
638
1137
|
# TODO: we should leverage lineage graph to check downstream deps, and block the deletion
|
639
1138
|
# if there're other FVs depending on this
|
640
1139
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
@@ -666,6 +1165,19 @@ class FeatureStore:
|
|
666
1165
|
|
667
1166
|
Returns:
|
668
1167
|
Snowpark DataFrame containing the results.
|
1168
|
+
|
1169
|
+
Example::
|
1170
|
+
|
1171
|
+
>>> fs = FeatureStore(...)
|
1172
|
+
>>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
|
1173
|
+
>>> fs.register_entity(e_1)
|
1174
|
+
>>> fs.list_entities().show()
|
1175
|
+
-----------------------------------------------------------
|
1176
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
1177
|
+
-----------------------------------------------------------
|
1178
|
+
|MY_ENTITY |["COL_1"] |My first entity. |REGTEST_RL |
|
1179
|
+
-----------------------------------------------------------
|
1180
|
+
|
669
1181
|
"""
|
670
1182
|
prefix_len = len(_ENTITY_TAG_PREFIX) + 1
|
671
1183
|
return cast(
|
@@ -695,6 +1207,19 @@ class FeatureStore:
|
|
695
1207
|
SnowflakeMLException: [ValueError] Entity is not found.
|
696
1208
|
SnowflakeMLException: [RuntimeError] Failed to retrieve tag reference information.
|
697
1209
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
1210
|
+
|
1211
|
+
Example::
|
1212
|
+
|
1213
|
+
>>> fs = FeatureStore(...)
|
1214
|
+
>>> # e_1 is a local object that hasn't registered to Snowflake backend yet.
|
1215
|
+
>>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
|
1216
|
+
>>> fs.register_entity(e_1)
|
1217
|
+
<BLANKLINE>
|
1218
|
+
>>> # e_2 is a local object that points to a backend object in Snowflake.
|
1219
|
+
>>> e_2 = fs.get_entity("my_entity")
|
1220
|
+
>>> print(e_2)
|
1221
|
+
Entity(name=MY_ENTITY, join_keys=['COL_1'], owner=REGTEST_RL, desc=My first entity.)
|
1222
|
+
|
698
1223
|
"""
|
699
1224
|
name = SqlIdentifier(name)
|
700
1225
|
try:
|
@@ -725,12 +1250,33 @@ class FeatureStore:
|
|
725
1250
|
Delete a previously registered Entity.
|
726
1251
|
|
727
1252
|
Args:
|
728
|
-
name:
|
1253
|
+
name: Name of entity to be deleted.
|
729
1254
|
|
730
1255
|
Raises:
|
731
1256
|
SnowflakeMLException: [ValueError] Entity with given name not exists.
|
732
1257
|
SnowflakeMLException: [RuntimeError] Failed to alter schema or drop tag.
|
733
1258
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
1259
|
+
|
1260
|
+
Example::
|
1261
|
+
|
1262
|
+
>>> fs = FeatureStore(...)
|
1263
|
+
>>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
|
1264
|
+
>>> fs.register_entity(e_1)
|
1265
|
+
>>> fs.list_entities().show()
|
1266
|
+
-----------------------------------------------------------
|
1267
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
1268
|
+
-----------------------------------------------------------
|
1269
|
+
|MY_ENTITY |["COL_1"] |My first entity. |REGTEST_RL |
|
1270
|
+
-----------------------------------------------------------
|
1271
|
+
<BLANKLINE>
|
1272
|
+
>>> fs.delete_entity("my_entity")
|
1273
|
+
>>> fs.list_entities().show()
|
1274
|
+
-------------------------------------------
|
1275
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
1276
|
+
-------------------------------------------
|
1277
|
+
| | | | |
|
1278
|
+
-------------------------------------------
|
1279
|
+
|
734
1280
|
"""
|
735
1281
|
name = SqlIdentifier(name)
|
736
1282
|
|
@@ -764,6 +1310,7 @@ class FeatureStore:
|
|
764
1310
|
self,
|
765
1311
|
spine_df: DataFrame,
|
766
1312
|
features: Union[List[Union[FeatureView, FeatureViewSlice]], List[str]],
|
1313
|
+
*,
|
767
1314
|
spine_timestamp_col: Optional[str] = None,
|
768
1315
|
exclude_columns: Optional[List[str]] = None,
|
769
1316
|
include_feature_view_timestamp_col: bool = False,
|
@@ -786,6 +1333,23 @@ class FeatureStore:
|
|
786
1333
|
|
787
1334
|
Raises:
|
788
1335
|
ValueError: if features is empty.
|
1336
|
+
|
1337
|
+
Example::
|
1338
|
+
|
1339
|
+
>>> fs = FeatureStore(...)
|
1340
|
+
>>> # Assume you already have feature view registered.
|
1341
|
+
>>> fv = fs.get_feature_view('my_fv', 'v1')
|
1342
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1343
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
1344
|
+
>>> fs.retrieve_feature_values(spine_df, [fv]).show()
|
1345
|
+
--------------------
|
1346
|
+
|"END_STATION_ID" |
|
1347
|
+
--------------------
|
1348
|
+
|505 |
|
1349
|
+
|347 |
|
1350
|
+
|466 |
|
1351
|
+
--------------------
|
1352
|
+
|
789
1353
|
"""
|
790
1354
|
if spine_timestamp_col is not None:
|
791
1355
|
spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
|
@@ -807,12 +1371,97 @@ class FeatureStore:
|
|
807
1371
|
|
808
1372
|
return df
|
809
1373
|
|
1374
|
+
@dispatch_decorator()
|
1375
|
+
def generate_training_set(
|
1376
|
+
self,
|
1377
|
+
spine_df: DataFrame,
|
1378
|
+
features: List[Union[FeatureView, FeatureViewSlice]],
|
1379
|
+
*,
|
1380
|
+
save_as: Optional[str] = None,
|
1381
|
+
spine_timestamp_col: Optional[str] = None,
|
1382
|
+
spine_label_cols: Optional[List[str]] = None,
|
1383
|
+
exclude_columns: Optional[List[str]] = None,
|
1384
|
+
include_feature_view_timestamp_col: bool = False,
|
1385
|
+
) -> DataFrame:
|
1386
|
+
"""
|
1387
|
+
Generate a training set from the specified Spine DataFrame and Feature Views. Result is
|
1388
|
+
materialized to a Snowflake Table if `save_as` is specified.
|
1389
|
+
|
1390
|
+
Args:
|
1391
|
+
spine_df: Snowpark DataFrame to join features into.
|
1392
|
+
features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
|
1393
|
+
save_as: If specified, a new table containing the produced result will be created. Name can be a fully
|
1394
|
+
qualified name or an unqualified name. If unqualified, defaults to the Feature Store database and schema.
|
1395
|
+
spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
|
1396
|
+
time-series features. If spine_timestamp_col is not none, the input features also must have
|
1397
|
+
timestamp_col.
|
1398
|
+
spine_label_cols: Name of column(s) in spine_df that contains labels.
|
1399
|
+
exclude_columns: Name of column(s) to exclude from the resulting training set.
|
1400
|
+
include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
|
1401
|
+
(if feature view has timestamp column) if set true. Default to false.
|
1402
|
+
|
1403
|
+
Returns:
|
1404
|
+
Returns a Snowpark DataFrame representing the training set.
|
1405
|
+
|
1406
|
+
Raises:
|
1407
|
+
SnowflakeMLException: [RuntimeError] Materialized table name already exists
|
1408
|
+
SnowflakeMLException: [RuntimeError] Failed to create materialized table.
|
1409
|
+
|
1410
|
+
Example::
|
1411
|
+
|
1412
|
+
>>> fs = FeatureStore(session, ...)
|
1413
|
+
>>> # Assume you already have feature view registered.
|
1414
|
+
>>> fv = fs.get_feature_view("MY_FV", "1")
|
1415
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1416
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
1417
|
+
>>> training_set = fs.generate_training_set(
|
1418
|
+
... spine_df,
|
1419
|
+
... [fv],
|
1420
|
+
... save_as="my_training_set",
|
1421
|
+
... )
|
1422
|
+
>>> print(type(training_set))
|
1423
|
+
<class 'snowflake.snowpark.table.Table'>
|
1424
|
+
<BLANKLINE>
|
1425
|
+
>>> print(training_set.queries)
|
1426
|
+
{'queries': ['SELECT * FROM (my_training_set)'], 'post_actions': []}
|
1427
|
+
|
1428
|
+
"""
|
1429
|
+
if spine_timestamp_col is not None:
|
1430
|
+
spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
|
1431
|
+
if spine_label_cols is not None:
|
1432
|
+
spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
|
1433
|
+
|
1434
|
+
result_df, join_keys = self._join_features(
|
1435
|
+
spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
|
1436
|
+
)
|
1437
|
+
|
1438
|
+
if exclude_columns is not None:
|
1439
|
+
result_df = self._exclude_columns(result_df, exclude_columns)
|
1440
|
+
|
1441
|
+
if save_as is not None:
|
1442
|
+
try:
|
1443
|
+
save_as = self._get_fully_qualified_name(save_as)
|
1444
|
+
result_df.write.mode("errorifexists").save_as_table(save_as)
|
1445
|
+
return self._session.table(save_as)
|
1446
|
+
except SnowparkSQLException as e:
|
1447
|
+
if e.sql_error_code == sql_error_codes.OBJECT_ALREADY_EXISTS:
|
1448
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1449
|
+
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
1450
|
+
original_exception=RuntimeError(str(e)),
|
1451
|
+
) from e
|
1452
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1453
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1454
|
+
original_exception=RuntimeError(f"An error occurred during training set materialization: {e}."),
|
1455
|
+
) from e
|
1456
|
+
return result_df
|
1457
|
+
|
810
1458
|
@overload
|
811
1459
|
def generate_dataset(
|
812
1460
|
self,
|
813
1461
|
name: str,
|
814
1462
|
spine_df: DataFrame,
|
815
1463
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
1464
|
+
*,
|
816
1465
|
version: Optional[str] = None,
|
817
1466
|
spine_timestamp_col: Optional[str] = None,
|
818
1467
|
spine_label_cols: Optional[List[str]] = None,
|
@@ -829,6 +1478,7 @@ class FeatureStore:
|
|
829
1478
|
name: str,
|
830
1479
|
spine_df: DataFrame,
|
831
1480
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
1481
|
+
*,
|
832
1482
|
output_type: Literal["table"],
|
833
1483
|
version: Optional[str] = None,
|
834
1484
|
spine_timestamp_col: Optional[str] = None,
|
@@ -845,6 +1495,7 @@ class FeatureStore:
|
|
845
1495
|
name: str,
|
846
1496
|
spine_df: DataFrame,
|
847
1497
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
1498
|
+
*,
|
848
1499
|
version: Optional[str] = None,
|
849
1500
|
spine_timestamp_col: Optional[str] = None,
|
850
1501
|
spine_label_cols: Optional[List[str]] = None,
|
@@ -859,7 +1510,7 @@ class FeatureStore:
|
|
859
1510
|
Args:
|
860
1511
|
name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
|
861
1512
|
by their name and version.
|
862
|
-
spine_df:
|
1513
|
+
spine_df: Snowpark DataFrame to join features into.
|
863
1514
|
features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
|
864
1515
|
version: The version of the Dataset to be generated. If none specified, the current timestamp
|
865
1516
|
will be used instead.
|
@@ -867,84 +1518,106 @@ class FeatureStore:
|
|
867
1518
|
time-series features. If spine_timestamp_col is not none, the input features also must have
|
868
1519
|
timestamp_col.
|
869
1520
|
spine_label_cols: Name of column(s) in spine_df that contains labels.
|
870
|
-
exclude_columns:
|
871
|
-
The underlying storage will still contain the columns.
|
1521
|
+
exclude_columns: Name of column(s) to exclude from the resulting training set.
|
872
1522
|
include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
|
873
1523
|
(if feature view has timestamp column) if set true. Default to false.
|
874
1524
|
desc: A description about this dataset.
|
875
|
-
output_type: The type of Snowflake storage to use for the generated training data.
|
1525
|
+
output_type: (Deprecated) The type of Snowflake storage to use for the generated training data.
|
876
1526
|
|
877
1527
|
Returns:
|
878
1528
|
If output_type is "dataset" (default), returns a Dataset object.
|
879
1529
|
If output_type is "table", returns a Snowpark DataFrame representing the table.
|
880
1530
|
|
881
1531
|
Raises:
|
882
|
-
SnowflakeMLException: [ValueError] Dataset name/version already exists
|
883
|
-
SnowflakeMLException: [ValueError] Snapshot creation failed.
|
884
1532
|
SnowflakeMLException: [ValueError] Invalid output_type specified.
|
885
|
-
SnowflakeMLException: [RuntimeError]
|
1533
|
+
SnowflakeMLException: [RuntimeError] Dataset name/version already exists.
|
886
1534
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
1535
|
+
|
1536
|
+
Example::
|
1537
|
+
|
1538
|
+
>>> fs = FeatureStore(session, ...)
|
1539
|
+
>>> # Assume you already have feature view registered.
|
1540
|
+
>>> fv = fs.get_feature_view("MY_FV", "1")
|
1541
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1542
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
1543
|
+
>>> my_dataset = fs.generate_dataset(
|
1544
|
+
... "my_dataset",
|
1545
|
+
... spine_df,
|
1546
|
+
... [fv],
|
1547
|
+
... )
|
1548
|
+
>>> # Current timestamp will be used as default version name.
|
1549
|
+
>>> # You can explicitly overwrite by setting a version.
|
1550
|
+
>>> my_dataset.list_versions()
|
1551
|
+
['2024_07_12_11_26_22']
|
1552
|
+
<BLANKLINE>
|
1553
|
+
>>> my_dataset.read.to_snowpark_dataframe().show(n=3)
|
1554
|
+
-------------------------------------------------------
|
1555
|
+
|"QUALITY" |"FIXED_ACIDITY" |"VOLATILE_ACIDITY" |
|
1556
|
+
-------------------------------------------------------
|
1557
|
+
|3 |11.600000381469727 |0.5799999833106995 |
|
1558
|
+
|3 |8.300000190734863 |1.0199999809265137 |
|
1559
|
+
|3 |7.400000095367432 |1.184999942779541 |
|
1560
|
+
-------------------------------------------------------
|
1561
|
+
|
887
1562
|
"""
|
888
1563
|
if output_type not in {"table", "dataset"}:
|
889
1564
|
raise snowml_exceptions.SnowflakeMLException(
|
890
1565
|
error_code=error_codes.INVALID_ARGUMENT,
|
891
1566
|
original_exception=ValueError(f"Invalid output_type: {output_type}."),
|
892
1567
|
)
|
893
|
-
if spine_timestamp_col is not None:
|
894
|
-
spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
|
895
|
-
if spine_label_cols is not None:
|
896
|
-
spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
|
897
|
-
|
898
|
-
result_df, join_keys = self._join_features(
|
899
|
-
spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
|
900
|
-
)
|
901
1568
|
|
902
1569
|
# Convert name to fully qualified name if not already fully qualified
|
903
|
-
|
904
|
-
name = "{}.{}.{}".format(
|
905
|
-
db_name or self._config.database,
|
906
|
-
schema_name or self._config.schema,
|
907
|
-
object_name,
|
908
|
-
)
|
1570
|
+
name = self._get_fully_qualified_name(name)
|
909
1571
|
version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
910
1572
|
|
911
|
-
if exclude_columns is not None:
|
912
|
-
result_df = self._exclude_columns(result_df, exclude_columns)
|
913
|
-
|
914
1573
|
fs_meta = FeatureStoreMetadata(
|
915
1574
|
spine_query=spine_df.queries["queries"][-1],
|
916
1575
|
serialized_feature_views=[fv.to_json() for fv in features],
|
917
1576
|
spine_timestamp_col=spine_timestamp_col,
|
918
1577
|
)
|
919
1578
|
|
1579
|
+
# Only set a save_as name if output_type is table
|
1580
|
+
table_name = f"{name}_{version}" if output_type == "table" else None
|
1581
|
+
result_df = self.generate_training_set(
|
1582
|
+
spine_df,
|
1583
|
+
features,
|
1584
|
+
spine_timestamp_col=spine_timestamp_col,
|
1585
|
+
spine_label_cols=spine_label_cols,
|
1586
|
+
exclude_columns=exclude_columns,
|
1587
|
+
include_feature_view_timestamp_col=include_feature_view_timestamp_col,
|
1588
|
+
save_as=table_name,
|
1589
|
+
)
|
1590
|
+
if output_type == "table":
|
1591
|
+
warnings.warn(
|
1592
|
+
"Generating a table from generate_dataset() is deprecated and will be removed in a future release,"
|
1593
|
+
" use generate_training_set() instead.",
|
1594
|
+
DeprecationWarning,
|
1595
|
+
stacklevel=2,
|
1596
|
+
)
|
1597
|
+
return result_df
|
1598
|
+
|
920
1599
|
try:
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
error_code=error_codes.SNOWML_CREATE_FAILED,
|
931
|
-
original_exception=RuntimeError(
|
932
|
-
"Dataset is not enabled in your account. Ask your account admin to set"
|
933
|
-
' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
|
934
|
-
" as a Snowflake Table instead."
|
935
|
-
),
|
936
|
-
)
|
937
|
-
ds: dataset.Dataset = dataset.create_from_dataframe(
|
938
|
-
self._session,
|
939
|
-
name,
|
940
|
-
version,
|
941
|
-
input_dataframe=result_df,
|
942
|
-
exclude_cols=[spine_timestamp_col],
|
943
|
-
label_cols=spine_label_cols,
|
944
|
-
properties=fs_meta,
|
945
|
-
comment=desc,
|
1600
|
+
assert output_type == "dataset"
|
1601
|
+
if not self._is_dataset_enabled():
|
1602
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1603
|
+
error_code=error_codes.SNOWML_CREATE_FAILED,
|
1604
|
+
original_exception=RuntimeError(
|
1605
|
+
"Dataset is not enabled in your account. Ask your account admin to set"
|
1606
|
+
" FEATURE_DATASET=ENABLED or use generate_training_set() instead"
|
1607
|
+
" to generate the data as a Snowflake Table."
|
1608
|
+
),
|
946
1609
|
)
|
947
|
-
|
1610
|
+
ds: dataset.Dataset = dataset.create_from_dataframe(
|
1611
|
+
self._session,
|
1612
|
+
name,
|
1613
|
+
version,
|
1614
|
+
input_dataframe=result_df,
|
1615
|
+
exclude_cols=[spine_timestamp_col] if spine_timestamp_col is not None else [],
|
1616
|
+
label_cols=spine_label_cols,
|
1617
|
+
properties=fs_meta,
|
1618
|
+
comment=desc,
|
1619
|
+
)
|
1620
|
+
return ds
|
948
1621
|
|
949
1622
|
except dataset_errors.DatasetExistError as e:
|
950
1623
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -970,6 +1643,32 @@ class FeatureStore:
|
|
970
1643
|
|
971
1644
|
Raises:
|
972
1645
|
ValueError: if dataset object is not generated from feature store.
|
1646
|
+
|
1647
|
+
Example::
|
1648
|
+
|
1649
|
+
>>> fs = FeatureStore(session, ...)
|
1650
|
+
>>> # Assume you already have feature view registered.
|
1651
|
+
>>> fv = fs.get_feature_view("MY_FV", "1.0")
|
1652
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1653
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
1654
|
+
>>> my_dataset = fs.generate_dataset(
|
1655
|
+
... "my_dataset",
|
1656
|
+
... spine_df,
|
1657
|
+
... [fv],
|
1658
|
+
... )
|
1659
|
+
>>> fvs = fs.load_feature_views_from_dataset(my_dataset)
|
1660
|
+
>>> print(len(fvs))
|
1661
|
+
1
|
1662
|
+
<BLANKLINE>
|
1663
|
+
>>> print(type(fvs[0]))
|
1664
|
+
<class 'snowflake.ml.feature_store.feature_view.FeatureView'>
|
1665
|
+
<BLANKLINE>
|
1666
|
+
>>> print(fvs[0].name)
|
1667
|
+
MY_FV
|
1668
|
+
<BLANKLINE>
|
1669
|
+
>>> print(fvs[0].version)
|
1670
|
+
1.0
|
1671
|
+
|
973
1672
|
"""
|
974
1673
|
assert ds.selected_version is not None
|
975
1674
|
source_meta = ds.selected_version._get_metadata()
|
@@ -1007,11 +1706,11 @@ class FeatureStore:
|
|
1007
1706
|
if dryrun:
|
1008
1707
|
logger.info(
|
1009
1708
|
"Following feature views and entities will be deleted."
|
1010
|
-
+ " Set 'dryrun=False' to perform the actual deletion."
|
1709
|
+
+ " Set 'dryrun=False' to perform the actual deletion.",
|
1011
1710
|
)
|
1012
1711
|
logger.info(f"Total {len(all_fvs_rows)} Feature views to be deleted:")
|
1013
1712
|
all_fvs_df.show(n=len(all_fvs_rows))
|
1014
|
-
logger.info(f"\nTotal {len(all_entities_rows)}
|
1713
|
+
logger.info(f"\nTotal {len(all_entities_rows)} Entities to be deleted:")
|
1015
1714
|
all_entities_df.show(n=len(all_entities_rows))
|
1016
1715
|
return
|
1017
1716
|
|
@@ -1391,20 +2090,36 @@ class FeatureStore:
|
|
1391
2090
|
return SqlIdentifier(identifier.concat_names([_ENTITY_TAG_PREFIX, raw_name]))
|
1392
2091
|
|
1393
2092
|
def _get_fully_qualified_name(self, name: Union[SqlIdentifier, str]) -> str:
|
1394
|
-
|
2093
|
+
# Do a quick check to see if we can skip regex operations
|
2094
|
+
if "." not in name:
|
2095
|
+
return f"{self._config.full_schema_path}.{name}"
|
2096
|
+
|
2097
|
+
db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
|
2098
|
+
return "{}.{}.{}".format(
|
2099
|
+
db_name or self._config.database,
|
2100
|
+
schema_name or self._config.schema,
|
2101
|
+
object_name,
|
2102
|
+
)
|
1395
2103
|
|
1396
2104
|
# TODO: SHOW DYNAMIC TABLES is very slow while other show objects are fast, investigate with DT in SNOW-902804.
|
1397
2105
|
def _get_fv_backend_representations(
|
1398
2106
|
self, object_name: Optional[SqlIdentifier], prefix_match: bool = False
|
1399
|
-
) -> List[Row]:
|
1400
|
-
dynamic_table_results =
|
1401
|
-
|
2107
|
+
) -> List[Tuple[Row, _FeatureStoreObjTypes]]:
|
2108
|
+
dynamic_table_results = [
|
2109
|
+
(d, _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW)
|
2110
|
+
for d in self._find_object("DYNAMIC TABLES", object_name, prefix_match)
|
2111
|
+
]
|
2112
|
+
view_results = [
|
2113
|
+
(d, _FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW)
|
2114
|
+
for d in self._find_object("VIEWS", object_name, prefix_match)
|
2115
|
+
]
|
1402
2116
|
return dynamic_table_results + view_results
|
1403
2117
|
|
1404
2118
|
def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
|
1405
2119
|
assert operation in [
|
1406
2120
|
"RESUME",
|
1407
2121
|
"SUSPEND",
|
2122
|
+
"REFRESH",
|
1408
2123
|
], f"Operation: {operation} not supported"
|
1409
2124
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
1410
2125
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1417,9 +2132,10 @@ class FeatureStore:
|
|
1417
2132
|
self._session.sql(f"ALTER DYNAMIC TABLE {fully_qualified_name} {operation}").collect(
|
1418
2133
|
statement_params=self._telemetry_stmp
|
1419
2134
|
)
|
1420
|
-
|
1421
|
-
|
1422
|
-
|
2135
|
+
if operation != "REFRESH":
|
2136
|
+
self._session.sql(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}").collect(
|
2137
|
+
statement_params=self._telemetry_stmp
|
2138
|
+
)
|
1423
2139
|
except Exception as e:
|
1424
2140
|
raise snowml_exceptions.SnowflakeMLException(
|
1425
2141
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
@@ -1438,46 +2154,28 @@ class FeatureStore:
|
|
1438
2154
|
# TODO: this can be optimized further by directly getting all possible FVs and filter by tag
|
1439
2155
|
# it's easier to rewrite the code once we can remove the tag_reference path
|
1440
2156
|
all_fvs = self._get_fv_backend_representations(object_name=None)
|
1441
|
-
fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
|
2157
|
+
fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r, _ in all_fvs}
|
1442
2158
|
|
1443
2159
|
if len(fv_maps.keys()) == 0:
|
1444
2160
|
return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1445
2161
|
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
FROM TABLE(
|
1453
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1454
|
-
TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
|
1455
|
-
)
|
1456
|
-
) {filter_clause}"""
|
1457
|
-
).collect(statement_params=self._telemetry_stmp)
|
1458
|
-
except Exception as e:
|
1459
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1460
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1461
|
-
original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
|
1462
|
-
) from e
|
2162
|
+
filters = (
|
2163
|
+
[lambda d: d["entityName"].startswith(feature_view_name.resolved())] # type: ignore[union-attr]
|
2164
|
+
if feature_view_name
|
2165
|
+
else None
|
2166
|
+
)
|
2167
|
+
res = self._lookup_tagged_objects(self._get_entity_name(entity_name), filters)
|
1463
2168
|
|
1464
2169
|
output_values: List[List[Any]] = []
|
1465
2170
|
for r in res:
|
1466
|
-
row = fv_maps[SqlIdentifier(r["
|
2171
|
+
row = fv_maps[SqlIdentifier(r["entityName"], case_sensitive=True)]
|
1467
2172
|
self._extract_feature_view_info(row, output_values)
|
1468
2173
|
|
1469
2174
|
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1470
2175
|
|
1471
2176
|
def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
|
1472
2177
|
name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1473
|
-
|
1474
|
-
if m is None:
|
1475
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1476
|
-
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
1477
|
-
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
|
1478
|
-
)
|
1479
|
-
|
1480
|
-
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
2178
|
+
fv_metadata, _ = self._lookup_feature_view_metadata(row, FeatureView._get_physical_name(name, version))
|
1481
2179
|
|
1482
2180
|
values: List[Any] = []
|
1483
2181
|
values.append(name)
|
@@ -1488,63 +2186,42 @@ class FeatureStore:
|
|
1488
2186
|
values.append(row["owner"])
|
1489
2187
|
values.append(row["comment"])
|
1490
2188
|
values.append(fv_metadata.entities)
|
2189
|
+
values.append(row["target_lag"] if "target_lag" in row else None)
|
2190
|
+
values.append(row["refresh_mode"] if "refresh_mode" in row else None)
|
2191
|
+
values.append(row["scheduling_state"] if "scheduling_state" in row else None)
|
2192
|
+
values.append(row["warehouse"] if "warehouse" in row else None)
|
1491
2193
|
output_values.append(values)
|
1492
2194
|
|
1493
|
-
def
|
1494
|
-
if
|
1495
|
-
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1499
|
-
|
1500
|
-
|
1501
|
-
|
1502
|
-
|
1503
|
-
|
1504
|
-
|
1505
|
-
|
1506
|
-
|
1507
|
-
|
1508
|
-
|
1509
|
-
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
)
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
|
1521
|
-
]
|
1522
|
-
|
1523
|
-
results = self._session.sql("\nUNION\n".join(queries)).collect(statement_params=self._telemetry_stmp)
|
1524
|
-
except Exception as e:
|
1525
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1526
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1527
|
-
original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
|
1528
|
-
) from e
|
1529
|
-
|
1530
|
-
output_values: List[List[Any]] = []
|
1531
|
-
for r in results:
|
1532
|
-
fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
|
1533
|
-
for retrieved_entity in fv_metadata.entities:
|
1534
|
-
if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
|
1535
|
-
fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1536
|
-
fv_name = SqlIdentifier(fv_name, case_sensitive=True)
|
1537
|
-
obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
|
1538
|
-
if feature_view_name is not None:
|
1539
|
-
if fv_name == feature_view_name:
|
1540
|
-
self._extract_feature_view_info(fv_maps[obj_name], output_values)
|
1541
|
-
else:
|
1542
|
-
continue
|
1543
|
-
else:
|
1544
|
-
self._extract_feature_view_info(fv_maps[obj_name], output_values)
|
1545
|
-
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
2195
|
+
def _lookup_feature_view_metadata(self, row: Row, fv_name: str) -> Tuple[_FeatureViewMetadata, str]:
|
2196
|
+
if len(row["text"]) == 0:
|
2197
|
+
# NOTE: if this is a shared feature view, then text column will be empty due to privacy constraints.
|
2198
|
+
# So instead of looking at original query text, we will obtain metadata by querying the tag value.
|
2199
|
+
# For query body, we will just use a simple select instead of original DDL query since shared feature views
|
2200
|
+
# are read-only.
|
2201
|
+
try:
|
2202
|
+
res = self._lookup_tags(
|
2203
|
+
domain="table", obj_name=fv_name, filter_fns=[lambda d: d["tagName"] == _FEATURE_VIEW_METADATA_TAG]
|
2204
|
+
)
|
2205
|
+
fv_metadata = _FeatureViewMetadata.from_json(res[0]["tagValue"])
|
2206
|
+
query = f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}"
|
2207
|
+
return (fv_metadata, query)
|
2208
|
+
except Exception as e:
|
2209
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2210
|
+
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
2211
|
+
original_exception=RuntimeError(f"Failed to extract feature_view metadata for {fv_name}: {e}."),
|
2212
|
+
)
|
2213
|
+
else:
|
2214
|
+
m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
|
2215
|
+
if m is None:
|
2216
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2217
|
+
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
2218
|
+
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {fv_name}: {row}."),
|
2219
|
+
)
|
2220
|
+
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
2221
|
+
query = m.group("query")
|
2222
|
+
return (fv_metadata, query)
|
1546
2223
|
|
1547
|
-
def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
|
2224
|
+
def _compose_feature_view(self, row: Row, obj_type: _FeatureStoreObjTypes, entity_list: List[Row]) -> FeatureView:
|
1548
2225
|
def find_and_compose_entity(name: str) -> Entity:
|
1549
2226
|
name = SqlIdentifier(name).resolved()
|
1550
2227
|
for e in entity_list:
|
@@ -1558,21 +2235,14 @@ class FeatureStore:
|
|
1558
2235
|
|
1559
2236
|
name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1560
2237
|
name = SqlIdentifier(name, case_sensitive=True)
|
1561
|
-
m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
|
1562
|
-
if m is None:
|
1563
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1564
|
-
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
1565
|
-
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
|
1566
|
-
)
|
1567
|
-
|
1568
2238
|
fv_name = FeatureView._get_physical_name(name, version)
|
2239
|
+
fv_metadata, query = self._lookup_feature_view_metadata(row, fv_name)
|
2240
|
+
|
1569
2241
|
infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
|
2242
|
+
desc = row["comment"]
|
1570
2243
|
|
1571
|
-
if
|
1572
|
-
query = m.group("query")
|
2244
|
+
if obj_type == _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW:
|
1573
2245
|
df = self._session.sql(query)
|
1574
|
-
desc = m.group("comment")
|
1575
|
-
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1576
2246
|
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1577
2247
|
ts_col = fv_metadata.timestamp_col
|
1578
2248
|
timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
|
@@ -1584,23 +2254,25 @@ class FeatureStore:
|
|
1584
2254
|
timestamp_col=timestamp_col,
|
1585
2255
|
desc=desc,
|
1586
2256
|
version=version,
|
1587
|
-
status=FeatureViewStatus(row["scheduling_state"])
|
2257
|
+
status=FeatureViewStatus(row["scheduling_state"])
|
2258
|
+
if len(row["scheduling_state"]) > 0
|
2259
|
+
else FeatureViewStatus.MASKED,
|
1588
2260
|
feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
|
1589
2261
|
refresh_freq=row["target_lag"],
|
1590
2262
|
database=self._config.database.identifier(),
|
1591
2263
|
schema=self._config.schema.identifier(),
|
1592
|
-
warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
|
2264
|
+
warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
|
2265
|
+
if len(row["warehouse"]) > 0
|
2266
|
+
else None,
|
1593
2267
|
refresh_mode=row["refresh_mode"],
|
1594
2268
|
refresh_mode_reason=row["refresh_mode_reason"],
|
1595
2269
|
owner=row["owner"],
|
1596
2270
|
infer_schema_df=infer_schema_df,
|
2271
|
+
session=self._session,
|
1597
2272
|
)
|
1598
2273
|
return fv
|
1599
2274
|
else:
|
1600
|
-
query = m.group("query")
|
1601
2275
|
df = self._session.sql(query)
|
1602
|
-
desc = m.group("comment")
|
1603
|
-
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1604
2276
|
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1605
2277
|
ts_col = fv_metadata.timestamp_col
|
1606
2278
|
timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
|
@@ -1622,6 +2294,7 @@ class FeatureStore:
|
|
1622
2294
|
refresh_mode_reason=None,
|
1623
2295
|
owner=row["owner"],
|
1624
2296
|
infer_schema_df=infer_schema_df,
|
2297
|
+
session=self._session,
|
1625
2298
|
)
|
1626
2299
|
return fv
|
1627
2300
|
|
@@ -1675,42 +2348,10 @@ class FeatureStore:
|
|
1675
2348
|
)
|
1676
2349
|
# There could be none-FS objects under FS schema, thus filter on objects with FS special tag.
|
1677
2350
|
if object_type not in tag_free_object_types and len(all_rows) > 0:
|
1678
|
-
|
1679
|
-
|
1680
|
-
|
1681
|
-
|
1682
|
-
OBJECT_NAME
|
1683
|
-
FROM TABLE(
|
1684
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1685
|
-
TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
|
1686
|
-
)
|
1687
|
-
)
|
1688
|
-
WHERE DOMAIN='{obj_domain}'
|
1689
|
-
"""
|
1690
|
-
).collect(statement_params=self._telemetry_stmp)
|
1691
|
-
else:
|
1692
|
-
# TODO: remove this after tag_ref_internal rollout
|
1693
|
-
# Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
|
1694
|
-
# use double quotes to make it case-sensitive.
|
1695
|
-
queries = [
|
1696
|
-
f"""
|
1697
|
-
SELECT OBJECT_NAME
|
1698
|
-
FROM TABLE(
|
1699
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
|
1700
|
-
'{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
|
1701
|
-
'{obj_domain}'
|
1702
|
-
)
|
1703
|
-
)
|
1704
|
-
WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
|
1705
|
-
AND TAG_SCHEMA = '{self._config.schema.resolved()}'
|
1706
|
-
"""
|
1707
|
-
for row in all_rows
|
1708
|
-
]
|
1709
|
-
fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
|
1710
|
-
statement_params=self._telemetry_stmp
|
1711
|
-
)
|
1712
|
-
|
1713
|
-
fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
|
2351
|
+
fs_obj_rows = self._lookup_tagged_objects(
|
2352
|
+
_FEATURE_STORE_OBJECT_TAG, [lambda d: d["domain"] == obj_domain]
|
2353
|
+
)
|
2354
|
+
fs_tag_objects = [row["entityName"] for row in fs_obj_rows]
|
1714
2355
|
except Exception as e:
|
1715
2356
|
raise snowml_exceptions.SnowflakeMLException(
|
1716
2357
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
@@ -1756,21 +2397,6 @@ class FeatureStore:
|
|
1756
2397
|
)
|
1757
2398
|
return cast(DataFrame, df.drop(exclude_columns))
|
1758
2399
|
|
1759
|
-
def _tag_ref_internal_enabled(self) -> bool:
|
1760
|
-
try:
|
1761
|
-
self._session.sql(
|
1762
|
-
f"""
|
1763
|
-
SELECT * FROM TABLE(
|
1764
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1765
|
-
TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
|
1766
|
-
)
|
1767
|
-
) LIMIT 1;
|
1768
|
-
"""
|
1769
|
-
).collect()
|
1770
|
-
return True
|
1771
|
-
except Exception:
|
1772
|
-
return False
|
1773
|
-
|
1774
2400
|
def _is_dataset_enabled(self) -> bool:
|
1775
2401
|
try:
|
1776
2402
|
self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
|
@@ -1790,21 +2416,98 @@ class FeatureStore:
|
|
1790
2416
|
category=UserWarning,
|
1791
2417
|
)
|
1792
2418
|
|
1793
|
-
def
|
1794
|
-
|
1795
|
-
|
2419
|
+
def _filter_results(
|
2420
|
+
self, results: List[Dict[str, str]], filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
|
2421
|
+
) -> List[Dict[str, str]]:
|
2422
|
+
if filter_fns is None:
|
2423
|
+
return results
|
1796
2424
|
|
1797
|
-
|
1798
|
-
|
1799
|
-
|
1800
|
-
|
1801
|
-
|
1802
|
-
|
1803
|
-
|
1804
|
-
|
2425
|
+
filtered_results = []
|
2426
|
+
for r in results:
|
2427
|
+
if all([fn(r) for fn in filter_fns]):
|
2428
|
+
filtered_results.append(r)
|
2429
|
+
return filtered_results
|
2430
|
+
|
2431
|
+
def _lookup_tags(
|
2432
|
+
self, domain: str, obj_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
|
2433
|
+
) -> List[Dict[str, str]]:
|
1805
2434
|
"""
|
2435
|
+
Lookup tag values for a given object, optionally apply filters on the results.
|
2436
|
+
|
2437
|
+
Args:
|
2438
|
+
domain: Domain of the obj to look for tag. E.g. table
|
2439
|
+
obj_name: Name of the obj.
|
2440
|
+
filter_fns: List of filter functions applied on the results.
|
2441
|
+
|
2442
|
+
Returns:
|
2443
|
+
List of tag values in dictionary format.
|
2444
|
+
|
2445
|
+
Raises:
|
2446
|
+
SnowflakeMLException: [RuntimeError] Failed to lookup tags.
|
2447
|
+
|
2448
|
+
Example::
|
2449
|
+
|
2450
|
+
self._lookup_tags("TABLE", "MY_FV", [lambda d: d["tagName"] == "TARGET_TAG_NAME"])
|
2451
|
+
|
2452
|
+
"""
|
2453
|
+
# NOTE: use ENTITY_DETAIL system fn to query tags for given object for it to work in
|
2454
|
+
# processes using owner's right. e.g. Streamlit, or stored procedure
|
1806
2455
|
try:
|
1807
|
-
res = self._session.sql(
|
2456
|
+
res = self._session.sql(
|
2457
|
+
f"""
|
2458
|
+
SELECT ENTITY_DETAIL('{domain}','{self._get_fully_qualified_name(obj_name)}', '["TAG_REFERENCES"]');
|
2459
|
+
"""
|
2460
|
+
).collect(statement_params=self._telemetry_stmp)
|
2461
|
+
entity_detail = json.loads(res[0][0])
|
2462
|
+
results = entity_detail["tagReferencesInfo"]["tagReferenceList"]
|
2463
|
+
return self._filter_results(results, filter_fns)
|
2464
|
+
except Exception as e:
|
2465
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2466
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
2467
|
+
original_exception=RuntimeError(f"Failed to lookup tags for object for {obj_name}: {e}"),
|
2468
|
+
) from e
|
2469
|
+
|
2470
|
+
def _lookup_tagged_objects(
|
2471
|
+
self, tag_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
|
2472
|
+
) -> List[Dict[str, str]]:
|
2473
|
+
"""
|
2474
|
+
Lookup objects based on specified tag name, optionally apply filters on the results.
|
2475
|
+
|
2476
|
+
Args:
|
2477
|
+
tag_name: Name of the tag.
|
2478
|
+
filter_fns: List of filter functions applied on the results.
|
2479
|
+
|
2480
|
+
Returns:
|
2481
|
+
List of objects in dictionary format.
|
2482
|
+
|
2483
|
+
Raises:
|
2484
|
+
SnowflakeMLException: [RuntimeError] Failed to lookup tagged objects.
|
2485
|
+
|
2486
|
+
Example::
|
2487
|
+
|
2488
|
+
self._lookup_tagged_objects("TARGET_TAG_NAME", [lambda d: d["entityName"] == "MY_FV"])
|
2489
|
+
|
2490
|
+
"""
|
2491
|
+
# NOTE: use ENTITY_DETAIL system fn to query objects from tag for it to work in
|
2492
|
+
# processes using owner's right. e.g. Streamlit, or stored procedure
|
2493
|
+
try:
|
2494
|
+
res = self._session.sql(
|
2495
|
+
f"""
|
2496
|
+
SELECT ENTITY_DETAIL('TAG','{self._get_fully_qualified_name(tag_name)}', '["TAG_REFERENCES_INTERNAL"]');
|
2497
|
+
"""
|
2498
|
+
).collect(statement_params=self._telemetry_stmp)
|
2499
|
+
entity_detail = json.loads(res[0][0])
|
2500
|
+
results = entity_detail["referencedEntities"]["tagReferenceList"]
|
2501
|
+
return self._filter_results(results, filter_fns)
|
2502
|
+
except Exception as e:
|
2503
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2504
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
2505
|
+
original_exception=RuntimeError(f"Failed to lookup tagged objects for {tag_name}: {e}"),
|
2506
|
+
) from e
|
2507
|
+
|
2508
|
+
def _collapse_object_versions(self) -> List[pkg_version.Version]:
|
2509
|
+
try:
|
2510
|
+
res = self._lookup_tagged_objects(_FEATURE_STORE_OBJECT_TAG)
|
1808
2511
|
except Exception:
|
1809
2512
|
# since this is a best effort user warning to upgrade pkg versions
|
1810
2513
|
# we are treating failures as benign error
|
@@ -1812,7 +2515,7 @@ class FeatureStore:
|
|
1812
2515
|
versions = set()
|
1813
2516
|
compatibility_breakage_detected = False
|
1814
2517
|
for r in res:
|
1815
|
-
info = _FeatureStoreObjInfo.from_json(r["
|
2518
|
+
info = _FeatureStoreObjInfo.from_json(r["tagValue"])
|
1816
2519
|
if info.type == _FeatureStoreObjTypes.UNKNOWN:
|
1817
2520
|
compatibility_breakage_detected = True
|
1818
2521
|
versions.add(pkg_version.parse(info.pkg_version))
|
@@ -1827,3 +2530,23 @@ class FeatureStore:
|
|
1827
2530
|
),
|
1828
2531
|
)
|
1829
2532
|
return sorted_versions
|
2533
|
+
|
2534
|
+
def _validate_feature_view_name_and_version_input(
|
2535
|
+
self, feature_view: Union[FeatureView, str], version: Optional[str] = None
|
2536
|
+
) -> FeatureView:
|
2537
|
+
if isinstance(feature_view, str):
|
2538
|
+
if version is None:
|
2539
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2540
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
2541
|
+
original_exception=ValueError("Version must be provided when argument feature_view is a str."),
|
2542
|
+
)
|
2543
|
+
feature_view = self.get_feature_view(feature_view, version)
|
2544
|
+
elif not isinstance(feature_view, FeatureView):
|
2545
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2546
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
2547
|
+
original_exception=ValueError(
|
2548
|
+
"Invalid type of argument feature_view. It must be either str or FeatureView type."
|
2549
|
+
),
|
2550
|
+
)
|
2551
|
+
|
2552
|
+
return feature_view
|