snowflake-ml-python 1.5.4__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +2 -0
- snowflake/cortex/_classify_text.py +36 -0
- snowflake/cortex/_complete.py +66 -35
- snowflake/cortex/_util.py +4 -4
- snowflake/ml/_internal/env_utils.py +11 -5
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
- snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
- snowflake/ml/_internal/telemetry.py +26 -2
- snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
- snowflake/ml/data/_internal/arrow_ingestor.py +284 -0
- snowflake/ml/data/data_connector.py +186 -0
- snowflake/ml/data/data_ingestor.py +45 -0
- snowflake/ml/data/data_source.py +23 -0
- snowflake/ml/data/ingestor_utils.py +62 -0
- snowflake/ml/data/torch_dataset.py +33 -0
- snowflake/ml/dataset/dataset.py +1 -13
- snowflake/ml/dataset/dataset_metadata.py +3 -1
- snowflake/ml/dataset/dataset_reader.py +23 -117
- snowflake/ml/feature_store/access_manager.py +7 -1
- snowflake/ml/feature_store/entity.py +19 -2
- snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
- snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +37 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +30 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/example_helper.py +278 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +44 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +9 -0
- snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
- snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
- snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +36 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +24 -0
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +8 -0
- snowflake/ml/feature_store/feature_store.py +637 -76
- snowflake/ml/feature_store/feature_view.py +316 -9
- snowflake/ml/fileset/stage_fs.py +18 -10
- snowflake/ml/lineage/lineage_node.py +1 -1
- snowflake/ml/model/_client/model/model_impl.py +11 -2
- snowflake/ml/model/_client/model/model_version_impl.py +171 -20
- snowflake/ml/model/_client/ops/model_ops.py +105 -27
- snowflake/ml/model/_client/ops/service_ops.py +121 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +95 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +31 -0
- snowflake/ml/model/_client/sql/model_version.py +13 -4
- snowflake/ml/model/_client/sql/service.py +129 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
- snowflake/ml/model/_model_composer/model_composer.py +14 -14
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +33 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +3 -3
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +3 -27
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/model_method.py +5 -2
- snowflake/ml/model/_packager/model_env/model_env.py +7 -2
- snowflake/ml/model/_packager/model_handlers/_base.py +30 -3
- snowflake/ml/model/_packager/model_handlers/_utils.py +58 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +52 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +9 -5
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +80 -3
- snowflake/ml/model/_packager/model_handlers/llm.py +7 -3
- snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
- snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sklearn.py +87 -4
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
- snowflake/ml/model/_packager/model_handlers/torchscript.py +8 -3
- snowflake/ml/model/_packager/model_handlers/xgboost.py +71 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +32 -2
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +19 -0
- snowflake/ml/model/_packager/model_packager.py +2 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +7 -7
- snowflake/ml/model/model_signature.py +4 -4
- snowflake/ml/model/type_hints.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +1 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
- snowflake/ml/modeling/framework/base.py +28 -19
- snowflake/ml/modeling/impute/simple_imputer.py +26 -0
- snowflake/ml/modeling/pipeline/pipeline.py +7 -4
- snowflake/ml/registry/_manager/model_manager.py +16 -2
- snowflake/ml/registry/registry.py +100 -13
- snowflake/ml/utils/sql_client.py +22 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/METADATA +81 -2
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/RECORD +99 -66
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/lineage/data_source.py +0 -10
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/top_level.txt +0 -0
@@ -52,6 +52,7 @@ from snowflake.ml.feature_store.feature_view import (
|
|
52
52
|
FeatureViewVersion,
|
53
53
|
_FeatureViewMetadata,
|
54
54
|
)
|
55
|
+
from snowflake.ml.utils import sql_client
|
55
56
|
from snowflake.snowpark import DataFrame, Row, Session, functions as F
|
56
57
|
from snowflake.snowpark.exceptions import SnowparkSQLException
|
57
58
|
from snowflake.snowpark.types import (
|
@@ -94,13 +95,12 @@ class _FeatureStoreObjInfo:
|
|
94
95
|
return cls(**state_dict) # type: ignore[arg-type]
|
95
96
|
|
96
97
|
|
97
|
-
# TODO: remove "" after dataset is updated
|
98
98
|
class _FeatureStoreObjTypes(Enum):
|
99
99
|
UNKNOWN = "UNKNOWN" # for forward compatibility
|
100
100
|
MANAGED_FEATURE_VIEW = "MANAGED_FEATURE_VIEW"
|
101
101
|
EXTERNAL_FEATURE_VIEW = "EXTERNAL_FEATURE_VIEW"
|
102
102
|
FEATURE_VIEW_REFRESH_TASK = "FEATURE_VIEW_REFRESH_TASK"
|
103
|
-
TRAINING_DATA = ""
|
103
|
+
TRAINING_DATA = "TRAINING_DATA"
|
104
104
|
|
105
105
|
@classmethod
|
106
106
|
def parse(cls, val: str) -> _FeatureStoreObjTypes:
|
@@ -135,13 +135,13 @@ _LIST_FEATURE_VIEW_SCHEMA = StructType(
|
|
135
135
|
StructField("refresh_freq", StringType()),
|
136
136
|
StructField("refresh_mode", StringType()),
|
137
137
|
StructField("scheduling_state", StringType()),
|
138
|
+
StructField("warehouse", StringType()),
|
138
139
|
]
|
139
140
|
)
|
140
141
|
|
141
142
|
|
142
|
-
|
143
|
-
|
144
|
-
CREATE_IF_NOT_EXIST = 2
|
143
|
+
CreationMode = sql_client.CreationOption
|
144
|
+
CreationMode.__module__ = __name__
|
145
145
|
|
146
146
|
|
147
147
|
@dataclass(frozen=True)
|
@@ -205,6 +205,7 @@ class FeatureStore:
|
|
205
205
|
database: str,
|
206
206
|
name: str,
|
207
207
|
default_warehouse: str,
|
208
|
+
*,
|
208
209
|
creation_mode: CreationMode = CreationMode.FAIL_IF_NOT_EXIST,
|
209
210
|
) -> None:
|
210
211
|
"""
|
@@ -224,6 +225,32 @@ class FeatureStore:
|
|
224
225
|
SnowflakeMLException: [ValueError] Required resources do not exist when mode is FAIL_IF_NOT_EXIST.
|
225
226
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
226
227
|
SnowflakeMLException: [RuntimeError] Failed to create feature store.
|
228
|
+
|
229
|
+
Example::
|
230
|
+
|
231
|
+
>>> from snowflake.ml.feature_store import (
|
232
|
+
... FeatureStore,
|
233
|
+
... CreationMode,
|
234
|
+
... )
|
235
|
+
<BLANKLINE>
|
236
|
+
>>> # Create a new Feature Store:
|
237
|
+
>>> fs = FeatureStore(
|
238
|
+
... session=session,
|
239
|
+
... database="MYDB",
|
240
|
+
... name="MYSCHEMA",
|
241
|
+
... default_warehouse="MYWH",
|
242
|
+
... creation_mode=CreationMode.CREATE_IF_NOT_EXIST
|
243
|
+
... )
|
244
|
+
<BLANKLINE>
|
245
|
+
>>> # Connect to an existing Feature Store:
|
246
|
+
>>> fs = FeatureStore(
|
247
|
+
... session=session,
|
248
|
+
... database="MYDB",
|
249
|
+
... name="MYSCHEMA",
|
250
|
+
... default_warehouse="MYWH",
|
251
|
+
... creation_mode=CreationMode.FAIL_IF_NOT_EXIST
|
252
|
+
... )
|
253
|
+
|
227
254
|
"""
|
228
255
|
|
229
256
|
database = SqlIdentifier(database)
|
@@ -284,6 +311,16 @@ class FeatureStore:
|
|
284
311
|
|
285
312
|
Raises:
|
286
313
|
SnowflakeMLException: If warehouse does not exist.
|
314
|
+
|
315
|
+
Example::
|
316
|
+
|
317
|
+
>>> fs = FeatureStore(...)
|
318
|
+
>>> fs.update_default_warehouse("MYWH_2")
|
319
|
+
>>> draft_fv = FeatureView("my_fv", ...)
|
320
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, '2.0')
|
321
|
+
>>> print(registered_fv.warehouse)
|
322
|
+
MYWH_2
|
323
|
+
|
287
324
|
"""
|
288
325
|
warehouse = SqlIdentifier(warehouse_name)
|
289
326
|
warehouse_result = self._find_object("WAREHOUSES", warehouse)
|
@@ -301,15 +338,27 @@ class FeatureStore:
|
|
301
338
|
Register Entity in the FeatureStore.
|
302
339
|
|
303
340
|
Args:
|
304
|
-
entity: Entity object to
|
341
|
+
entity: Entity object to be registered.
|
305
342
|
|
306
343
|
Returns:
|
307
344
|
A registered entity object.
|
308
345
|
|
309
346
|
Raises:
|
310
347
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
311
|
-
"""
|
312
348
|
|
349
|
+
Example::
|
350
|
+
|
351
|
+
>>> fs = FeatureStore(...)
|
352
|
+
>>> e = Entity('BAR', ['A'], desc='entity bar')
|
353
|
+
>>> fs.register_entity(e)
|
354
|
+
>>> fs.list_entities().show()
|
355
|
+
--------------------------------------------------
|
356
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
357
|
+
--------------------------------------------------
|
358
|
+
|BAR |["A"] |entity bar |REGTEST_RL |
|
359
|
+
--------------------------------------------------
|
360
|
+
|
361
|
+
"""
|
313
362
|
tag_name = self._get_entity_name(entity.name)
|
314
363
|
found_rows = self._find_object("TAGS", tag_name)
|
315
364
|
if len(found_rows) > 0:
|
@@ -341,12 +390,76 @@ class FeatureStore:
|
|
341
390
|
|
342
391
|
return self.get_entity(entity.name)
|
343
392
|
|
393
|
+
def update_entity(self, name: str, *, desc: Optional[str] = None) -> Optional[Entity]:
|
394
|
+
"""Update a registered entity with provided information.
|
395
|
+
|
396
|
+
Args:
|
397
|
+
name: Name of entity to update.
|
398
|
+
desc: Optional new description to apply. Defaults to None.
|
399
|
+
|
400
|
+
Raises:
|
401
|
+
SnowflakeMLException: An error occurred while updating the entity.
|
402
|
+
|
403
|
+
Returns:
|
404
|
+
A new entity with updated information or None if the entity doesn't exist.
|
405
|
+
|
406
|
+
Example::
|
407
|
+
|
408
|
+
>>> fs = FeatureStore(...)
|
409
|
+
<BLANKLINE>
|
410
|
+
>>> e = Entity(name='foo', join_keys=['COL_1'], desc='old desc')
|
411
|
+
>>> fs.list_entities().show()
|
412
|
+
------------------------------------------------
|
413
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
414
|
+
------------------------------------------------
|
415
|
+
|FOO |["COL_1"] |old desc |REGTEST_RL |
|
416
|
+
------------------------------------------------
|
417
|
+
<BLANKLINE>
|
418
|
+
>>> fs.update_entity('foo', desc='NEW DESC')
|
419
|
+
>>> fs.list_entities().show()
|
420
|
+
------------------------------------------------
|
421
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
422
|
+
------------------------------------------------
|
423
|
+
|FOO |["COL_1"] |NEW DESC |REGTEST_RL |
|
424
|
+
------------------------------------------------
|
425
|
+
|
426
|
+
"""
|
427
|
+
name = SqlIdentifier(name)
|
428
|
+
found_rows = (
|
429
|
+
self.list_entities().filter(F.col("NAME") == name.resolved()).collect(statement_params=self._telemetry_stmp)
|
430
|
+
)
|
431
|
+
|
432
|
+
if len(found_rows) == 0:
|
433
|
+
warnings.warn(
|
434
|
+
f"Entity {name} does not exist.",
|
435
|
+
stacklevel=2,
|
436
|
+
category=UserWarning,
|
437
|
+
)
|
438
|
+
return None
|
439
|
+
|
440
|
+
new_desc = desc if desc is not None else found_rows[0]["DESC"]
|
441
|
+
|
442
|
+
try:
|
443
|
+
full_name = f"{self._config.full_schema_path}.{self._get_entity_name(name)}"
|
444
|
+
self._session.sql(f"ALTER TAG {full_name} SET COMMENT = '{new_desc}'").collect(
|
445
|
+
statement_params=self._telemetry_stmp
|
446
|
+
)
|
447
|
+
except Exception as e:
|
448
|
+
raise snowml_exceptions.SnowflakeMLException(
|
449
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
450
|
+
original_exception=RuntimeError(f"Failed to update entity `{name}`: {e}."),
|
451
|
+
) from e
|
452
|
+
|
453
|
+
logger.info(f"Successfully updated Entity {name}.")
|
454
|
+
return self.get_entity(name)
|
455
|
+
|
344
456
|
# TODO: add support to update column desc once SNOW-894249 is fixed
|
345
457
|
@dispatch_decorator()
|
346
458
|
def register_feature_view(
|
347
459
|
self,
|
348
460
|
feature_view: FeatureView,
|
349
461
|
version: str,
|
462
|
+
*,
|
350
463
|
block: bool = True,
|
351
464
|
overwrite: bool = False,
|
352
465
|
) -> FeatureView:
|
@@ -356,12 +469,6 @@ class FeatureStore:
|
|
356
469
|
NOTE: Each new materialization will trigger a full FeatureView history refresh for the data included in the
|
357
470
|
FeatureView.
|
358
471
|
|
359
|
-
Examples:
|
360
|
-
...
|
361
|
-
draft_fv = FeatureView(name="my_fv", entities=[entities], feature_df)
|
362
|
-
registered_fv = fs.register_feature_view(feature_view=draft_fv, version="v1")
|
363
|
-
...
|
364
|
-
|
365
472
|
Args:
|
366
473
|
feature_view: FeatureView instance to materialize.
|
367
474
|
version: version of the registered FeatureView.
|
@@ -380,6 +487,35 @@ class FeatureStore:
|
|
380
487
|
SnowflakeMLException: [ValueError] Warehouse or default warehouse is not specified.
|
381
488
|
SnowflakeMLException: [RuntimeError] Failed to create dynamic table, task, or view.
|
382
489
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
490
|
+
|
491
|
+
Example::
|
492
|
+
|
493
|
+
>>> fs = FeatureStore(...)
|
494
|
+
>>> # draft_fv is a local object that hasn't been materialized to Snowflake backend yet.
|
495
|
+
>>> feature_df = session.sql("select f_1, f_2 from source_table")
|
496
|
+
>>> draft_fv = FeatureView("my_fv", [entities], feature_df)
|
497
|
+
>>> print(draft_fv.status)
|
498
|
+
FeatureViewStatus.DRAFT
|
499
|
+
<BLANKLINE>
|
500
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
501
|
+
-------------------------------------------
|
502
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
503
|
+
-------------------------------------------
|
504
|
+
| | | |
|
505
|
+
-------------------------------------------
|
506
|
+
<BLANKLINE>
|
507
|
+
>>> # registered_fv is a local object that maps to a Snowflake backend object.
|
508
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, "v1")
|
509
|
+
>>> print(registered_fv.status)
|
510
|
+
FeatureViewStatus.ACTIVE
|
511
|
+
<BLANKLINE>
|
512
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
513
|
+
-------------------------------------------
|
514
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
515
|
+
-------------------------------------------
|
516
|
+
|MY_FV |v1 |ACTIVE |
|
517
|
+
-------------------------------------------
|
518
|
+
|
383
519
|
"""
|
384
520
|
version = FeatureViewVersion(version)
|
385
521
|
|
@@ -444,7 +580,7 @@ class FeatureStore:
|
|
444
580
|
column_descs,
|
445
581
|
tagging_clause_str,
|
446
582
|
schedule_task,
|
447
|
-
self._default_warehouse,
|
583
|
+
feature_view.warehouse if feature_view.warehouse is not None else self._default_warehouse,
|
448
584
|
block,
|
449
585
|
overwrite,
|
450
586
|
)
|
@@ -473,6 +609,7 @@ class FeatureStore:
|
|
473
609
|
self,
|
474
610
|
name: str,
|
475
611
|
version: str,
|
612
|
+
*,
|
476
613
|
refresh_freq: Optional[str] = None,
|
477
614
|
warehouse: Optional[str] = None,
|
478
615
|
desc: Optional[str] = None,
|
@@ -492,27 +629,33 @@ class FeatureStore:
|
|
492
629
|
|
493
630
|
Example::
|
494
631
|
|
495
|
-
>>> fs = FeatureStore(
|
496
|
-
... ...,
|
497
|
-
... default_warehouse='ORIGINAL_WH',
|
498
|
-
... )
|
632
|
+
>>> fs = FeatureStore(...)
|
499
633
|
>>> fv = FeatureView(
|
500
634
|
... name='foo',
|
501
635
|
... entities=[e1, e2],
|
502
636
|
... feature_df=session.sql('...'),
|
503
|
-
...
|
504
|
-
... refresh_freq='1d',
|
505
|
-
... desc='this is old description'
|
637
|
+
... desc='this is old description',
|
506
638
|
... )
|
507
639
|
>>> fv = fs.register_feature_view(feature_view=fv, version='v1')
|
640
|
+
>>> fs.list_feature_views().select("name", "version", "desc").show()
|
641
|
+
------------------------------------------------
|
642
|
+
|"NAME" |"VERSION" |"DESC" |
|
643
|
+
------------------------------------------------
|
644
|
+
|FOO |v1 |this is old description |
|
645
|
+
------------------------------------------------
|
646
|
+
<BLANKLINE>
|
508
647
|
>>> # update_feature_view will apply new arguments to the registered feature view.
|
509
648
|
>>> new_fv = fs.update_feature_view(
|
510
649
|
... name='foo',
|
511
650
|
... version='v1',
|
512
|
-
... refresh_freq='2d',
|
513
|
-
... warehouse='MY_NEW_WH',
|
514
651
|
... desc='that is new descption',
|
515
652
|
... )
|
653
|
+
>>> fs.list_feature_views().select("name", "version", "desc").show()
|
654
|
+
------------------------------------------------
|
655
|
+
|"NAME" |"VERSION" |"DESC" |
|
656
|
+
------------------------------------------------
|
657
|
+
|FOO |v1 |THAT IS NEW DESCRIPTION |
|
658
|
+
------------------------------------------------
|
516
659
|
|
517
660
|
Raises:
|
518
661
|
SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
|
@@ -555,20 +698,56 @@ class FeatureStore:
|
|
555
698
|
) from e
|
556
699
|
return self.get_feature_view(name=name, version=version)
|
557
700
|
|
558
|
-
@
|
701
|
+
@overload
|
702
|
+
def read_feature_view(self, feature_view: str, version: str) -> DataFrame:
|
703
|
+
...
|
704
|
+
|
705
|
+
@overload
|
559
706
|
def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
|
707
|
+
...
|
708
|
+
|
709
|
+
@dispatch_decorator() # type: ignore[misc]
|
710
|
+
def read_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> DataFrame:
|
560
711
|
"""
|
561
|
-
Read FeatureView
|
712
|
+
Read values from a FeatureView.
|
562
713
|
|
563
714
|
Args:
|
564
|
-
feature_view: FeatureView to
|
715
|
+
feature_view: A FeatureView object to read from, or the name of feature view.
|
716
|
+
If name is provided then version also must be provided.
|
717
|
+
version: Optional version of feature view. Must be set when argument feature_view is a str.
|
565
718
|
|
566
719
|
Returns:
|
567
720
|
Snowpark DataFrame(lazy mode) containing the FeatureView data.
|
568
721
|
|
569
722
|
Raises:
|
723
|
+
SnowflakeMLException: [ValueError] version argument is missing when argument feature_view is a str.
|
570
724
|
SnowflakeMLException: [ValueError] FeatureView is not registered.
|
725
|
+
|
726
|
+
Example::
|
727
|
+
|
728
|
+
>>> fs = FeatureStore(...)
|
729
|
+
>>> # Read from feature view name and version.
|
730
|
+
>>> fs.read_feature_view('foo', 'v1').show()
|
731
|
+
------------------------------------------
|
732
|
+
|"NAME" |"ID" |"TITLE" |"AGE" |"TS" |
|
733
|
+
------------------------------------------
|
734
|
+
|jonh |1 |boss |20 |100 |
|
735
|
+
|porter |2 |manager |30 |200 |
|
736
|
+
------------------------------------------
|
737
|
+
<BLANKLINE>
|
738
|
+
>>> # Read from feature view object.
|
739
|
+
>>> fv = fs.get_feature_view('foo', 'v1')
|
740
|
+
>>> fs.read_feature_view(fv).show()
|
741
|
+
------------------------------------------
|
742
|
+
|"NAME" |"ID" |"TITLE" |"AGE" |"TS" |
|
743
|
+
------------------------------------------
|
744
|
+
|jonh |1 |boss |20 |100 |
|
745
|
+
|porter |2 |manager |30 |200 |
|
746
|
+
------------------------------------------
|
747
|
+
|
571
748
|
"""
|
749
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
750
|
+
|
572
751
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
573
752
|
raise snowml_exceptions.SnowflakeMLException(
|
574
753
|
error_code=error_codes.NOT_FOUND,
|
@@ -580,6 +759,7 @@ class FeatureStore:
|
|
580
759
|
@dispatch_decorator()
|
581
760
|
def list_feature_views(
|
582
761
|
self,
|
762
|
+
*,
|
583
763
|
entity_name: Optional[str] = None,
|
584
764
|
feature_view_name: Optional[str] = None,
|
585
765
|
) -> DataFrame:
|
@@ -594,6 +774,24 @@ class FeatureStore:
|
|
594
774
|
|
595
775
|
Returns:
|
596
776
|
FeatureViews information as a Snowpark DataFrame.
|
777
|
+
|
778
|
+
Example::
|
779
|
+
|
780
|
+
>>> fs = FeatureStore(...)
|
781
|
+
>>> draft_fv = FeatureView(
|
782
|
+
... name='foo',
|
783
|
+
... entities=[e1, e2],
|
784
|
+
... feature_df=session.sql('...'),
|
785
|
+
... desc='this is description',
|
786
|
+
... )
|
787
|
+
>>> fs.register_feature_view(feature_view=draft_fv, version='v1')
|
788
|
+
>>> fs.list_feature_views().select("name", "version", "desc").show()
|
789
|
+
--------------------------------------------
|
790
|
+
|"NAME" |"VERSION" |"DESC" |
|
791
|
+
--------------------------------------------
|
792
|
+
|FOO |v1 |this is description |
|
793
|
+
--------------------------------------------
|
794
|
+
|
597
795
|
"""
|
598
796
|
if feature_view_name is not None:
|
599
797
|
feature_view_name = SqlIdentifier(feature_view_name)
|
@@ -622,6 +820,28 @@ class FeatureStore:
|
|
622
820
|
Raises:
|
623
821
|
SnowflakeMLException: [ValueError] FeatureView with name and version is not found,
|
624
822
|
or incurred exception when reconstructing the FeatureView object.
|
823
|
+
|
824
|
+
Example::
|
825
|
+
|
826
|
+
>>> fs = FeatureStore(...)
|
827
|
+
>>> # draft_fv is a local object that hasn't been materialized to Snowflake backend yet.
|
828
|
+
>>> draft_fv = FeatureView(
|
829
|
+
... name='foo',
|
830
|
+
... entities=[e1],
|
831
|
+
... feature_df=session.sql('...'),
|
832
|
+
... desc='this is description',
|
833
|
+
... )
|
834
|
+
>>> fs.register_feature_view(feature_view=draft_fv, version='v1')
|
835
|
+
<BLANKLINE>
|
836
|
+
>>> # fv is a local object that maps to a Snowflake backend object.
|
837
|
+
>>> fv = fs.get_feature_view('foo', 'v1')
|
838
|
+
>>> print(f"name: {fv.name}")
|
839
|
+
>>> print(f"version:{fv.version}")
|
840
|
+
>>> print(f"desc:{fv.desc}")
|
841
|
+
name: FOO
|
842
|
+
version:v1
|
843
|
+
desc:this is description
|
844
|
+
|
625
845
|
"""
|
626
846
|
name = SqlIdentifier(name)
|
627
847
|
version = FeatureViewVersion(version)
|
@@ -634,27 +854,53 @@ class FeatureStore:
|
|
634
854
|
original_exception=ValueError(f"Failed to find FeatureView {name}/{version}: {results}"),
|
635
855
|
)
|
636
856
|
|
637
|
-
return self._compose_feature_view(
|
857
|
+
return self._compose_feature_view(
|
858
|
+
results[0][0], results[0][1], self.list_entities().collect(statement_params=self._telemetry_stmp)
|
859
|
+
)
|
638
860
|
|
639
|
-
@
|
861
|
+
@overload
|
640
862
|
def refresh_feature_view(self, feature_view: FeatureView) -> None:
|
863
|
+
...
|
864
|
+
|
865
|
+
@overload
|
866
|
+
def refresh_feature_view(self, feature_view: str, version: str) -> None:
|
867
|
+
...
|
868
|
+
|
869
|
+
@dispatch_decorator() # type: ignore[misc]
|
870
|
+
def refresh_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> None:
|
641
871
|
"""Manually refresh a feature view.
|
642
872
|
|
643
873
|
Args:
|
644
|
-
feature_view: A registered feature view.
|
874
|
+
feature_view: A registered feature view object, or the name of feature view.
|
875
|
+
version: Optional version of feature view. Must be set when argument feature_view is a str.
|
645
876
|
|
646
877
|
Example::
|
647
878
|
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
879
|
+
>>> fs = FeatureStore(...)
|
880
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
881
|
+
<BLANKLINE>
|
882
|
+
>>> # refresh with name and version
|
883
|
+
>>> fs.refresh_feature_view('MY_FV', 'v1')
|
884
|
+
>>> fs.get_refresh_history('MY_FV', 'v1').show()
|
885
|
+
-----------------------------------------------------------------------------------------------------
|
886
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
887
|
+
-----------------------------------------------------------------------------------------------------
|
888
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
889
|
+
-----------------------------------------------------------------------------------------------------
|
890
|
+
<BLANKLINE>
|
891
|
+
>>> # refresh with feature view object
|
892
|
+
>>> fs.refresh_feature_view(fv)
|
893
|
+
>>> fs.get_refresh_history(fv).show()
|
894
|
+
-----------------------------------------------------------------------------------------------------
|
895
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
896
|
+
-----------------------------------------------------------------------------------------------------
|
897
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:54:06.680000 |2024-07-10 14:54:07.226000 |INCREMENTAL |
|
898
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
899
|
+
-----------------------------------------------------------------------------------------------------
|
900
|
+
|
657
901
|
"""
|
902
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
903
|
+
|
658
904
|
if feature_view.status == FeatureViewStatus.STATIC:
|
659
905
|
warnings.warn(
|
660
906
|
"Static feature view can't be refreshed. You must set refresh_freq when register_feature_view().",
|
@@ -664,11 +910,24 @@ class FeatureStore:
|
|
664
910
|
return
|
665
911
|
self._update_feature_view_status(feature_view, "REFRESH")
|
666
912
|
|
667
|
-
|
913
|
+
@overload
|
914
|
+
def get_refresh_history(
|
915
|
+
self, feature_view: FeatureView, version: Optional[str] = None, *, verbose: bool = False
|
916
|
+
) -> DataFrame:
|
917
|
+
...
|
918
|
+
|
919
|
+
@overload
|
920
|
+
def get_refresh_history(self, feature_view: str, version: str, *, verbose: bool = False) -> DataFrame:
|
921
|
+
...
|
922
|
+
|
923
|
+
def get_refresh_history(
|
924
|
+
self, feature_view: Union[FeatureView, str], version: Optional[str] = None, *, verbose: bool = False
|
925
|
+
) -> DataFrame:
|
668
926
|
"""Get refresh history statistics about a feature view.
|
669
927
|
|
670
928
|
Args:
|
671
|
-
feature_view: A registered feature view.
|
929
|
+
feature_view: A registered feature view object, or the name of feature view.
|
930
|
+
version: Optional version of feature view. Must be set when argument feature_view is a str.
|
672
931
|
verbose: Return more detailed history when set true.
|
673
932
|
|
674
933
|
Returns:
|
@@ -676,16 +935,30 @@ class FeatureStore:
|
|
676
935
|
|
677
936
|
Example::
|
678
937
|
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
938
|
+
>>> fs = FeatureStore(...)
|
939
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
940
|
+
>>> # refresh with name and version
|
941
|
+
>>> fs.refresh_feature_view('MY_FV', 'v1')
|
942
|
+
>>> fs.get_refresh_history('MY_FV', 'v1').show()
|
943
|
+
-----------------------------------------------------------------------------------------------------
|
944
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
945
|
+
-----------------------------------------------------------------------------------------------------
|
946
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
947
|
+
-----------------------------------------------------------------------------------------------------
|
948
|
+
<BLANKLINE>
|
949
|
+
>>> # refresh with feature view object
|
950
|
+
>>> fs.refresh_feature_view(fv)
|
951
|
+
>>> fs.get_refresh_history(fv).show()
|
952
|
+
-----------------------------------------------------------------------------------------------------
|
953
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
954
|
+
-----------------------------------------------------------------------------------------------------
|
955
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:54:06.680000 |2024-07-10 14:54:07.226000 |INCREMENTAL |
|
956
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-10 14:53:58.504000 |2024-07-10 14:53:59.088000 |INCREMENTAL |
|
957
|
+
-----------------------------------------------------------------------------------------------------
|
958
|
+
|
688
959
|
"""
|
960
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
961
|
+
|
689
962
|
if feature_view.status == FeatureViewStatus.STATIC:
|
690
963
|
warnings.warn(
|
691
964
|
"Static feature view never refreshes.",
|
@@ -719,43 +992,151 @@ class FeatureStore:
|
|
719
992
|
"""
|
720
993
|
)
|
721
994
|
|
722
|
-
@
|
995
|
+
@overload
|
723
996
|
def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
997
|
+
...
|
998
|
+
|
999
|
+
@overload
|
1000
|
+
def resume_feature_view(self, feature_view: str, version: str) -> FeatureView:
|
1001
|
+
...
|
1002
|
+
|
1003
|
+
@dispatch_decorator() # type: ignore[misc]
|
1004
|
+
def resume_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> FeatureView:
|
724
1005
|
"""
|
725
1006
|
Resume a previously suspended FeatureView.
|
726
1007
|
|
727
1008
|
Args:
|
728
|
-
feature_view: FeatureView to resume.
|
1009
|
+
feature_view: FeatureView object or name to resume.
|
1010
|
+
version: Optional version of feature view. Must be set when argument feature_view is a str.
|
729
1011
|
|
730
1012
|
Returns:
|
731
1013
|
A new feature view with updated status.
|
1014
|
+
|
1015
|
+
Example::
|
1016
|
+
|
1017
|
+
>>> fs = FeatureStore(...)
|
1018
|
+
>>> # you must already have feature views registered
|
1019
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
1020
|
+
>>> fs.suspend_feature_view('MY_FV', 'v1')
|
1021
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1022
|
+
-------------------------------------------
|
1023
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1024
|
+
-------------------------------------------
|
1025
|
+
|MY_FV |v1 |SUSPENDED |
|
1026
|
+
-------------------------------------------
|
1027
|
+
<BLANKLINE>
|
1028
|
+
>>> fs.resume_feature_view('MY_FV', 'v1')
|
1029
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1030
|
+
-------------------------------------------
|
1031
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1032
|
+
-------------------------------------------
|
1033
|
+
|MY_FV |v1 |ACTIVE |
|
1034
|
+
-------------------------------------------
|
1035
|
+
|
732
1036
|
"""
|
1037
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
733
1038
|
return self._update_feature_view_status(feature_view, "RESUME")
|
734
1039
|
|
735
|
-
@
|
1040
|
+
@overload
|
736
1041
|
def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
1042
|
+
...
|
1043
|
+
|
1044
|
+
@overload
|
1045
|
+
def suspend_feature_view(self, feature_view: str, version: str) -> FeatureView:
|
1046
|
+
...
|
1047
|
+
|
1048
|
+
@dispatch_decorator() # type: ignore[misc]
|
1049
|
+
def suspend_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> FeatureView:
|
737
1050
|
"""
|
738
1051
|
Suspend an active FeatureView.
|
739
1052
|
|
740
1053
|
Args:
|
741
|
-
feature_view: FeatureView to suspend.
|
1054
|
+
feature_view: FeatureView object or name to suspend.
|
1055
|
+
version: Optional version of feature view. Must be set when argument feature_view is a str.
|
742
1056
|
|
743
1057
|
Returns:
|
744
1058
|
A new feature view with updated status.
|
1059
|
+
|
1060
|
+
Example::
|
1061
|
+
|
1062
|
+
>>> fs = FeatureStore(...)
|
1063
|
+
>>> # assume you already have feature views registered
|
1064
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
1065
|
+
>>> fs.suspend_feature_view('MY_FV', 'v1')
|
1066
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1067
|
+
-------------------------------------------
|
1068
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1069
|
+
-------------------------------------------
|
1070
|
+
|MY_FV |v1 |SUSPENDED |
|
1071
|
+
-------------------------------------------
|
1072
|
+
<BLANKLINE>
|
1073
|
+
>>> fs.resume_feature_view('MY_FV', 'v1')
|
1074
|
+
>>> fs.list_feature_views().select("NAME", "VERSION", "SCHEDULING_STATE").show()
|
1075
|
+
-------------------------------------------
|
1076
|
+
|"NAME" |"VERSION" |"SCHEDULING_STATE" |
|
1077
|
+
-------------------------------------------
|
1078
|
+
|MY_FV |v1 |ACTIVE |
|
1079
|
+
-------------------------------------------
|
1080
|
+
|
745
1081
|
"""
|
1082
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
746
1083
|
return self._update_feature_view_status(feature_view, "SUSPEND")
|
747
1084
|
|
748
|
-
@
|
1085
|
+
@overload
|
749
1086
|
def delete_feature_view(self, feature_view: FeatureView) -> None:
|
1087
|
+
...
|
1088
|
+
|
1089
|
+
@overload
|
1090
|
+
def delete_feature_view(self, feature_view: str, version: str) -> None:
|
1091
|
+
...
|
1092
|
+
|
1093
|
+
@dispatch_decorator() # type: ignore[misc]
|
1094
|
+
def delete_feature_view(self, feature_view: Union[FeatureView, str], version: Optional[str] = None) -> None:
|
750
1095
|
"""
|
751
1096
|
Delete a FeatureView.
|
752
1097
|
|
753
1098
|
Args:
|
754
|
-
feature_view: FeatureView to delete.
|
1099
|
+
feature_view: FeatureView object or name to delete.
|
1100
|
+
version: Optional version of feature view. Must be set when argument feature_view is a str.
|
755
1101
|
|
756
1102
|
Raises:
|
757
1103
|
SnowflakeMLException: [ValueError] FeatureView is not registered.
|
1104
|
+
|
1105
|
+
Example::
|
1106
|
+
|
1107
|
+
>>> fs = FeatureStore(...)
|
1108
|
+
>>> fv = FeatureView('FV0', ...)
|
1109
|
+
>>> fv1 = fs.register_feature_view(fv, 'FIRST')
|
1110
|
+
>>> fv2 = fs.register_feature_view(fv, 'SECOND')
|
1111
|
+
>>> fs.list_feature_views().select('NAME', 'VERSION').show()
|
1112
|
+
----------------------
|
1113
|
+
|"NAME" |"VERSION" |
|
1114
|
+
----------------------
|
1115
|
+
|FV0 |SECOND |
|
1116
|
+
|FV0 |FIRST |
|
1117
|
+
----------------------
|
1118
|
+
<BLANKLINE>
|
1119
|
+
>>> # delete with name and version
|
1120
|
+
>>> fs.delete_feature_view('FV0', 'FIRST')
|
1121
|
+
>>> fs.list_feature_views().select('NAME', 'VERSION').show()
|
1122
|
+
----------------------
|
1123
|
+
|"NAME" |"VERSION" |
|
1124
|
+
----------------------
|
1125
|
+
|FV0 |SECOND |
|
1126
|
+
----------------------
|
1127
|
+
<BLANKLINE>
|
1128
|
+
>>> # delete with feature view object
|
1129
|
+
>>> fs.delete_feature_view(fv2)
|
1130
|
+
>>> fs.list_feature_views().select('NAME', 'VERSION').show()
|
1131
|
+
----------------------
|
1132
|
+
|"NAME" |"VERSION" |
|
1133
|
+
----------------------
|
1134
|
+
| | |
|
1135
|
+
----------------------
|
1136
|
+
|
758
1137
|
"""
|
1138
|
+
feature_view = self._validate_feature_view_name_and_version_input(feature_view, version)
|
1139
|
+
|
759
1140
|
# TODO: we should leverage lineage graph to check downstream deps, and block the deletion
|
760
1141
|
# if there're other FVs depending on this
|
761
1142
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
@@ -787,6 +1168,19 @@ class FeatureStore:
|
|
787
1168
|
|
788
1169
|
Returns:
|
789
1170
|
Snowpark DataFrame containing the results.
|
1171
|
+
|
1172
|
+
Example::
|
1173
|
+
|
1174
|
+
>>> fs = FeatureStore(...)
|
1175
|
+
>>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
|
1176
|
+
>>> fs.register_entity(e_1)
|
1177
|
+
>>> fs.list_entities().show()
|
1178
|
+
-----------------------------------------------------------
|
1179
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
1180
|
+
-----------------------------------------------------------
|
1181
|
+
|MY_ENTITY |["COL_1"] |My first entity. |REGTEST_RL |
|
1182
|
+
-----------------------------------------------------------
|
1183
|
+
|
790
1184
|
"""
|
791
1185
|
prefix_len = len(_ENTITY_TAG_PREFIX) + 1
|
792
1186
|
return cast(
|
@@ -816,10 +1210,27 @@ class FeatureStore:
|
|
816
1210
|
SnowflakeMLException: [ValueError] Entity is not found.
|
817
1211
|
SnowflakeMLException: [RuntimeError] Failed to retrieve tag reference information.
|
818
1212
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
1213
|
+
|
1214
|
+
Example::
|
1215
|
+
|
1216
|
+
>>> fs = FeatureStore(...)
|
1217
|
+
>>> # e_1 is a local object that hasn't been registered to the Snowflake backend yet.
|
1218
|
+
>>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
|
1219
|
+
>>> fs.register_entity(e_1)
|
1220
|
+
<BLANKLINE>
|
1221
|
+
>>> # e_2 is a local object that points to a backend object in Snowflake.
|
1222
|
+
>>> e_2 = fs.get_entity("my_entity")
|
1223
|
+
>>> print(e_2)
|
1224
|
+
Entity(name=MY_ENTITY, join_keys=['COL_1'], owner=REGTEST_RL, desc=My first entity.)
|
1225
|
+
|
819
1226
|
"""
|
820
1227
|
name = SqlIdentifier(name)
|
821
1228
|
try:
|
822
|
-
result =
|
1229
|
+
result = (
|
1230
|
+
self.list_entities()
|
1231
|
+
.filter(F.col("NAME") == name.resolved())
|
1232
|
+
.collect(statement_params=self._telemetry_stmp)
|
1233
|
+
)
|
823
1234
|
except Exception as e:
|
824
1235
|
raise snowml_exceptions.SnowflakeMLException(
|
825
1236
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
@@ -846,12 +1257,33 @@ class FeatureStore:
|
|
846
1257
|
Delete a previously registered Entity.
|
847
1258
|
|
848
1259
|
Args:
|
849
|
-
name:
|
1260
|
+
name: Name of entity to be deleted.
|
850
1261
|
|
851
1262
|
Raises:
|
852
1263
|
SnowflakeMLException: [ValueError] Entity with given name does not exist.
|
853
1264
|
SnowflakeMLException: [RuntimeError] Failed to alter schema or drop tag.
|
854
1265
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
1266
|
+
|
1267
|
+
Example::
|
1268
|
+
|
1269
|
+
>>> fs = FeatureStore(...)
|
1270
|
+
>>> e_1 = Entity("my_entity", ['col_1'], desc='My first entity.')
|
1271
|
+
>>> fs.register_entity(e_1)
|
1272
|
+
>>> fs.list_entities().show()
|
1273
|
+
-----------------------------------------------------------
|
1274
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
1275
|
+
-----------------------------------------------------------
|
1276
|
+
|MY_ENTITY |["COL_1"] |My first entity. |REGTEST_RL |
|
1277
|
+
-----------------------------------------------------------
|
1278
|
+
<BLANKLINE>
|
1279
|
+
>>> fs.delete_entity("my_entity")
|
1280
|
+
>>> fs.list_entities().show()
|
1281
|
+
-------------------------------------------
|
1282
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
1283
|
+
-------------------------------------------
|
1284
|
+
| | | | |
|
1285
|
+
-------------------------------------------
|
1286
|
+
|
855
1287
|
"""
|
856
1288
|
name = SqlIdentifier(name)
|
857
1289
|
|
@@ -885,6 +1317,7 @@ class FeatureStore:
|
|
885
1317
|
self,
|
886
1318
|
spine_df: DataFrame,
|
887
1319
|
features: Union[List[Union[FeatureView, FeatureViewSlice]], List[str]],
|
1320
|
+
*,
|
888
1321
|
spine_timestamp_col: Optional[str] = None,
|
889
1322
|
exclude_columns: Optional[List[str]] = None,
|
890
1323
|
include_feature_view_timestamp_col: bool = False,
|
@@ -907,6 +1340,23 @@ class FeatureStore:
|
|
907
1340
|
|
908
1341
|
Raises:
|
909
1342
|
ValueError: if features is empty.
|
1343
|
+
|
1344
|
+
Example::
|
1345
|
+
|
1346
|
+
>>> fs = FeatureStore(...)
|
1347
|
+
>>> # Assume you already have a feature view registered.
|
1348
|
+
>>> fv = fs.get_feature_view('my_fv', 'v1')
|
1349
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1350
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
1351
|
+
>>> fs.retrieve_feature_values(spine_df, [fv]).show()
|
1352
|
+
--------------------
|
1353
|
+
|"END_STATION_ID" |
|
1354
|
+
--------------------
|
1355
|
+
|505 |
|
1356
|
+
|347 |
|
1357
|
+
|466 |
|
1358
|
+
--------------------
|
1359
|
+
|
910
1360
|
"""
|
911
1361
|
if spine_timestamp_col is not None:
|
912
1362
|
spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
|
@@ -914,7 +1364,7 @@ class FeatureStore:
|
|
914
1364
|
if len(features) == 0:
|
915
1365
|
raise ValueError("features cannot be empty")
|
916
1366
|
if isinstance(features[0], str):
|
917
|
-
features = self.
|
1367
|
+
features = self._load_serialized_feature_views(cast(List[str], features))
|
918
1368
|
|
919
1369
|
df, _ = self._join_features(
|
920
1370
|
spine_df,
|
@@ -933,6 +1383,7 @@ class FeatureStore:
|
|
933
1383
|
self,
|
934
1384
|
spine_df: DataFrame,
|
935
1385
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
1386
|
+
*,
|
936
1387
|
save_as: Optional[str] = None,
|
937
1388
|
spine_timestamp_col: Optional[str] = None,
|
938
1389
|
spine_label_cols: Optional[List[str]] = None,
|
@@ -966,8 +1417,10 @@ class FeatureStore:
|
|
966
1417
|
Example::
|
967
1418
|
|
968
1419
|
>>> fs = FeatureStore(session, ...)
|
1420
|
+
>>> # Assume you already have a feature view registered.
|
969
1421
|
>>> fv = fs.get_feature_view("MY_FV", "1")
|
970
|
-
>>>
|
1422
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1423
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
971
1424
|
>>> training_set = fs.generate_training_set(
|
972
1425
|
... spine_df,
|
973
1426
|
... [fv],
|
@@ -975,6 +1428,7 @@ class FeatureStore:
|
|
975
1428
|
... )
|
976
1429
|
>>> print(type(training_set))
|
977
1430
|
<class 'snowflake.snowpark.table.Table'>
|
1431
|
+
<BLANKLINE>
|
978
1432
|
>>> print(training_set.queries)
|
979
1433
|
{'queries': ['SELECT * FROM (my_training_set)'], 'post_actions': []}
|
980
1434
|
|
@@ -994,8 +1448,19 @@ class FeatureStore:
|
|
994
1448
|
if save_as is not None:
|
995
1449
|
try:
|
996
1450
|
save_as = self._get_fully_qualified_name(save_as)
|
997
|
-
result_df.write.mode("errorifexists").save_as_table(save_as)
|
1451
|
+
result_df.write.mode("errorifexists").save_as_table(save_as, statement_params=self._telemetry_stmp)
|
1452
|
+
|
1453
|
+
# Add tag
|
1454
|
+
task_obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.TRAINING_DATA, snowml_version.VERSION)
|
1455
|
+
self._session.sql(
|
1456
|
+
f"""
|
1457
|
+
ALTER TABLE {save_as}
|
1458
|
+
SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}='{task_obj_info.to_json()}'
|
1459
|
+
"""
|
1460
|
+
).collect(statement_params=self._telemetry_stmp)
|
1461
|
+
|
998
1462
|
return self._session.table(save_as)
|
1463
|
+
|
999
1464
|
except SnowparkSQLException as e:
|
1000
1465
|
if e.sql_error_code == sql_error_codes.OBJECT_ALREADY_EXISTS:
|
1001
1466
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1014,6 +1479,7 @@ class FeatureStore:
|
|
1014
1479
|
name: str,
|
1015
1480
|
spine_df: DataFrame,
|
1016
1481
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
1482
|
+
*,
|
1017
1483
|
version: Optional[str] = None,
|
1018
1484
|
spine_timestamp_col: Optional[str] = None,
|
1019
1485
|
spine_label_cols: Optional[List[str]] = None,
|
@@ -1030,6 +1496,7 @@ class FeatureStore:
|
|
1030
1496
|
name: str,
|
1031
1497
|
spine_df: DataFrame,
|
1032
1498
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
1499
|
+
*,
|
1033
1500
|
output_type: Literal["table"],
|
1034
1501
|
version: Optional[str] = None,
|
1035
1502
|
spine_timestamp_col: Optional[str] = None,
|
@@ -1046,6 +1513,7 @@ class FeatureStore:
|
|
1046
1513
|
name: str,
|
1047
1514
|
spine_df: DataFrame,
|
1048
1515
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
1516
|
+
*,
|
1049
1517
|
version: Optional[str] = None,
|
1050
1518
|
spine_timestamp_col: Optional[str] = None,
|
1051
1519
|
spine_label_cols: Optional[List[str]] = None,
|
@@ -1082,6 +1550,33 @@ class FeatureStore:
|
|
1082
1550
|
SnowflakeMLException: [ValueError] Invalid output_type specified.
|
1083
1551
|
SnowflakeMLException: [RuntimeError] Dataset name/version already exists.
|
1084
1552
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
1553
|
+
|
1554
|
+
Example::
|
1555
|
+
|
1556
|
+
>>> fs = FeatureStore(session, ...)
|
1557
|
+
>>> # Assume you already have a feature view registered.
|
1558
|
+
>>> fv = fs.get_feature_view("MY_FV", "1")
|
1559
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1560
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
1561
|
+
>>> my_dataset = fs.generate_dataset(
|
1562
|
+
... "my_dataset",
|
1563
|
+
... spine_df,
|
1564
|
+
... [fv],
|
1565
|
+
... )
|
1566
|
+
>>> # Current timestamp will be used as default version name.
|
1567
|
+
>>> # You can explicitly overwrite by setting a version.
|
1568
|
+
>>> my_dataset.list_versions()
|
1569
|
+
['2024_07_12_11_26_22']
|
1570
|
+
<BLANKLINE>
|
1571
|
+
>>> my_dataset.read.to_snowpark_dataframe().show(n=3)
|
1572
|
+
-------------------------------------------------------
|
1573
|
+
|"QUALITY" |"FIXED_ACIDITY" |"VOLATILE_ACIDITY" |
|
1574
|
+
-------------------------------------------------------
|
1575
|
+
|3 |11.600000381469727 |0.5799999833106995 |
|
1576
|
+
|3 |8.300000190734863 |1.0199999809265137 |
|
1577
|
+
|3 |7.400000095367432 |1.184999942779541 |
|
1578
|
+
-------------------------------------------------------
|
1579
|
+
|
1085
1580
|
"""
|
1086
1581
|
if output_type not in {"table", "dataset"}:
|
1087
1582
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1095,7 +1590,7 @@ class FeatureStore:
|
|
1095
1590
|
|
1096
1591
|
fs_meta = FeatureStoreMetadata(
|
1097
1592
|
spine_query=spine_df.queries["queries"][-1],
|
1098
|
-
|
1593
|
+
compact_feature_views=[fv._get_compact_repr().to_json() for fv in features],
|
1099
1594
|
spine_timestamp_col=spine_timestamp_col,
|
1100
1595
|
)
|
1101
1596
|
|
@@ -1130,6 +1625,7 @@ class FeatureStore:
|
|
1130
1625
|
" to generate the data as a Snowflake Table."
|
1131
1626
|
),
|
1132
1627
|
)
|
1628
|
+
# TODO: Add feature store tag once Dataset (version) supports tags
|
1133
1629
|
ds: dataset.Dataset = dataset.create_from_dataframe(
|
1134
1630
|
self._session,
|
1135
1631
|
name,
|
@@ -1166,17 +1662,50 @@ class FeatureStore:
|
|
1166
1662
|
|
1167
1663
|
Raises:
|
1168
1664
|
ValueError: if dataset object is not generated from feature store.
|
1665
|
+
|
1666
|
+
Example::
|
1667
|
+
|
1668
|
+
>>> fs = FeatureStore(session, ...)
|
1669
|
+
>>> # Assume you already have a feature view registered.
|
1670
|
+
>>> fv = fs.get_feature_view("MY_FV", "1.0")
|
1671
|
+
>>> # Spine dataframe has same join keys as the entity of fv.
|
1672
|
+
>>> spine_df = session.create_dataframe(["1", "2"], schema=["id"])
|
1673
|
+
>>> my_dataset = fs.generate_dataset(
|
1674
|
+
... "my_dataset",
|
1675
|
+
... spine_df,
|
1676
|
+
... [fv],
|
1677
|
+
... )
|
1678
|
+
>>> fvs = fs.load_feature_views_from_dataset(my_dataset)
|
1679
|
+
>>> print(len(fvs))
|
1680
|
+
1
|
1681
|
+
<BLANKLINE>
|
1682
|
+
>>> print(type(fvs[0]))
|
1683
|
+
<class 'snowflake.ml.feature_store.feature_view.FeatureView'>
|
1684
|
+
<BLANKLINE>
|
1685
|
+
>>> print(fvs[0].name)
|
1686
|
+
MY_FV
|
1687
|
+
<BLANKLINE>
|
1688
|
+
>>> print(fvs[0].version)
|
1689
|
+
1.0
|
1690
|
+
|
1169
1691
|
"""
|
1170
1692
|
assert ds.selected_version is not None
|
1171
1693
|
source_meta = ds.selected_version._get_metadata()
|
1172
1694
|
if (
|
1173
1695
|
source_meta is None
|
1174
1696
|
or not isinstance(source_meta.properties, FeatureStoreMetadata)
|
1175
|
-
or
|
1697
|
+
or (
|
1698
|
+
source_meta.properties.serialized_feature_views is None
|
1699
|
+
and source_meta.properties.compact_feature_views is None
|
1700
|
+
)
|
1176
1701
|
):
|
1177
1702
|
raise ValueError(f"Dataset {ds} does not contain valid feature view information.")
|
1178
1703
|
|
1179
|
-
|
1704
|
+
properties = source_meta.properties
|
1705
|
+
if properties.serialized_feature_views:
|
1706
|
+
return self._load_serialized_feature_views(properties.serialized_feature_views)
|
1707
|
+
else:
|
1708
|
+
return self._load_compact_feature_views(properties.compact_feature_views) # type: ignore[arg-type]
|
1180
1709
|
|
1181
1710
|
@dispatch_decorator()
|
1182
1711
|
def _clear(self, dryrun: bool = True) -> None:
|
@@ -1197,17 +1726,17 @@ class FeatureStore:
|
|
1197
1726
|
|
1198
1727
|
all_fvs_df = self.list_feature_views()
|
1199
1728
|
all_entities_df = self.list_entities()
|
1200
|
-
all_fvs_rows = all_fvs_df.collect()
|
1201
|
-
all_entities_rows = all_entities_df.collect()
|
1729
|
+
all_fvs_rows = all_fvs_df.collect(statement_params=self._telemetry_stmp)
|
1730
|
+
all_entities_rows = all_entities_df.collect(statement_params=self._telemetry_stmp)
|
1202
1731
|
|
1203
1732
|
if dryrun:
|
1204
1733
|
logger.info(
|
1205
1734
|
"Following feature views and entities will be deleted."
|
1206
|
-
+ " Set 'dryrun=False' to perform the actual deletion."
|
1735
|
+
+ " Set 'dryrun=False' to perform the actual deletion.",
|
1207
1736
|
)
|
1208
1737
|
logger.info(f"Total {len(all_fvs_rows)} Feature views to be deleted:")
|
1209
1738
|
all_fvs_df.show(n=len(all_fvs_rows))
|
1210
|
-
logger.info(f"\nTotal {len(all_entities_rows)}
|
1739
|
+
logger.info(f"\nTotal {len(all_entities_rows)} Entities to be deleted:")
|
1211
1740
|
all_entities_df.show(n=len(all_entities_rows))
|
1212
1741
|
return
|
1213
1742
|
|
@@ -1265,6 +1794,7 @@ class FeatureStore:
|
|
1265
1794
|
{tagging_clause}
|
1266
1795
|
)
|
1267
1796
|
WAREHOUSE = {warehouse}
|
1797
|
+
REFRESH_MODE = {feature_view.refresh_mode}
|
1268
1798
|
AS {feature_view.query}
|
1269
1799
|
"""
|
1270
1800
|
self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
|
@@ -1482,7 +2012,7 @@ class FeatureStore:
|
|
1482
2012
|
MATCH_CONDITION ( spine.ts >= feature.ts )
|
1483
2013
|
ON spine.id = feature.id;
|
1484
2014
|
"""
|
1485
|
-
).collect()
|
2015
|
+
).collect(statement_params=self._telemetry_stmp)
|
1486
2016
|
except SnowparkSQLException:
|
1487
2017
|
return False
|
1488
2018
|
return result is not None and len(result) == 1
|
@@ -1686,6 +2216,7 @@ class FeatureStore:
|
|
1686
2216
|
values.append(row["target_lag"] if "target_lag" in row else None)
|
1687
2217
|
values.append(row["refresh_mode"] if "refresh_mode" in row else None)
|
1688
2218
|
values.append(row["scheduling_state"] if "scheduling_state" in row else None)
|
2219
|
+
values.append(row["warehouse"] if "warehouse" in row else None)
|
1689
2220
|
output_values.append(values)
|
1690
2221
|
|
1691
2222
|
def _lookup_feature_view_metadata(self, row: Row, fv_name: str) -> Tuple[_FeatureViewMetadata, str]:
|
@@ -1862,11 +2393,11 @@ class FeatureStore:
|
|
1862
2393
|
result.append(row)
|
1863
2394
|
return result
|
1864
2395
|
|
1865
|
-
def
|
1866
|
-
self,
|
2396
|
+
def _load_serialized_feature_views(
|
2397
|
+
self, serialized_feature_views: List[str]
|
1867
2398
|
) -> List[Union[FeatureView, FeatureViewSlice]]:
|
1868
2399
|
results: List[Union[FeatureView, FeatureViewSlice]] = []
|
1869
|
-
for obj in
|
2400
|
+
for obj in serialized_feature_views:
|
1870
2401
|
try:
|
1871
2402
|
obj_type = json.loads(obj)[_FEATURE_OBJ_TYPE]
|
1872
2403
|
except Exception as e:
|
@@ -1880,6 +2411,14 @@ class FeatureStore:
|
|
1880
2411
|
raise ValueError(f"Unsupported feature object type: {obj_type}")
|
1881
2412
|
return results
|
1882
2413
|
|
2414
|
+
def _load_compact_feature_views(
|
2415
|
+
self, compact_feature_views: List[str]
|
2416
|
+
) -> List[Union[FeatureView, FeatureViewSlice]]:
|
2417
|
+
results: List[Union[FeatureView, FeatureViewSlice]] = []
|
2418
|
+
for obj in compact_feature_views:
|
2419
|
+
results.append(FeatureView._load_from_compact_repr(self._session, obj))
|
2420
|
+
return results
|
2421
|
+
|
1883
2422
|
def _exclude_columns(self, df: DataFrame, exclude_columns: List[str]) -> DataFrame:
|
1884
2423
|
exclude_columns = to_sql_identifiers(exclude_columns) # type: ignore[assignment]
|
1885
2424
|
df_cols = to_sql_identifiers(df.columns)
|
@@ -1895,12 +2434,12 @@ class FeatureStore:
|
|
1895
2434
|
|
1896
2435
|
def _is_dataset_enabled(self) -> bool:
|
1897
2436
|
try:
|
1898
|
-
self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect(
|
2437
|
+
self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect(
|
2438
|
+
statement_params=self._telemetry_stmp
|
2439
|
+
)
|
1899
2440
|
return True
|
1900
|
-
except SnowparkSQLException
|
1901
|
-
|
1902
|
-
return False
|
1903
|
-
raise
|
2441
|
+
except SnowparkSQLException:
|
2442
|
+
return False
|
1904
2443
|
|
1905
2444
|
def _check_feature_store_object_versions(self) -> None:
|
1906
2445
|
versions = self._collapse_object_versions()
|
@@ -1942,6 +2481,7 @@ class FeatureStore:
|
|
1942
2481
|
SnowflakeMLException: [RuntimeError] Failed to lookup tags.
|
1943
2482
|
|
1944
2483
|
Example::
|
2484
|
+
|
1945
2485
|
self._lookup_tags("TABLE", "MY_FV", [lambda d: d["tagName"] == "TARGET_TAG_NAME"])
|
1946
2486
|
|
1947
2487
|
"""
|
@@ -1979,6 +2519,7 @@ class FeatureStore:
|
|
1979
2519
|
SnowflakeMLException: [RuntimeError] Failed to lookup tagged objects.
|
1980
2520
|
|
1981
2521
|
Example::
|
2522
|
+
|
1982
2523
|
self._lookup_tagged_objects("TARGET_TAG_NAME", [lambda d: d["entityName"] == "MY_FV"])
|
1983
2524
|
|
1984
2525
|
"""
|
@@ -2024,3 +2565,23 @@ class FeatureStore:
|
|
2024
2565
|
),
|
2025
2566
|
)
|
2026
2567
|
return sorted_versions
|
2568
|
+
|
2569
|
+
def _validate_feature_view_name_and_version_input(
|
2570
|
+
self, feature_view: Union[FeatureView, str], version: Optional[str] = None
|
2571
|
+
) -> FeatureView:
|
2572
|
+
if isinstance(feature_view, str):
|
2573
|
+
if version is None:
|
2574
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2575
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
2576
|
+
original_exception=ValueError("Version must be provided when argument feature_view is a str."),
|
2577
|
+
)
|
2578
|
+
feature_view = self.get_feature_view(feature_view, version)
|
2579
|
+
elif not isinstance(feature_view, FeatureView):
|
2580
|
+
raise snowml_exceptions.SnowflakeMLException(
|
2581
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
2582
|
+
original_exception=ValueError(
|
2583
|
+
"Invalid type of argument feature_view. It must be either str or FeatureView type."
|
2584
|
+
),
|
2585
|
+
)
|
2586
|
+
|
2587
|
+
return feature_view
|