snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +2 -1
- snowflake/cortex/_complete.py +240 -16
- snowflake/cortex/_extract_answer.py +0 -1
- snowflake/cortex/_sentiment.py +0 -1
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_summarize.py +0 -1
- snowflake/cortex/_translate.py +0 -1
- snowflake/cortex/_util.py +34 -10
- snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
- snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
- snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
- snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
- snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
- snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/_internal/telemetry.py +26 -0
- snowflake/ml/_internal/utils/identifier.py +14 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
- snowflake/ml/dataset/dataset.py +54 -32
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/feature_store.py +440 -243
- snowflake/ml/feature_store/feature_view.py +61 -9
- snowflake/ml/fileset/embedded_stage_fs.py +25 -21
- snowflake/ml/fileset/fileset.py +2 -2
- snowflake/ml/fileset/snowfs.py +4 -15
- snowflake/ml/fileset/stage_fs.py +6 -8
- snowflake/ml/lineage/__init__.py +3 -0
- snowflake/ml/lineage/lineage_node.py +139 -0
- snowflake/ml/model/_client/model/model_impl.py +47 -14
- snowflake/ml/model/_client/model/model_version_impl.py +82 -2
- snowflake/ml/model/_client/ops/model_ops.py +77 -5
- snowflake/ml/model/_client/sql/model.py +1 -0
- snowflake/ml/model/_client/sql/model_version.py +47 -4
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
- snowflake/ml/model/_model_composer/model_composer.py +7 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -3
- snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_packager.py +9 -4
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/custom_model.py +22 -2
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +74 -4
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +158 -121
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +39 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +88 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +22 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +5 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +5 -3
- snowflake/ml/modeling/cluster/birch.py +5 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +5 -3
- snowflake/ml/modeling/cluster/dbscan.py +5 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +5 -3
- snowflake/ml/modeling/cluster/k_means.py +5 -3
- snowflake/ml/modeling/cluster/mean_shift.py +5 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +5 -3
- snowflake/ml/modeling/cluster/optics.py +5 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +5 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +5 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +5 -3
- snowflake/ml/modeling/compose/column_transformer.py +5 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +5 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +5 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +5 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +5 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +5 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +5 -3
- snowflake/ml/modeling/covariance/oas.py +5 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +5 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +5 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +5 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +5 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +5 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +5 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -3
- snowflake/ml/modeling/decomposition/pca.py +5 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +5 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +5 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +5 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +5 -3
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +5 -3
- snowflake/ml/modeling/impute/knn_imputer.py +5 -3
- snowflake/ml/modeling/impute/missing_indicator.py +5 -3
- snowflake/ml/modeling/impute/simple_imputer.py +8 -4
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +5 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +5 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +5 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +5 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +5 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +1 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +1 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ridge.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
- snowflake/ml/modeling/manifold/isomap.py +5 -3
- snowflake/ml/modeling/manifold/mds.py +5 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +5 -3
- snowflake/ml/modeling/manifold/tsne.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +5 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +5 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +5 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +5 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +5 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +5 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
- snowflake/ml/modeling/pipeline/pipeline.py +6 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +53 -11
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +44 -13
- snowflake/ml/modeling/preprocessing/polynomial_features.py +5 -3
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
- snowflake/ml/modeling/svm/linear_svc.py +1 -1
- snowflake/ml/modeling/svm/linear_svr.py +1 -1
- snowflake/ml/modeling/svm/nu_svc.py +1 -1
- snowflake/ml/modeling/svm/nu_svr.py +1 -1
- snowflake/ml/modeling/svm/svc.py +1 -1
- snowflake/ml/modeling/svm/svr.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
- snowflake/ml/registry/_manager/model_manager.py +16 -3
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +51 -7
- snowflake_ml_python-1.5.4.dist-info/RECORD +389 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
- snowflake_ml_python-1.5.2.dist-info/RECORD +0 -384
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,7 @@ from snowflake.ml._internal.exceptions import (
|
|
33
33
|
dataset_errors,
|
34
34
|
error_codes,
|
35
35
|
exceptions as snowml_exceptions,
|
36
|
+
sql_error_codes,
|
36
37
|
)
|
37
38
|
from snowflake.ml._internal.utils import identifier
|
38
39
|
from snowflake.ml._internal.utils.sql_identifier import (
|
@@ -131,6 +132,9 @@ _LIST_FEATURE_VIEW_SCHEMA = StructType(
|
|
131
132
|
StructField("owner", StringType()),
|
132
133
|
StructField("desc", StringType()),
|
133
134
|
StructField("entities", ArrayType(StringType())),
|
135
|
+
StructField("refresh_freq", StringType()),
|
136
|
+
StructField("refresh_mode", StringType()),
|
137
|
+
StructField("scheduling_state", StringType()),
|
134
138
|
]
|
135
139
|
)
|
136
140
|
|
@@ -267,10 +271,7 @@ class FeatureStore:
|
|
267
271
|
raise snowml_exceptions.SnowflakeMLException(
|
268
272
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
269
273
|
original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
|
270
|
-
)
|
271
|
-
|
272
|
-
# TODO: remove this after tag_ref_internal rollout
|
273
|
-
self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
|
274
|
+
) from e
|
274
275
|
self._check_feature_store_object_versions()
|
275
276
|
logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
|
276
277
|
|
@@ -469,7 +470,12 @@ class FeatureStore:
|
|
469
470
|
|
470
471
|
@dispatch_decorator()
|
471
472
|
def update_feature_view(
|
472
|
-
self,
|
473
|
+
self,
|
474
|
+
name: str,
|
475
|
+
version: str,
|
476
|
+
refresh_freq: Optional[str] = None,
|
477
|
+
warehouse: Optional[str] = None,
|
478
|
+
desc: Optional[str] = None,
|
473
479
|
) -> FeatureView:
|
474
480
|
"""Update a registered feature view.
|
475
481
|
Check feature_view.py for which fields are allowed to be updated after registration.
|
@@ -479,32 +485,67 @@ class FeatureStore:
|
|
479
485
|
version: version of the FeatureView to be updated.
|
480
486
|
refresh_freq: updated refresh frequency.
|
481
487
|
warehouse: updated warehouse.
|
488
|
+
desc: description of feature view.
|
482
489
|
|
483
490
|
Returns:
|
484
491
|
Updated FeatureView.
|
485
492
|
|
493
|
+
Example::
|
494
|
+
|
495
|
+
>>> fs = FeatureStore(
|
496
|
+
... ...,
|
497
|
+
... default_warehouse='ORIGINAL_WH',
|
498
|
+
... )
|
499
|
+
>>> fv = FeatureView(
|
500
|
+
... name='foo',
|
501
|
+
... entities=[e1, e2],
|
502
|
+
... feature_df=session.sql('...'),
|
503
|
+
... timestamp_col='timestamp',
|
504
|
+
... refresh_freq='1d',
|
505
|
+
... desc='this is old description'
|
506
|
+
... )
|
507
|
+
>>> fv = fs.register_feature_view(feature_view=fv, version='v1')
|
508
|
+
>>> # update_feature_view will apply new arguments to the registered feature view.
|
509
|
+
>>> new_fv = fs.update_feature_view(
|
510
|
+
... name='foo',
|
511
|
+
... version='v1',
|
512
|
+
... refresh_freq='2d',
|
513
|
+
... warehouse='MY_NEW_WH',
|
514
|
+
... desc='that is new description',
|
515
|
+
... )
|
516
|
+
|
486
517
|
Raises:
|
487
518
|
SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
|
488
519
|
SnowflakeMLException: [RuntimeError] Failed to update feature view.
|
489
520
|
"""
|
490
521
|
feature_view = self.get_feature_view(name=name, version=version)
|
491
|
-
if
|
492
|
-
full_name = f"{feature_view.name}/{feature_view.version}"
|
493
|
-
raise snowml_exceptions.SnowflakeMLException(
|
494
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
495
|
-
original_exception=RuntimeError(f"Feature view {full_name} must be non-static so that can be updated."),
|
496
|
-
)
|
522
|
+
new_desc = desc if desc is not None else feature_view.desc
|
497
523
|
|
498
|
-
|
524
|
+
if feature_view.status == FeatureViewStatus.STATIC:
|
525
|
+
if refresh_freq is not None or warehouse is not None:
|
526
|
+
full_name = f"{feature_view.name}/{feature_view.version}"
|
527
|
+
raise snowml_exceptions.SnowflakeMLException(
|
528
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
529
|
+
original_exception=RuntimeError(
|
530
|
+
f"Static feature view '{full_name}' does not support refresh_freq and warehouse."
|
531
|
+
),
|
532
|
+
)
|
533
|
+
new_query = f"""
|
534
|
+
ALTER VIEW {feature_view.fully_qualified_name()} SET
|
535
|
+
COMMENT = '{new_desc}'
|
536
|
+
"""
|
537
|
+
else:
|
538
|
+
warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
|
539
|
+
# TODO(@wezhou): we need to properly handle cron expr
|
540
|
+
new_query = f"""
|
541
|
+
ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
|
542
|
+
TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
|
543
|
+
WAREHOUSE = {warehouse}
|
544
|
+
COMMENT = '{new_desc}'
|
545
|
+
"""
|
499
546
|
|
500
|
-
# TODO(@wezhou): we need to properly handle cron expr
|
501
547
|
try:
|
502
|
-
self._session.sql(
|
503
|
-
f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
|
504
|
-
TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
|
505
|
-
WAREHOUSE = {warehouse}
|
506
|
-
"""
|
507
|
-
).collect(statement_params=self._telemetry_stmp)
|
548
|
+
self._session.sql(new_query).collect(statement_params=self._telemetry_stmp)
|
508
549
|
except Exception as e:
|
509
550
|
raise snowml_exceptions.SnowflakeMLException(
|
510
551
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
@@ -559,13 +600,10 @@ class FeatureStore:
|
|
559
600
|
|
560
601
|
if entity_name is not None:
|
561
602
|
entity_name = SqlIdentifier(entity_name)
|
562
|
-
|
563
|
-
return self._optimized_find_feature_views(entity_name, feature_view_name)
|
564
|
-
else:
|
565
|
-
return self._find_feature_views(entity_name, feature_view_name)
|
603
|
+
return self._optimized_find_feature_views(entity_name, feature_view_name)
|
566
604
|
else:
|
567
605
|
output_values: List[List[Any]] = []
|
568
|
-
for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
|
606
|
+
for row, _ in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
|
569
607
|
self._extract_feature_view_info(row, output_values)
|
570
608
|
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
571
609
|
|
@@ -596,7 +634,90 @@ class FeatureStore:
|
|
596
634
|
original_exception=ValueError(f"Failed to find FeatureView {name}/{version}: {results}"),
|
597
635
|
)
|
598
636
|
|
599
|
-
return self._compose_feature_view(results[0], self.list_entities().collect())
|
637
|
+
return self._compose_feature_view(results[0][0], results[0][1], self.list_entities().collect())
|
638
|
+
|
639
|
+
@dispatch_decorator()
|
640
|
+
def refresh_feature_view(self, feature_view: FeatureView) -> None:
|
641
|
+
"""Manually refresh a feature view.
|
642
|
+
|
643
|
+
Args:
|
644
|
+
feature_view: A registered feature view.
|
645
|
+
|
646
|
+
Example::
|
647
|
+
|
648
|
+
>>> fs = FeatureStore(...)
|
649
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
650
|
+
>>> fs.refresh_feature_view(fv)
|
651
|
+
>>> fs.get_refresh_history(fv).show()
|
652
|
+
---------------------------------------------------------------------------------------------------------------
|
653
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
654
|
+
---------------------------------------------------------------------------------------------------------------
|
655
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-02 13:45:01.11300-07:00 |2024-07-02 13:45:01.82700-07:00 |INCREMENTAL |
|
656
|
+
---------------------------------------------------------------------------------------------------------------
|
657
|
+
"""
|
658
|
+
if feature_view.status == FeatureViewStatus.STATIC:
|
659
|
+
warnings.warn(
|
660
|
+
"Static feature view can't be refreshed. You must set refresh_freq when register_feature_view().",
|
661
|
+
stacklevel=2,
|
662
|
+
category=UserWarning,
|
663
|
+
)
|
664
|
+
return
|
665
|
+
self._update_feature_view_status(feature_view, "REFRESH")
|
666
|
+
|
667
|
+
def get_refresh_history(self, feature_view: FeatureView, verbose: bool = False) -> DataFrame:
|
668
|
+
"""Get refresh history statistics about a feature view.
|
669
|
+
|
670
|
+
Args:
|
671
|
+
feature_view: A registered feature view.
|
672
|
+
verbose: Return more detailed history when set true.
|
673
|
+
|
674
|
+
Returns:
|
675
|
+
A dataframe containing the refresh history information.
|
676
|
+
|
677
|
+
Example::
|
678
|
+
|
679
|
+
>>> fs = FeatureStore(...)
|
680
|
+
>>> fv = fs.get_feature_view(name='MY_FV', version='v1')
|
681
|
+
>>> fs.refresh_feature_view(fv)
|
682
|
+
>>> fs.get_refresh_history(fv).show()
|
683
|
+
---------------------------------------------------------------------------------------------------------------
|
684
|
+
|"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
|
685
|
+
---------------------------------------------------------------------------------------------------------------
|
686
|
+
|MY_FV$v1 |SUCCEEDED |2024-07-02 13:45:01.11300-07:00 |2024-07-02 13:45:01.82700-07:00 |INCREMENTAL |
|
687
|
+
---------------------------------------------------------------------------------------------------------------
|
688
|
+
"""
|
689
|
+
if feature_view.status == FeatureViewStatus.STATIC:
|
690
|
+
warnings.warn(
|
691
|
+
"Static feature view never refreshes.",
|
692
|
+
stacklevel=2,
|
693
|
+
category=UserWarning,
|
694
|
+
)
|
695
|
+
return self._session.create_dataframe([Row()])
|
696
|
+
|
697
|
+
if feature_view.status == FeatureViewStatus.DRAFT:
|
698
|
+
warnings.warn(
|
699
|
+
"This feature view has not been registered thus has no refresh history.",
|
700
|
+
stacklevel=2,
|
701
|
+
category=UserWarning,
|
702
|
+
)
|
703
|
+
return self._session.create_dataframe([Row()])
|
704
|
+
|
705
|
+
fv_resolved_name = FeatureView._get_physical_name(
|
706
|
+
feature_view.name,
|
707
|
+
feature_view.version, # type: ignore[arg-type]
|
708
|
+
).resolved()
|
709
|
+
select_cols = "*" if verbose else "name, state, refresh_start_time, refresh_end_time, refresh_action"
|
710
|
+
return self._session.sql(
|
711
|
+
f"""
|
712
|
+
SELECT
|
713
|
+
{select_cols}
|
714
|
+
FROM TABLE (
|
715
|
+
{self._config.database}.INFORMATION_SCHEMA.DYNAMIC_TABLE_REFRESH_HISTORY ()
|
716
|
+
)
|
717
|
+
WHERE NAME = '{fv_resolved_name}'
|
718
|
+
AND SCHEMA_NAME = '{self._config.schema}'
|
719
|
+
"""
|
720
|
+
)
|
600
721
|
|
601
722
|
@dispatch_decorator()
|
602
723
|
def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
@@ -807,6 +928,86 @@ class FeatureStore:
|
|
807
928
|
|
808
929
|
return df
|
809
930
|
|
931
|
+
@dispatch_decorator()
|
932
|
+
def generate_training_set(
|
933
|
+
self,
|
934
|
+
spine_df: DataFrame,
|
935
|
+
features: List[Union[FeatureView, FeatureViewSlice]],
|
936
|
+
save_as: Optional[str] = None,
|
937
|
+
spine_timestamp_col: Optional[str] = None,
|
938
|
+
spine_label_cols: Optional[List[str]] = None,
|
939
|
+
exclude_columns: Optional[List[str]] = None,
|
940
|
+
include_feature_view_timestamp_col: bool = False,
|
941
|
+
) -> DataFrame:
|
942
|
+
"""
|
943
|
+
Generate a training set from the specified Spine DataFrame and Feature Views. Result is
|
944
|
+
materialized to a Snowflake Table if `save_as` is specified.
|
945
|
+
|
946
|
+
Args:
|
947
|
+
spine_df: Snowpark DataFrame to join features into.
|
948
|
+
features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
|
949
|
+
save_as: If specified, a new table containing the produced result will be created. Name can be a fully
|
950
|
+
qualified name or an unqualified name. If unqualified, defaults to the Feature Store database and schema
|
951
|
+
spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
|
952
|
+
time-series features. If spine_timestamp_col is not none, the input features also must have
|
953
|
+
timestamp_col.
|
954
|
+
spine_label_cols: Name of column(s) in spine_df that contains labels.
|
955
|
+
exclude_columns: Name of column(s) to exclude from the resulting training set.
|
956
|
+
include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
|
957
|
+
(if feature view has timestamp column) if set true. Default to false.
|
958
|
+
|
959
|
+
Returns:
|
960
|
+
Returns a Snowpark DataFrame representing the training set.
|
961
|
+
|
962
|
+
Raises:
|
963
|
+
SnowflakeMLException: [RuntimeError] Materialized table name already exists
|
964
|
+
SnowflakeMLException: [RuntimeError] Failed to create materialized table.
|
965
|
+
|
966
|
+
Example::
|
967
|
+
|
968
|
+
>>> fs = FeatureStore(session, ...)
|
969
|
+
>>> fv = fs.get_feature_view("MY_FV", "1")
|
970
|
+
>>> spine_df = session.create_dataframe(["id_1", "id_2"], schema=["id"])
|
971
|
+
>>> training_set = fs.generate_training_set(
|
972
|
+
... spine_df,
|
973
|
+
... [fv],
|
974
|
+
... save_as="my_training_set",
|
975
|
+
... )
|
976
|
+
>>> print(type(training_set))
|
977
|
+
<class 'snowflake.snowpark.table.Table'>
|
978
|
+
>>> print(training_set.queries)
|
979
|
+
{'queries': ['SELECT * FROM (my_training_set)'], 'post_actions': []}
|
980
|
+
|
981
|
+
"""
|
982
|
+
if spine_timestamp_col is not None:
|
983
|
+
spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
|
984
|
+
if spine_label_cols is not None:
|
985
|
+
spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
|
986
|
+
|
987
|
+
result_df, join_keys = self._join_features(
|
988
|
+
spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
|
989
|
+
)
|
990
|
+
|
991
|
+
if exclude_columns is not None:
|
992
|
+
result_df = self._exclude_columns(result_df, exclude_columns)
|
993
|
+
|
994
|
+
if save_as is not None:
|
995
|
+
try:
|
996
|
+
save_as = self._get_fully_qualified_name(save_as)
|
997
|
+
result_df.write.mode("errorifexists").save_as_table(save_as)
|
998
|
+
return self._session.table(save_as)
|
999
|
+
except SnowparkSQLException as e:
|
1000
|
+
if e.sql_error_code == sql_error_codes.OBJECT_ALREADY_EXISTS:
|
1001
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1002
|
+
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
1003
|
+
original_exception=RuntimeError(str(e)),
|
1004
|
+
) from e
|
1005
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1006
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1007
|
+
original_exception=RuntimeError(f"An error occurred during training set materialization: {e}."),
|
1008
|
+
) from e
|
1009
|
+
return result_df
|
1010
|
+
|
810
1011
|
@overload
|
811
1012
|
def generate_dataset(
|
812
1013
|
self,
|
@@ -859,7 +1060,7 @@ class FeatureStore:
|
|
859
1060
|
Args:
|
860
1061
|
name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
|
861
1062
|
by their name and version.
|
862
|
-
spine_df:
|
1063
|
+
spine_df: Snowpark DataFrame to join features into.
|
863
1064
|
features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
|
864
1065
|
version: The version of the Dataset to be generated. If none specified, the current timestamp
|
865
1066
|
will be used instead.
|
@@ -867,22 +1068,19 @@ class FeatureStore:
|
|
867
1068
|
time-series features. If spine_timestamp_col is not none, the input features also must have
|
868
1069
|
timestamp_col.
|
869
1070
|
spine_label_cols: Name of column(s) in spine_df that contains labels.
|
870
|
-
exclude_columns:
|
871
|
-
The underlying storage will still contain the columns.
|
1071
|
+
exclude_columns: Name of column(s) to exclude from the resulting training set.
|
872
1072
|
include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
|
873
1073
|
(if feature view has timestamp column) if set true. Default to false.
|
874
1074
|
desc: A description about this dataset.
|
875
|
-
output_type: The type of Snowflake storage to use for the generated training data.
|
1075
|
+
output_type: (Deprecated) The type of Snowflake storage to use for the generated training data.
|
876
1076
|
|
877
1077
|
Returns:
|
878
1078
|
If output_type is "dataset" (default), returns a Dataset object.
|
879
1079
|
If output_type is "table", returns a Snowpark DataFrame representing the table.
|
880
1080
|
|
881
1081
|
Raises:
|
882
|
-
SnowflakeMLException: [ValueError] Dataset name/version already exists
|
883
|
-
SnowflakeMLException: [ValueError] Snapshot creation failed.
|
884
1082
|
SnowflakeMLException: [ValueError] Invalid output_type specified.
|
885
|
-
SnowflakeMLException: [RuntimeError]
|
1083
|
+
SnowflakeMLException: [RuntimeError] Dataset name/version already exists.
|
886
1084
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
887
1085
|
"""
|
888
1086
|
if output_type not in {"table", "dataset"}:
|
@@ -890,61 +1088,59 @@ class FeatureStore:
|
|
890
1088
|
error_code=error_codes.INVALID_ARGUMENT,
|
891
1089
|
original_exception=ValueError(f"Invalid output_type: {output_type}."),
|
892
1090
|
)
|
893
|
-
if spine_timestamp_col is not None:
|
894
|
-
spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
|
895
|
-
if spine_label_cols is not None:
|
896
|
-
spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
|
897
|
-
|
898
|
-
result_df, join_keys = self._join_features(
|
899
|
-
spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
|
900
|
-
)
|
901
1091
|
|
902
1092
|
# Convert name to fully qualified name if not already fully qualified
|
903
|
-
|
904
|
-
name = "{}.{}.{}".format(
|
905
|
-
db_name or self._config.database,
|
906
|
-
schema_name or self._config.schema,
|
907
|
-
object_name,
|
908
|
-
)
|
1093
|
+
name = self._get_fully_qualified_name(name)
|
909
1094
|
version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
910
1095
|
|
911
|
-
if exclude_columns is not None:
|
912
|
-
result_df = self._exclude_columns(result_df, exclude_columns)
|
913
|
-
|
914
1096
|
fs_meta = FeatureStoreMetadata(
|
915
1097
|
spine_query=spine_df.queries["queries"][-1],
|
916
1098
|
serialized_feature_views=[fv.to_json() for fv in features],
|
917
1099
|
spine_timestamp_col=spine_timestamp_col,
|
918
1100
|
)
|
919
1101
|
|
1102
|
+
# Only set a save_as name if output_type is table
|
1103
|
+
table_name = f"{name}_{version}" if output_type == "table" else None
|
1104
|
+
result_df = self.generate_training_set(
|
1105
|
+
spine_df,
|
1106
|
+
features,
|
1107
|
+
spine_timestamp_col=spine_timestamp_col,
|
1108
|
+
spine_label_cols=spine_label_cols,
|
1109
|
+
exclude_columns=exclude_columns,
|
1110
|
+
include_feature_view_timestamp_col=include_feature_view_timestamp_col,
|
1111
|
+
save_as=table_name,
|
1112
|
+
)
|
1113
|
+
if output_type == "table":
|
1114
|
+
warnings.warn(
|
1115
|
+
"Generating a table from generate_dataset() is deprecated and will be removed in a future release,"
|
1116
|
+
" use generate_training_set() instead.",
|
1117
|
+
DeprecationWarning,
|
1118
|
+
stacklevel=2,
|
1119
|
+
)
|
1120
|
+
return result_df
|
1121
|
+
|
920
1122
|
try:
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
error_code=error_codes.SNOWML_CREATE_FAILED,
|
931
|
-
original_exception=RuntimeError(
|
932
|
-
"Dataset is not enabled in your account. Ask your account admin to set"
|
933
|
-
' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
|
934
|
-
" as a Snowflake Table instead."
|
935
|
-
),
|
936
|
-
)
|
937
|
-
ds: dataset.Dataset = dataset.create_from_dataframe(
|
938
|
-
self._session,
|
939
|
-
name,
|
940
|
-
version,
|
941
|
-
input_dataframe=result_df,
|
942
|
-
exclude_cols=[spine_timestamp_col],
|
943
|
-
label_cols=spine_label_cols,
|
944
|
-
properties=fs_meta,
|
945
|
-
comment=desc,
|
1123
|
+
assert output_type == "dataset"
|
1124
|
+
if not self._is_dataset_enabled():
|
1125
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1126
|
+
error_code=error_codes.SNOWML_CREATE_FAILED,
|
1127
|
+
original_exception=RuntimeError(
|
1128
|
+
"Dataset is not enabled in your account. Ask your account admin to set"
|
1129
|
+
" FEATURE_DATASET=ENABLED or use generate_training_set() instead"
|
1130
|
+
" to generate the data as a Snowflake Table."
|
1131
|
+
),
|
946
1132
|
)
|
947
|
-
|
1133
|
+
ds: dataset.Dataset = dataset.create_from_dataframe(
|
1134
|
+
self._session,
|
1135
|
+
name,
|
1136
|
+
version,
|
1137
|
+
input_dataframe=result_df,
|
1138
|
+
exclude_cols=[spine_timestamp_col] if spine_timestamp_col is not None else [],
|
1139
|
+
label_cols=spine_label_cols,
|
1140
|
+
properties=fs_meta,
|
1141
|
+
comment=desc,
|
1142
|
+
)
|
1143
|
+
return ds
|
948
1144
|
|
949
1145
|
except dataset_errors.DatasetExistError as e:
|
950
1146
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1391,20 +1587,36 @@ class FeatureStore:
|
|
1391
1587
|
return SqlIdentifier(identifier.concat_names([_ENTITY_TAG_PREFIX, raw_name]))
|
1392
1588
|
|
1393
1589
|
def _get_fully_qualified_name(self, name: Union[SqlIdentifier, str]) -> str:
|
1394
|
-
|
1590
|
+
# Do a quick check to see if we can skip regex operations
|
1591
|
+
if "." not in name:
|
1592
|
+
return f"{self._config.full_schema_path}.{name}"
|
1593
|
+
|
1594
|
+
db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
|
1595
|
+
return "{}.{}.{}".format(
|
1596
|
+
db_name or self._config.database,
|
1597
|
+
schema_name or self._config.schema,
|
1598
|
+
object_name,
|
1599
|
+
)
|
1395
1600
|
|
1396
1601
|
# TODO: SHOW DYNAMIC TABLES is very slow while other show objects are fast, investigate with DT in SNOW-902804.
|
1397
1602
|
def _get_fv_backend_representations(
|
1398
1603
|
self, object_name: Optional[SqlIdentifier], prefix_match: bool = False
|
1399
|
-
) -> List[Row]:
|
1400
|
-
dynamic_table_results =
|
1401
|
-
|
1604
|
+
) -> List[Tuple[Row, _FeatureStoreObjTypes]]:
|
1605
|
+
dynamic_table_results = [
|
1606
|
+
(d, _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW)
|
1607
|
+
for d in self._find_object("DYNAMIC TABLES", object_name, prefix_match)
|
1608
|
+
]
|
1609
|
+
view_results = [
|
1610
|
+
(d, _FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW)
|
1611
|
+
for d in self._find_object("VIEWS", object_name, prefix_match)
|
1612
|
+
]
|
1402
1613
|
return dynamic_table_results + view_results
|
1403
1614
|
|
1404
1615
|
def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
|
1405
1616
|
assert operation in [
|
1406
1617
|
"RESUME",
|
1407
1618
|
"SUSPEND",
|
1619
|
+
"REFRESH",
|
1408
1620
|
], f"Operation: {operation} not supported"
|
1409
1621
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
1410
1622
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1417,9 +1629,10 @@ class FeatureStore:
|
|
1417
1629
|
self._session.sql(f"ALTER DYNAMIC TABLE {fully_qualified_name} {operation}").collect(
|
1418
1630
|
statement_params=self._telemetry_stmp
|
1419
1631
|
)
|
1420
|
-
|
1421
|
-
|
1422
|
-
|
1632
|
+
if operation != "REFRESH":
|
1633
|
+
self._session.sql(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}").collect(
|
1634
|
+
statement_params=self._telemetry_stmp
|
1635
|
+
)
|
1423
1636
|
except Exception as e:
|
1424
1637
|
raise snowml_exceptions.SnowflakeMLException(
|
1425
1638
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
@@ -1438,46 +1651,28 @@ class FeatureStore:
|
|
1438
1651
|
# TODO: this can be optimized further by directly getting all possible FVs and filter by tag
|
1439
1652
|
# it's easier to rewrite the code once we can remove the tag_reference path
|
1440
1653
|
all_fvs = self._get_fv_backend_representations(object_name=None)
|
1441
|
-
fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
|
1654
|
+
fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r, _ in all_fvs}
|
1442
1655
|
|
1443
1656
|
if len(fv_maps.keys()) == 0:
|
1444
1657
|
return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1445
1658
|
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
FROM TABLE(
|
1453
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1454
|
-
TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
|
1455
|
-
)
|
1456
|
-
) {filter_clause}"""
|
1457
|
-
).collect(statement_params=self._telemetry_stmp)
|
1458
|
-
except Exception as e:
|
1459
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1460
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1461
|
-
original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
|
1462
|
-
) from e
|
1659
|
+
filters = (
|
1660
|
+
[lambda d: d["entityName"].startswith(feature_view_name.resolved())] # type: ignore[union-attr]
|
1661
|
+
if feature_view_name
|
1662
|
+
else None
|
1663
|
+
)
|
1664
|
+
res = self._lookup_tagged_objects(self._get_entity_name(entity_name), filters)
|
1463
1665
|
|
1464
1666
|
output_values: List[List[Any]] = []
|
1465
1667
|
for r in res:
|
1466
|
-
row = fv_maps[SqlIdentifier(r["
|
1668
|
+
row = fv_maps[SqlIdentifier(r["entityName"], case_sensitive=True)]
|
1467
1669
|
self._extract_feature_view_info(row, output_values)
|
1468
1670
|
|
1469
1671
|
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1470
1672
|
|
1471
1673
|
def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
|
1472
1674
|
name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1473
|
-
|
1474
|
-
if m is None:
|
1475
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1476
|
-
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
1477
|
-
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
|
1478
|
-
)
|
1479
|
-
|
1480
|
-
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1675
|
+
fv_metadata, _ = self._lookup_feature_view_metadata(row, FeatureView._get_physical_name(name, version))
|
1481
1676
|
|
1482
1677
|
values: List[Any] = []
|
1483
1678
|
values.append(name)
|
@@ -1488,63 +1683,41 @@ class FeatureStore:
|
|
1488
1683
|
values.append(row["owner"])
|
1489
1684
|
values.append(row["comment"])
|
1490
1685
|
values.append(fv_metadata.entities)
|
1686
|
+
values.append(row["target_lag"] if "target_lag" in row else None)
|
1687
|
+
values.append(row["refresh_mode"] if "refresh_mode" in row else None)
|
1688
|
+
values.append(row["scheduling_state"] if "scheduling_state" in row else None)
|
1491
1689
|
output_values.append(values)
|
1492
1690
|
|
1493
|
-
def
|
1494
|
-
if
|
1495
|
-
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1499
|
-
|
1500
|
-
|
1501
|
-
|
1502
|
-
|
1503
|
-
|
1504
|
-
|
1505
|
-
|
1506
|
-
|
1507
|
-
|
1508
|
-
|
1509
|
-
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
)
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
|
1521
|
-
]
|
1522
|
-
|
1523
|
-
results = self._session.sql("\nUNION\n".join(queries)).collect(statement_params=self._telemetry_stmp)
|
1524
|
-
except Exception as e:
|
1525
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1526
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1527
|
-
original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
|
1528
|
-
) from e
|
1529
|
-
|
1530
|
-
output_values: List[List[Any]] = []
|
1531
|
-
for r in results:
|
1532
|
-
fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
|
1533
|
-
for retrieved_entity in fv_metadata.entities:
|
1534
|
-
if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
|
1535
|
-
fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1536
|
-
fv_name = SqlIdentifier(fv_name, case_sensitive=True)
|
1537
|
-
obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
|
1538
|
-
if feature_view_name is not None:
|
1539
|
-
if fv_name == feature_view_name:
|
1540
|
-
self._extract_feature_view_info(fv_maps[obj_name], output_values)
|
1541
|
-
else:
|
1542
|
-
continue
|
1543
|
-
else:
|
1544
|
-
self._extract_feature_view_info(fv_maps[obj_name], output_values)
|
1545
|
-
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1691
|
+
def _lookup_feature_view_metadata(self, row: Row, fv_name: str) -> Tuple[_FeatureViewMetadata, str]:
|
1692
|
+
if len(row["text"]) == 0:
|
1693
|
+
# NOTE: if this is a shared feature view, then text column will be empty due to privacy constraints.
|
1694
|
+
# So instead of looking at original query text, we will obtain metadata by querying the tag value.
|
1695
|
+
# For query body, we will just use a simple select instead of original DDL query since shared feature views
|
1696
|
+
# are read-only.
|
1697
|
+
try:
|
1698
|
+
res = self._lookup_tags(
|
1699
|
+
domain="table", obj_name=fv_name, filter_fns=[lambda d: d["tagName"] == _FEATURE_VIEW_METADATA_TAG]
|
1700
|
+
)
|
1701
|
+
fv_metadata = _FeatureViewMetadata.from_json(res[0]["tagValue"])
|
1702
|
+
query = f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}"
|
1703
|
+
return (fv_metadata, query)
|
1704
|
+
except Exception as e:
|
1705
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1706
|
+
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
1707
|
+
original_exception=RuntimeError(f"Failed to extract feature_view metadata for {fv_name}: {e}."),
|
1708
|
+
)
|
1709
|
+
else:
|
1710
|
+
m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
|
1711
|
+
if m is None:
|
1712
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1713
|
+
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
1714
|
+
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {fv_name}: {row}."),
|
1715
|
+
)
|
1716
|
+
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1717
|
+
query = m.group("query")
|
1718
|
+
return (fv_metadata, query)
|
1546
1719
|
|
1547
|
-
def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
|
1720
|
+
def _compose_feature_view(self, row: Row, obj_type: _FeatureStoreObjTypes, entity_list: List[Row]) -> FeatureView:
|
1548
1721
|
def find_and_compose_entity(name: str) -> Entity:
|
1549
1722
|
name = SqlIdentifier(name).resolved()
|
1550
1723
|
for e in entity_list:
|
@@ -1558,21 +1731,14 @@ class FeatureStore:
|
|
1558
1731
|
|
1559
1732
|
name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1560
1733
|
name = SqlIdentifier(name, case_sensitive=True)
|
1561
|
-
m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
|
1562
|
-
if m is None:
|
1563
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1564
|
-
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
1565
|
-
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
|
1566
|
-
)
|
1567
|
-
|
1568
1734
|
fv_name = FeatureView._get_physical_name(name, version)
|
1735
|
+
fv_metadata, query = self._lookup_feature_view_metadata(row, fv_name)
|
1736
|
+
|
1569
1737
|
infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
|
1738
|
+
desc = row["comment"]
|
1570
1739
|
|
1571
|
-
if
|
1572
|
-
query = m.group("query")
|
1740
|
+
if obj_type == _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW:
|
1573
1741
|
df = self._session.sql(query)
|
1574
|
-
desc = m.group("comment")
|
1575
|
-
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1576
1742
|
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1577
1743
|
ts_col = fv_metadata.timestamp_col
|
1578
1744
|
timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
|
@@ -1584,23 +1750,25 @@ class FeatureStore:
|
|
1584
1750
|
timestamp_col=timestamp_col,
|
1585
1751
|
desc=desc,
|
1586
1752
|
version=version,
|
1587
|
-
status=FeatureViewStatus(row["scheduling_state"])
|
1753
|
+
status=FeatureViewStatus(row["scheduling_state"])
|
1754
|
+
if len(row["scheduling_state"]) > 0
|
1755
|
+
else FeatureViewStatus.MASKED,
|
1588
1756
|
feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
|
1589
1757
|
refresh_freq=row["target_lag"],
|
1590
1758
|
database=self._config.database.identifier(),
|
1591
1759
|
schema=self._config.schema.identifier(),
|
1592
|
-
warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
|
1760
|
+
warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
|
1761
|
+
if len(row["warehouse"]) > 0
|
1762
|
+
else None,
|
1593
1763
|
refresh_mode=row["refresh_mode"],
|
1594
1764
|
refresh_mode_reason=row["refresh_mode_reason"],
|
1595
1765
|
owner=row["owner"],
|
1596
1766
|
infer_schema_df=infer_schema_df,
|
1767
|
+
session=self._session,
|
1597
1768
|
)
|
1598
1769
|
return fv
|
1599
1770
|
else:
|
1600
|
-
query = m.group("query")
|
1601
1771
|
df = self._session.sql(query)
|
1602
|
-
desc = m.group("comment")
|
1603
|
-
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1604
1772
|
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1605
1773
|
ts_col = fv_metadata.timestamp_col
|
1606
1774
|
timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
|
@@ -1622,6 +1790,7 @@ class FeatureStore:
|
|
1622
1790
|
refresh_mode_reason=None,
|
1623
1791
|
owner=row["owner"],
|
1624
1792
|
infer_schema_df=infer_schema_df,
|
1793
|
+
session=self._session,
|
1625
1794
|
)
|
1626
1795
|
return fv
|
1627
1796
|
|
@@ -1675,42 +1844,10 @@ class FeatureStore:
|
|
1675
1844
|
)
|
1676
1845
|
# There could be none-FS objects under FS schema, thus filter on objects with FS special tag.
|
1677
1846
|
if object_type not in tag_free_object_types and len(all_rows) > 0:
|
1678
|
-
|
1679
|
-
|
1680
|
-
|
1681
|
-
|
1682
|
-
OBJECT_NAME
|
1683
|
-
FROM TABLE(
|
1684
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1685
|
-
TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
|
1686
|
-
)
|
1687
|
-
)
|
1688
|
-
WHERE DOMAIN='{obj_domain}'
|
1689
|
-
"""
|
1690
|
-
).collect(statement_params=self._telemetry_stmp)
|
1691
|
-
else:
|
1692
|
-
# TODO: remove this after tag_ref_internal rollout
|
1693
|
-
# Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
|
1694
|
-
# use double quotes to make it case-sensitive.
|
1695
|
-
queries = [
|
1696
|
-
f"""
|
1697
|
-
SELECT OBJECT_NAME
|
1698
|
-
FROM TABLE(
|
1699
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
|
1700
|
-
'{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
|
1701
|
-
'{obj_domain}'
|
1702
|
-
)
|
1703
|
-
)
|
1704
|
-
WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
|
1705
|
-
AND TAG_SCHEMA = '{self._config.schema.resolved()}'
|
1706
|
-
"""
|
1707
|
-
for row in all_rows
|
1708
|
-
]
|
1709
|
-
fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
|
1710
|
-
statement_params=self._telemetry_stmp
|
1711
|
-
)
|
1712
|
-
|
1713
|
-
fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
|
1847
|
+
fs_obj_rows = self._lookup_tagged_objects(
|
1848
|
+
_FEATURE_STORE_OBJECT_TAG, [lambda d: d["domain"] == obj_domain]
|
1849
|
+
)
|
1850
|
+
fs_tag_objects = [row["entityName"] for row in fs_obj_rows]
|
1714
1851
|
except Exception as e:
|
1715
1852
|
raise snowml_exceptions.SnowflakeMLException(
|
1716
1853
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
@@ -1756,21 +1893,6 @@ class FeatureStore:
|
|
1756
1893
|
)
|
1757
1894
|
return cast(DataFrame, df.drop(exclude_columns))
|
1758
1895
|
|
1759
|
-
def _tag_ref_internal_enabled(self) -> bool:
|
1760
|
-
try:
|
1761
|
-
self._session.sql(
|
1762
|
-
f"""
|
1763
|
-
SELECT * FROM TABLE(
|
1764
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1765
|
-
TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
|
1766
|
-
)
|
1767
|
-
) LIMIT 1;
|
1768
|
-
"""
|
1769
|
-
).collect()
|
1770
|
-
return True
|
1771
|
-
except Exception:
|
1772
|
-
return False
|
1773
|
-
|
1774
1896
|
def _is_dataset_enabled(self) -> bool:
|
1775
1897
|
try:
|
1776
1898
|
self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
|
@@ -1790,21 +1912,96 @@ class FeatureStore:
|
|
1790
1912
|
category=UserWarning,
|
1791
1913
|
)
|
1792
1914
|
|
1793
|
-
def
|
1794
|
-
|
1795
|
-
|
1915
|
+
def _filter_results(
|
1916
|
+
self, results: List[Dict[str, str]], filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
|
1917
|
+
) -> List[Dict[str, str]]:
|
1918
|
+
if filter_fns is None:
|
1919
|
+
return results
|
1920
|
+
|
1921
|
+
filtered_results = []
|
1922
|
+
for r in results:
|
1923
|
+
if all([fn(r) for fn in filter_fns]):
|
1924
|
+
filtered_results.append(r)
|
1925
|
+
return filtered_results
|
1926
|
+
|
1927
|
+
def _lookup_tags(
|
1928
|
+
self, domain: str, obj_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
|
1929
|
+
) -> List[Dict[str, str]]:
|
1930
|
+
"""
|
1931
|
+
Lookup tag values for a given object, optionally apply filters on the results.
|
1932
|
+
|
1933
|
+
Args:
|
1934
|
+
domain: Domain of the obj to look for tag. E.g. table
|
1935
|
+
obj_name: Name of the obj.
|
1936
|
+
filter_fns: List of filter functions applied on the results.
|
1937
|
+
|
1938
|
+
Returns:
|
1939
|
+
List of tag values in dictionary format.
|
1940
|
+
|
1941
|
+
Raises:
|
1942
|
+
SnowflakeMLException: [RuntimeError] Failed to lookup tags.
|
1943
|
+
|
1944
|
+
Example::
|
1945
|
+
self._lookup_tags("TABLE", "MY_FV", [lambda d: d["tagName"] == "TARGET_TAG_NAME"])
|
1796
1946
|
|
1797
|
-
query = f"""
|
1798
|
-
SELECT
|
1799
|
-
TAG_VALUE
|
1800
|
-
FROM TABLE(
|
1801
|
-
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1802
|
-
TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
|
1803
|
-
)
|
1804
|
-
)
|
1805
1947
|
"""
|
1948
|
+
# NOTE: use ENTITY_DETAIL system fn to query tags for given object for it to work in
|
1949
|
+
# processes using owner's right. e.g. Streamlit, or stored procedure
|
1950
|
+
try:
|
1951
|
+
res = self._session.sql(
|
1952
|
+
f"""
|
1953
|
+
SELECT ENTITY_DETAIL('{domain}','{self._get_fully_qualified_name(obj_name)}', '["TAG_REFERENCES"]');
|
1954
|
+
"""
|
1955
|
+
).collect(statement_params=self._telemetry_stmp)
|
1956
|
+
entity_detail = json.loads(res[0][0])
|
1957
|
+
results = entity_detail["tagReferencesInfo"]["tagReferenceList"]
|
1958
|
+
return self._filter_results(results, filter_fns)
|
1959
|
+
except Exception as e:
|
1960
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1961
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1962
|
+
original_exception=RuntimeError(f"Failed to lookup tags for object for {obj_name}: {e}"),
|
1963
|
+
) from e
|
1964
|
+
|
1965
|
+
def _lookup_tagged_objects(
|
1966
|
+
self, tag_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
|
1967
|
+
) -> List[Dict[str, str]]:
|
1968
|
+
"""
|
1969
|
+
Lookup objects based on specified tag name, optionally apply filters on the results.
|
1970
|
+
|
1971
|
+
Args:
|
1972
|
+
tag_name: Name of the tag.
|
1973
|
+
filter_fns: List of filter functions applied on the results.
|
1974
|
+
|
1975
|
+
Returns:
|
1976
|
+
List of objects in dictionary format.
|
1977
|
+
|
1978
|
+
Raises:
|
1979
|
+
SnowflakeMLException: [RuntimeError] Failed to lookup tagged objects.
|
1980
|
+
|
1981
|
+
Example::
|
1982
|
+
self._lookup_tagged_objects("TARGET_TAG_NAME", [lambda d: d["entityName"] == "MY_FV"])
|
1983
|
+
|
1984
|
+
"""
|
1985
|
+
# NOTE: use ENTITY_DETAIL system fn to query objects from tag for it to work in
|
1986
|
+
# processes using owner's right. e.g. Streamlit, or stored procedure
|
1987
|
+
try:
|
1988
|
+
res = self._session.sql(
|
1989
|
+
f"""
|
1990
|
+
SELECT ENTITY_DETAIL('TAG','{self._get_fully_qualified_name(tag_name)}', '["TAG_REFERENCES_INTERNAL"]');
|
1991
|
+
"""
|
1992
|
+
).collect(statement_params=self._telemetry_stmp)
|
1993
|
+
entity_detail = json.loads(res[0][0])
|
1994
|
+
results = entity_detail["referencedEntities"]["tagReferenceList"]
|
1995
|
+
return self._filter_results(results, filter_fns)
|
1996
|
+
except Exception as e:
|
1997
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1998
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1999
|
+
original_exception=RuntimeError(f"Failed to lookup tagged objects for {tag_name}: {e}"),
|
2000
|
+
) from e
|
2001
|
+
|
2002
|
+
def _collapse_object_versions(self) -> List[pkg_version.Version]:
|
1806
2003
|
try:
|
1807
|
-
res = self.
|
2004
|
+
res = self._lookup_tagged_objects(_FEATURE_STORE_OBJECT_TAG)
|
1808
2005
|
except Exception:
|
1809
2006
|
# since this is a best effort user warning to upgrade pkg versions
|
1810
2007
|
# we are treating failures as benign error
|
@@ -1812,7 +2009,7 @@ class FeatureStore:
|
|
1812
2009
|
versions = set()
|
1813
2010
|
compatibility_breakage_detected = False
|
1814
2011
|
for r in res:
|
1815
|
-
info = _FeatureStoreObjInfo.from_json(r["
|
2012
|
+
info = _FeatureStoreObjInfo.from_json(r["tagValue"])
|
1816
2013
|
if info.type == _FeatureStoreObjTypes.UNKNOWN:
|
1817
2014
|
compatibility_breakage_detected = True
|
1818
2015
|
versions.add(pkg_version.parse(info.pkg_version))
|