snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +66 -31
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +408 -282
- snowflake/ml/feature_store/feature_view.py +37 -8
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +0 -4
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +1 -4
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +135 -0
- snowflake/ml/model/_client/ops/model_ops.py +137 -67
- snowflake/ml/model/_client/sql/model.py +16 -14
- snowflake/ml/model/_client/sql/model_version.py +109 -1
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
- snowflake/ml/model/_packager/model_packager.py +0 -3
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
- snowflake/ml/modeling/cluster/birch.py +53 -52
- snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
- snowflake/ml/modeling/cluster/dbscan.py +51 -52
- snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
- snowflake/ml/modeling/cluster/k_means.py +53 -52
- snowflake/ml/modeling/cluster/mean_shift.py +51 -52
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
- snowflake/ml/modeling/cluster/optics.py +51 -52
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
- snowflake/ml/modeling/compose/column_transformer.py +53 -52
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
- snowflake/ml/modeling/covariance/oas.py +51 -52
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
- snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
- snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
- snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
- snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
- snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
- snowflake/ml/modeling/decomposition/pca.py +53 -52
- snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
- snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
- snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
- snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
- snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
- snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
- snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
- snowflake/ml/modeling/framework/base.py +63 -36
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
- snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
- snowflake/ml/modeling/impute/knn_imputer.py +53 -52
- snowflake/ml/modeling/impute/missing_indicator.py +53 -52
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
- snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/lars.py +51 -52
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/perceptron.py +51 -52
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ridge.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
- snowflake/ml/modeling/manifold/isomap.py +53 -52
- snowflake/ml/modeling/manifold/mds.py +53 -52
- snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
- snowflake/ml/modeling/manifold/tsne.py +53 -52
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
- snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
- snowflake/ml/modeling/pipeline/pipeline.py +514 -32
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
- snowflake/ml/modeling/svm/linear_svc.py +51 -52
- snowflake/ml/modeling/svm/linear_svr.py +51 -52
- snowflake/ml/modeling/svm/nu_svc.py +51 -52
- snowflake/ml/modeling/svm/nu_svr.py +51 -52
- snowflake/ml/modeling/svm/svc.py +51 -52
- snowflake/ml/modeling/svm/svr.py +51 -52
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +63 -2
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/RECORD +204 -196
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -8,13 +8,17 @@ import re
|
|
8
8
|
import warnings
|
9
9
|
from dataclasses import dataclass
|
10
10
|
from enum import Enum
|
11
|
-
from typing import Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
|
11
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
|
12
12
|
|
13
|
+
import packaging.version as pkg_version
|
14
|
+
import snowflake.ml.version as snowml_version
|
13
15
|
from pytimeparse.timeparse import timeparse
|
14
16
|
from typing_extensions import Concatenate, ParamSpec
|
15
17
|
|
18
|
+
from snowflake.ml import dataset
|
16
19
|
from snowflake.ml._internal import telemetry
|
17
20
|
from snowflake.ml._internal.exceptions import (
|
21
|
+
dataset_errors,
|
18
22
|
error_codes,
|
19
23
|
exceptions as snowml_exceptions,
|
20
24
|
)
|
@@ -23,12 +27,8 @@ from snowflake.ml._internal.utils.sql_identifier import (
|
|
23
27
|
SqlIdentifier,
|
24
28
|
to_sql_identifiers,
|
25
29
|
)
|
26
|
-
from snowflake.ml.dataset.
|
27
|
-
from snowflake.ml.feature_store.entity import
|
28
|
-
_ENTITY_NAME_LENGTH_LIMIT,
|
29
|
-
_FEATURE_VIEW_ENTITY_TAG_DELIMITER,
|
30
|
-
Entity,
|
31
|
-
)
|
30
|
+
from snowflake.ml.dataset.dataset_metadata import FeatureStoreMetadata
|
31
|
+
from snowflake.ml.feature_store.entity import _ENTITY_NAME_LENGTH_LIMIT, Entity
|
32
32
|
from snowflake.ml.feature_store.feature_view import (
|
33
33
|
_FEATURE_OBJ_TYPE,
|
34
34
|
_FEATURE_VIEW_NAME_DELIMITER,
|
@@ -37,11 +37,17 @@ from snowflake.ml.feature_store.feature_view import (
|
|
37
37
|
FeatureViewSlice,
|
38
38
|
FeatureViewStatus,
|
39
39
|
FeatureViewVersion,
|
40
|
+
_FeatureViewMetadata,
|
40
41
|
)
|
41
42
|
from snowflake.snowpark import DataFrame, Row, Session, functions as F
|
42
|
-
from snowflake.snowpark._internal import type_utils, utils as snowpark_utils
|
43
43
|
from snowflake.snowpark.exceptions import SnowparkSQLException
|
44
|
-
from snowflake.snowpark.types import
|
44
|
+
from snowflake.snowpark.types import (
|
45
|
+
ArrayType,
|
46
|
+
StringType,
|
47
|
+
StructField,
|
48
|
+
StructType,
|
49
|
+
TimestampType,
|
50
|
+
)
|
45
51
|
|
46
52
|
_Args = ParamSpec("_Args")
|
47
53
|
_RT = TypeVar("_RT")
|
@@ -49,38 +55,80 @@ _RT = TypeVar("_RT")
|
|
49
55
|
logger = logging.getLogger(__name__)
|
50
56
|
|
51
57
|
_ENTITY_TAG_PREFIX = "SNOWML_FEATURE_STORE_ENTITY_"
|
52
|
-
_FEATURE_VIEW_ENTITY_TAG = "SNOWML_FEATURE_STORE_FV_ENTITIES"
|
53
|
-
_FEATURE_VIEW_TS_COL_TAG = "SNOWML_FEATURE_STORE_FV_TS_COL"
|
54
58
|
_FEATURE_STORE_OBJECT_TAG = "SNOWML_FEATURE_STORE_OBJECT"
|
59
|
+
_FEATURE_VIEW_METADATA_TAG = "SNOWML_FEATURE_VIEW_METADATA"
|
60
|
+
|
61
|
+
|
62
|
+
@dataclass(frozen=True)
|
63
|
+
class _FeatureStoreObjInfo:
|
64
|
+
type: _FeatureStoreObjTypes
|
65
|
+
pkg_version: str
|
66
|
+
|
67
|
+
def to_json(self) -> str:
|
68
|
+
state_dict = self.__dict__.copy()
|
69
|
+
state_dict["type"] = state_dict["type"].value
|
70
|
+
return json.dumps(state_dict)
|
71
|
+
|
72
|
+
@classmethod
|
73
|
+
def from_json(cls, json_str: str) -> _FeatureStoreObjInfo:
|
74
|
+
json_dict = json.loads(json_str)
|
75
|
+
# since we may introduce new fields in the json blob in the future,
|
76
|
+
# in order to guarantee compatibility, we need to select ones that can be
|
77
|
+
# decoded in the current version
|
78
|
+
state_dict = {}
|
79
|
+
state_dict["type"] = _FeatureStoreObjTypes.parse(json_dict["type"])
|
80
|
+
state_dict["pkg_version"] = json_dict["pkg_version"]
|
81
|
+
return cls(**state_dict) # type: ignore[arg-type]
|
55
82
|
|
56
83
|
|
57
84
|
# TODO: remove "" after dataset is updated
|
58
85
|
class _FeatureStoreObjTypes(Enum):
|
59
|
-
|
86
|
+
UNKNOWN = "UNKNOWN" # for forward compatibility
|
87
|
+
MANAGED_FEATURE_VIEW = "MANAGED_FEATURE_VIEW"
|
88
|
+
EXTERNAL_FEATURE_VIEW = "EXTERNAL_FEATURE_VIEW"
|
60
89
|
FEATURE_VIEW_REFRESH_TASK = "FEATURE_VIEW_REFRESH_TASK"
|
61
90
|
TRAINING_DATA = ""
|
62
91
|
|
92
|
+
@classmethod
|
93
|
+
def parse(cls, val: str) -> _FeatureStoreObjTypes:
|
94
|
+
try:
|
95
|
+
return cls(val)
|
96
|
+
except ValueError:
|
97
|
+
return cls.UNKNOWN
|
98
|
+
|
63
99
|
|
64
100
|
_PROJECT = "FeatureStore"
|
65
101
|
_DT_OR_VIEW_QUERY_PATTERN = re.compile(
|
66
102
|
r"""CREATE\ (OR\ REPLACE\ )?(?P<obj_type>(DYNAMIC\ TABLE|VIEW))\ .*
|
67
103
|
COMMENT\ =\ '(?P<comment>.*)'\s*
|
68
|
-
TAG.*?{
|
69
|
-
.*?{ts_col_tag}\ =\ '(?P<ts_col>.*?)',?.*?
|
104
|
+
TAG.*?{fv_metadata_tag}\ =\ '(?P<fv_metadata>.*?)',?.*?
|
70
105
|
AS\ (?P<query>.*)
|
71
106
|
""".format(
|
72
|
-
|
107
|
+
fv_metadata_tag=_FEATURE_VIEW_METADATA_TAG,
|
73
108
|
),
|
74
109
|
flags=re.DOTALL | re.IGNORECASE | re.X,
|
75
110
|
)
|
76
111
|
|
112
|
+
_LIST_FEATURE_VIEW_SCHEMA = StructType(
|
113
|
+
[
|
114
|
+
StructField("name", StringType()),
|
115
|
+
StructField("version", StringType()),
|
116
|
+
StructField("database_name", StringType()),
|
117
|
+
StructField("schema_name", StringType()),
|
118
|
+
StructField("created_on", TimestampType()),
|
119
|
+
StructField("owner", StringType()),
|
120
|
+
StructField("desc", StringType()),
|
121
|
+
StructField("entities", ArrayType(StringType())),
|
122
|
+
]
|
123
|
+
)
|
124
|
+
|
77
125
|
|
78
126
|
class CreationMode(Enum):
|
79
127
|
FAIL_IF_NOT_EXIST = 1
|
80
128
|
CREATE_IF_NOT_EXIST = 2
|
81
129
|
|
82
130
|
|
83
|
-
@dataclass
|
131
|
+
@dataclass(frozen=True)
|
84
132
|
class _FeatureStoreConfig:
|
85
133
|
database: SqlIdentifier
|
86
134
|
schema: SqlIdentifier
|
@@ -111,14 +159,14 @@ def switch_warehouse(
|
|
111
159
|
return wrapper
|
112
160
|
|
113
161
|
|
114
|
-
def dispatch_decorator(
|
115
|
-
|
116
|
-
|
162
|
+
def dispatch_decorator() -> Callable[
|
163
|
+
[Callable[Concatenate[FeatureStore, _Args], _RT]],
|
164
|
+
Callable[Concatenate[FeatureStore, _Args], _RT],
|
165
|
+
]:
|
117
166
|
def decorator(
|
118
167
|
f: Callable[Concatenate[FeatureStore, _Args], _RT]
|
119
168
|
) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
|
120
169
|
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
121
|
-
@snowpark_utils.private_preview(version=prpr_version)
|
122
170
|
@switch_warehouse
|
123
171
|
@functools.wraps(f)
|
124
172
|
def wrap(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
|
@@ -135,7 +183,6 @@ class FeatureStore:
|
|
135
183
|
"""
|
136
184
|
|
137
185
|
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
138
|
-
@snowpark_utils.private_preview(version="1.0.8")
|
139
186
|
def __init__(
|
140
187
|
self,
|
141
188
|
session: Session,
|
@@ -178,7 +225,7 @@ class FeatureStore:
|
|
178
225
|
# search space used in query "SHOW <object_TYPE> LIKE <object_name> IN <search_space>"
|
179
226
|
# object domain used in query "TAG_REFERENCE(<object_name>, <object_domain>)"
|
180
227
|
self._obj_search_spaces = {
|
181
|
-
"
|
228
|
+
"DATASETS": (self._config.full_schema_path, "DATASET"),
|
182
229
|
"DYNAMIC TABLES": (self._config.full_schema_path, "TABLE"),
|
183
230
|
"VIEWS": (self._config.full_schema_path, "TABLE"),
|
184
231
|
"SCHEMAS": (f"DATABASE {self._config.database}", "SCHEMA"),
|
@@ -200,8 +247,7 @@ class FeatureStore:
|
|
200
247
|
)
|
201
248
|
for tag in to_sql_identifiers(
|
202
249
|
[
|
203
|
-
|
204
|
-
_FEATURE_VIEW_TS_COL_TAG,
|
250
|
+
_FEATURE_VIEW_METADATA_TAG,
|
205
251
|
]
|
206
252
|
):
|
207
253
|
self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
|
@@ -209,8 +255,7 @@ class FeatureStore:
|
|
209
255
|
)
|
210
256
|
|
211
257
|
self._session.sql(
|
212
|
-
f"
|
213
|
-
ALLOWED_VALUES {','.join([f"'{v.value}'" for v in _FeatureStoreObjTypes])}"""
|
258
|
+
f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}"
|
214
259
|
).collect(statement_params=self._telemetry_stmp)
|
215
260
|
except Exception as e:
|
216
261
|
self.clear()
|
@@ -219,10 +264,12 @@ class FeatureStore:
|
|
219
264
|
original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
|
220
265
|
)
|
221
266
|
|
267
|
+
# TODO: remove this after tag_ref_internal rollout
|
268
|
+
self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
|
269
|
+
self._check_feature_store_object_versions()
|
222
270
|
logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
|
223
271
|
|
224
272
|
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
225
|
-
@snowpark_utils.private_preview(version="1.0.12")
|
226
273
|
def update_default_warehouse(self, warehouse_name: str) -> None:
|
227
274
|
"""Update default warehouse for feature store.
|
228
275
|
|
@@ -242,7 +289,7 @@ class FeatureStore:
|
|
242
289
|
|
243
290
|
self._default_warehouse = warehouse
|
244
291
|
|
245
|
-
@dispatch_decorator(
|
292
|
+
@dispatch_decorator()
|
246
293
|
def register_entity(self, entity: Entity) -> Entity:
|
247
294
|
"""
|
248
295
|
Register Entity in the FeatureStore.
|
@@ -268,13 +315,13 @@ class FeatureStore:
|
|
268
315
|
return entity
|
269
316
|
|
270
317
|
# allowed_values will add double-quotes around each value, thus use resolved str here.
|
271
|
-
join_keys = [f"
|
318
|
+
join_keys = [f"{key.resolved()}" for key in entity.join_keys]
|
272
319
|
join_keys_str = ",".join(join_keys)
|
273
320
|
full_tag_name = self._get_fully_qualified_name(tag_name)
|
274
321
|
try:
|
275
322
|
self._session.sql(
|
276
323
|
f"""CREATE TAG IF NOT EXISTS {full_tag_name}
|
277
|
-
ALLOWED_VALUES {join_keys_str}
|
324
|
+
ALLOWED_VALUES '{join_keys_str}'
|
278
325
|
COMMENT = '{entity.desc}'
|
279
326
|
"""
|
280
327
|
).collect(statement_params=self._telemetry_stmp)
|
@@ -289,7 +336,7 @@ class FeatureStore:
|
|
289
336
|
return self.get_entity(entity.name)
|
290
337
|
|
291
338
|
# TODO: add support to update column desc once SNOW-894249 is fixed
|
292
|
-
@dispatch_decorator(
|
339
|
+
@dispatch_decorator()
|
293
340
|
def register_feature_view(
|
294
341
|
self,
|
295
342
|
feature_view: FeatureView,
|
@@ -342,7 +389,6 @@ class FeatureStore:
|
|
342
389
|
),
|
343
390
|
)
|
344
391
|
|
345
|
-
# TODO: ideally we should move this to FeatureView creation time
|
346
392
|
for e in feature_view.entities:
|
347
393
|
if not self._validate_entity_exists(e.name):
|
348
394
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -358,12 +404,23 @@ class FeatureStore:
|
|
358
404
|
pass
|
359
405
|
|
360
406
|
fully_qualified_name = self._get_fully_qualified_name(feature_view_name)
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
407
|
+
refresh_freq = feature_view.refresh_freq
|
408
|
+
|
409
|
+
if refresh_freq is not None:
|
410
|
+
obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.MANAGED_FEATURE_VIEW, snowml_version.VERSION)
|
411
|
+
else:
|
412
|
+
obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW, snowml_version.VERSION)
|
413
|
+
|
414
|
+
tagging_clause = [
|
415
|
+
f"{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '{obj_info.to_json()}'",
|
416
|
+
f"{self._get_fully_qualified_name(_FEATURE_VIEW_METADATA_TAG)} = '{feature_view._metadata().to_json()}'",
|
417
|
+
]
|
418
|
+
for e in feature_view.entities:
|
419
|
+
join_keys = [f"{key.resolved()}" for key in e.join_keys]
|
420
|
+
tagging_clause.append(
|
421
|
+
f"{self._get_fully_qualified_name(self._get_entity_name(e.name))} = '{','.join(join_keys)}'"
|
422
|
+
)
|
423
|
+
tagging_clause_str = ",\n".join(tagging_clause)
|
367
424
|
|
368
425
|
def create_col_desc(col: StructField) -> str:
|
369
426
|
desc = feature_view.feature_descs.get(SqlIdentifier(col.name), None)
|
@@ -371,7 +428,6 @@ class FeatureStore:
|
|
371
428
|
return f"{col.name} {desc}"
|
372
429
|
|
373
430
|
column_descs = ", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
|
374
|
-
refresh_freq = feature_view.refresh_freq
|
375
431
|
|
376
432
|
if refresh_freq is not None:
|
377
433
|
schedule_task = refresh_freq != "DOWNSTREAM" and timeparse(refresh_freq) is None
|
@@ -380,10 +436,9 @@ class FeatureStore:
|
|
380
436
|
feature_view,
|
381
437
|
fully_qualified_name,
|
382
438
|
column_descs,
|
383
|
-
|
439
|
+
tagging_clause_str,
|
384
440
|
schedule_task,
|
385
441
|
self._default_warehouse,
|
386
|
-
timestamp_col,
|
387
442
|
block,
|
388
443
|
overwrite,
|
389
444
|
)
|
@@ -393,9 +448,7 @@ class FeatureStore:
|
|
393
448
|
query = f"""CREATE{overwrite_clause} VIEW {fully_qualified_name} ({column_descs})
|
394
449
|
COMMENT = '{feature_view.desc}'
|
395
450
|
TAG (
|
396
|
-
{
|
397
|
-
{_FEATURE_VIEW_TS_COL_TAG} = '{timestamp_col}',
|
398
|
-
{_FEATURE_STORE_OBJECT_TAG} = '{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
|
451
|
+
{tagging_clause_str}
|
399
452
|
)
|
400
453
|
AS {feature_view.query}
|
401
454
|
"""
|
@@ -406,10 +459,10 @@ class FeatureStore:
|
|
406
459
|
original_exception=RuntimeError(f"Create view {fully_qualified_name} [\n{query}\n] failed: {e}"),
|
407
460
|
) from e
|
408
461
|
|
409
|
-
logger.info(f"Registered FeatureView {feature_view.name}/{version}.")
|
462
|
+
logger.info(f"Registered FeatureView {feature_view.name}/{version} successfully.")
|
410
463
|
return self.get_feature_view(feature_view.name, str(version))
|
411
464
|
|
412
|
-
@dispatch_decorator(
|
465
|
+
@dispatch_decorator()
|
413
466
|
def update_feature_view(
|
414
467
|
self, name: str, version: str, refresh_freq: Optional[str] = None, warehouse: Optional[str] = None
|
415
468
|
) -> FeatureView:
|
@@ -456,7 +509,7 @@ class FeatureStore:
|
|
456
509
|
) from e
|
457
510
|
return self.get_feature_view(name=name, version=version)
|
458
511
|
|
459
|
-
@dispatch_decorator(
|
512
|
+
@dispatch_decorator()
|
460
513
|
def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
|
461
514
|
"""
|
462
515
|
Read FeatureView data.
|
@@ -478,13 +531,12 @@ class FeatureStore:
|
|
478
531
|
|
479
532
|
return self._session.sql(f"SELECT * FROM {feature_view.fully_qualified_name()}")
|
480
533
|
|
481
|
-
@dispatch_decorator(
|
534
|
+
@dispatch_decorator()
|
482
535
|
def list_feature_views(
|
483
536
|
self,
|
484
537
|
entity_name: Optional[str] = None,
|
485
538
|
feature_view_name: Optional[str] = None,
|
486
|
-
|
487
|
-
) -> Union[Optional[DataFrame], List[FeatureView]]:
|
539
|
+
) -> DataFrame:
|
488
540
|
"""
|
489
541
|
List FeatureViews in the FeatureStore.
|
490
542
|
If entity_name is specified, FeatureViews associated with that Entity will be listed.
|
@@ -493,34 +545,26 @@ class FeatureStore:
|
|
493
545
|
Args:
|
494
546
|
entity_name: Entity name.
|
495
547
|
feature_view_name: FeatureView name.
|
496
|
-
as_dataframe: whether the return type should be a DataFrame.
|
497
548
|
|
498
549
|
Returns:
|
499
|
-
|
550
|
+
FeatureViews information as a Snowpark DataFrame.
|
500
551
|
"""
|
501
|
-
if entity_name is not None:
|
502
|
-
entity_name = SqlIdentifier(entity_name)
|
503
552
|
if feature_view_name is not None:
|
504
553
|
feature_view_name = SqlIdentifier(feature_view_name)
|
505
554
|
|
506
555
|
if entity_name is not None:
|
507
|
-
|
556
|
+
entity_name = SqlIdentifier(entity_name)
|
557
|
+
if self._use_optimized_tag_ref:
|
558
|
+
return self._optimized_find_feature_views(entity_name, feature_view_name)
|
559
|
+
else:
|
560
|
+
return self._find_feature_views(entity_name, feature_view_name)
|
508
561
|
else:
|
509
|
-
|
510
|
-
entities = self.list_entities().collect()
|
562
|
+
output_values: List[List[Any]] = []
|
511
563
|
for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
|
512
|
-
|
513
|
-
|
514
|
-
if as_dataframe:
|
515
|
-
result = None
|
516
|
-
for fv in fvs:
|
517
|
-
fv_df = fv.to_df(self._session)
|
518
|
-
result = fv_df if result is None else result.union(fv_df) # type: ignore[attr-defined]
|
519
|
-
return result
|
520
|
-
else:
|
521
|
-
return fvs
|
564
|
+
self._extract_feature_view_info(row, output_values)
|
565
|
+
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
522
566
|
|
523
|
-
@dispatch_decorator(
|
567
|
+
@dispatch_decorator()
|
524
568
|
def get_feature_view(self, name: str, version: str) -> FeatureView:
|
525
569
|
"""
|
526
570
|
Retrieve previously registered FeatureView.
|
@@ -549,7 +593,7 @@ class FeatureStore:
|
|
549
593
|
|
550
594
|
return self._compose_feature_view(results[0], self.list_entities().collect())
|
551
595
|
|
552
|
-
@dispatch_decorator(
|
596
|
+
@dispatch_decorator()
|
553
597
|
def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
554
598
|
"""
|
555
599
|
Resume a previously suspended FeatureView.
|
@@ -562,7 +606,7 @@ class FeatureStore:
|
|
562
606
|
"""
|
563
607
|
return self._update_feature_view_status(feature_view, "RESUME")
|
564
608
|
|
565
|
-
@dispatch_decorator(
|
609
|
+
@dispatch_decorator()
|
566
610
|
def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
567
611
|
"""
|
568
612
|
Suspend an active FeatureView.
|
@@ -575,7 +619,7 @@ class FeatureStore:
|
|
575
619
|
"""
|
576
620
|
return self._update_feature_view_status(feature_view, "SUSPEND")
|
577
621
|
|
578
|
-
@dispatch_decorator(
|
622
|
+
@dispatch_decorator()
|
579
623
|
def delete_feature_view(self, feature_view: FeatureView) -> None:
|
580
624
|
"""
|
581
625
|
Delete a FeatureView.
|
@@ -586,6 +630,8 @@ class FeatureStore:
|
|
586
630
|
Raises:
|
587
631
|
SnowflakeMLException: [ValueError] FeatureView is not registered.
|
588
632
|
"""
|
633
|
+
# TODO: we should leverage lineage graph to check downstream deps, and block the deletion
|
634
|
+
# if there're other FVs depending on this
|
589
635
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
590
636
|
raise snowml_exceptions.SnowflakeMLException(
|
591
637
|
error_code=error_codes.NOT_FOUND,
|
@@ -608,7 +654,7 @@ class FeatureStore:
|
|
608
654
|
|
609
655
|
logger.info(f"Deleted FeatureView {feature_view.name}/{feature_view.version}.")
|
610
656
|
|
611
|
-
@dispatch_decorator(
|
657
|
+
@dispatch_decorator()
|
612
658
|
def list_entities(self) -> DataFrame:
|
613
659
|
"""
|
614
660
|
List all Entities in the FeatureStore.
|
@@ -629,7 +675,7 @@ class FeatureStore:
|
|
629
675
|
),
|
630
676
|
)
|
631
677
|
|
632
|
-
@dispatch_decorator(
|
678
|
+
@dispatch_decorator()
|
633
679
|
def get_entity(self, name: str) -> Entity:
|
634
680
|
"""
|
635
681
|
Retrieve previously registered Entity object.
|
@@ -659,8 +705,7 @@ class FeatureStore:
|
|
659
705
|
original_exception=ValueError(f"Cannot find Entity with name: {name}."),
|
660
706
|
)
|
661
707
|
|
662
|
-
|
663
|
-
join_keys = raw_join_keys.strip("[]").split(",")
|
708
|
+
join_keys = self._recompose_join_keys(result[0]["JOIN_KEYS"])
|
664
709
|
|
665
710
|
return Entity._construct_entity(
|
666
711
|
name=SqlIdentifier(result[0]["NAME"], case_sensitive=True).identifier(),
|
@@ -669,7 +714,7 @@ class FeatureStore:
|
|
669
714
|
owner=result[0]["OWNER"],
|
670
715
|
)
|
671
716
|
|
672
|
-
@dispatch_decorator(
|
717
|
+
@dispatch_decorator()
|
673
718
|
def delete_entity(self, name: str) -> None:
|
674
719
|
"""
|
675
720
|
Delete a previously registered Entity.
|
@@ -690,13 +735,13 @@ class FeatureStore:
|
|
690
735
|
original_exception=ValueError(f"Entity {name} does not exist."),
|
691
736
|
)
|
692
737
|
|
693
|
-
active_feature_views =
|
738
|
+
active_feature_views = self.list_feature_views(entity_name=name).collect(statement_params=self._telemetry_stmp)
|
739
|
+
|
694
740
|
if len(active_feature_views) > 0:
|
741
|
+
active_fvs = [r["NAME"] for r in active_feature_views]
|
695
742
|
raise snowml_exceptions.SnowflakeMLException(
|
696
743
|
error_code=error_codes.SNOWML_DELETE_FAILED,
|
697
|
-
original_exception=ValueError(
|
698
|
-
f"Cannot delete Entity {name} due to active FeatureViews: {[f.name for f in active_feature_views]}."
|
699
|
-
),
|
744
|
+
original_exception=ValueError(f"Cannot delete Entity {name} due to active FeatureViews: {active_fvs}."),
|
700
745
|
)
|
701
746
|
|
702
747
|
tag_name = self._get_fully_qualified_name(self._get_entity_name(name))
|
@@ -709,7 +754,7 @@ class FeatureStore:
|
|
709
754
|
) from e
|
710
755
|
logger.info(f"Deleted Entity {name}.")
|
711
756
|
|
712
|
-
@dispatch_decorator(
|
757
|
+
@dispatch_decorator()
|
713
758
|
def retrieve_feature_values(
|
714
759
|
self,
|
715
760
|
spine_df: DataFrame,
|
@@ -757,39 +802,35 @@ class FeatureStore:
|
|
757
802
|
|
758
803
|
return df
|
759
804
|
|
760
|
-
@dispatch_decorator(
|
805
|
+
@dispatch_decorator()
|
761
806
|
def generate_dataset(
|
762
807
|
self,
|
808
|
+
name: str,
|
763
809
|
spine_df: DataFrame,
|
764
810
|
features: List[Union[FeatureView, FeatureViewSlice]],
|
765
|
-
|
811
|
+
version: Optional[str] = None,
|
766
812
|
spine_timestamp_col: Optional[str] = None,
|
767
813
|
spine_label_cols: Optional[List[str]] = None,
|
768
814
|
exclude_columns: Optional[List[str]] = None,
|
769
|
-
save_mode: str = "errorifexists",
|
770
815
|
include_feature_view_timestamp_col: bool = False,
|
771
816
|
desc: str = "",
|
772
|
-
) -> Dataset:
|
817
|
+
) -> dataset.Dataset:
|
773
818
|
"""
|
774
819
|
Generate dataset by given source table and feature views.
|
775
820
|
|
776
821
|
Args:
|
822
|
+
name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
|
823
|
+
by their name and version.
|
777
824
|
spine_df: The fact table contains the raw dataset.
|
778
825
|
features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
|
779
|
-
|
780
|
-
|
781
|
-
the provided table. Note result dataset will be a snowflake clone of registered table.
|
782
|
-
New data can append on same registered table and previously generated dataset won't be affected.
|
783
|
-
Default result table name will be a concatenation of materialized_table name and current timestamp.
|
826
|
+
version: The version of the Dataset to be generated. If none specified, the current timestamp
|
827
|
+
will be used instead.
|
784
828
|
spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
|
785
829
|
time-series features. If spine_timestamp_col is not none, the input features also must have
|
786
830
|
timestamp_col.
|
787
831
|
spine_label_cols: Name of column(s) in spine_df that contains labels.
|
788
832
|
exclude_columns: Column names to exclude from the result dataframe.
|
789
833
|
The underlying storage will still contain the columns.
|
790
|
-
save_mode: How new data is saved. currently support:
|
791
|
-
errorifexists: Raise error if registered table already exists.
|
792
|
-
merge: Merge new data if registered table already exists.
|
793
834
|
include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
|
794
835
|
(if feature view has timestamp column) if set true. Default to false.
|
795
836
|
desc: A description about this dataset.
|
@@ -798,10 +839,8 @@ class FeatureStore:
|
|
798
839
|
A Dataset object.
|
799
840
|
|
800
841
|
Raises:
|
801
|
-
SnowflakeMLException: [ValueError] save_mode is invalid.
|
802
842
|
SnowflakeMLException: [ValueError] spine_df contains more than one query.
|
803
|
-
SnowflakeMLException: [ValueError]
|
804
|
-
SnowflakeMLException: [ValueError] Materialized_table already exists with save_mode `errorifexists`.
|
843
|
+
SnowflakeMLException: [ValueError] Dataset name/version already exists
|
805
844
|
SnowflakeMLException: [ValueError] Snapshot creation failed.
|
806
845
|
SnowflakeMLException: [RuntimeError] Failed to create clone from table.
|
807
846
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
@@ -811,15 +850,6 @@ class FeatureStore:
|
|
811
850
|
if spine_label_cols is not None:
|
812
851
|
spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
|
813
852
|
|
814
|
-
allowed_save_mode = {"errorifexists", "merge"}
|
815
|
-
if save_mode.lower() not in allowed_save_mode:
|
816
|
-
raise snowml_exceptions.SnowflakeMLException(
|
817
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
818
|
-
original_exception=ValueError(
|
819
|
-
f"'{save_mode}' is not supported. Current supported save modes: {','.join(allowed_save_mode)}"
|
820
|
-
),
|
821
|
-
)
|
822
|
-
|
823
853
|
if len(spine_df.queries["queries"]) != 1:
|
824
854
|
raise snowml_exceptions.SnowflakeMLException(
|
825
855
|
error_code=error_codes.INVALID_ARGUMENT,
|
@@ -832,70 +862,55 @@ class FeatureStore:
|
|
832
862
|
spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
|
833
863
|
)
|
834
864
|
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
# TODO (wezhou) change materialized_table to SqlIdentifier
|
844
|
-
found_rows = self._find_object("TABLES", SqlIdentifier(materialized_table))
|
845
|
-
if save_mode.lower() == "errorifexists" and len(found_rows) > 0:
|
846
|
-
raise snowml_exceptions.SnowflakeMLException(
|
847
|
-
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
848
|
-
original_exception=ValueError(f"Dataset table {materialized_table} already exists."),
|
849
|
-
)
|
850
|
-
|
851
|
-
self._dump_dataset(result_df, materialized_table, join_keys, spine_timestamp_col)
|
852
|
-
|
853
|
-
snapshot_table = f"{materialized_table}_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
|
854
|
-
snapshot_table = self._get_fully_qualified_name(snapshot_table)
|
855
|
-
materialized_table = self._get_fully_qualified_name(materialized_table)
|
856
|
-
|
857
|
-
try:
|
858
|
-
self._session.sql(f"CREATE TABLE {snapshot_table} CLONE {materialized_table}").collect(
|
859
|
-
statement_params=self._telemetry_stmp
|
860
|
-
)
|
861
|
-
except Exception as e:
|
862
|
-
raise snowml_exceptions.SnowflakeMLException(
|
863
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
864
|
-
original_exception=RuntimeError(
|
865
|
-
f"Failed to create clone {materialized_table} from table {snapshot_table}: {e}."
|
866
|
-
),
|
867
|
-
) from e
|
868
|
-
|
869
|
-
result_df = self._session.sql(f"SELECT * FROM {snapshot_table}")
|
865
|
+
# Convert name to fully qualified name if not already fully qualified
|
866
|
+
db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
|
867
|
+
name = "{}.{}.{}".format(
|
868
|
+
db_name or self._config.database,
|
869
|
+
schema_name or self._config.schema,
|
870
|
+
object_name,
|
871
|
+
)
|
872
|
+
version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
870
873
|
|
871
874
|
if exclude_columns is not None:
|
872
875
|
result_df = self._exclude_columns(result_df, exclude_columns)
|
873
876
|
|
874
877
|
fs_meta = FeatureStoreMetadata(
|
875
878
|
spine_query=spine_df.queries["queries"][0],
|
876
|
-
|
877
|
-
|
879
|
+
serialized_feature_views=[fv.to_json() for fv in features],
|
880
|
+
spine_timestamp_col=spine_timestamp_col,
|
878
881
|
)
|
879
882
|
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
883
|
+
try:
|
884
|
+
ds: dataset.Dataset = dataset.create_from_dataframe(
|
885
|
+
self._session,
|
886
|
+
name,
|
887
|
+
version,
|
888
|
+
input_dataframe=result_df,
|
889
|
+
exclude_cols=[spine_timestamp_col],
|
890
|
+
label_cols=spine_label_cols,
|
891
|
+
properties=fs_meta,
|
892
|
+
comment=desc,
|
893
|
+
)
|
894
|
+
return ds
|
891
895
|
|
892
|
-
|
893
|
-
|
896
|
+
except dataset_errors.DatasetExistError as e:
|
897
|
+
raise snowml_exceptions.SnowflakeMLException(
|
898
|
+
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
899
|
+
original_exception=ValueError(str(e)),
|
900
|
+
) from e
|
901
|
+
except SnowparkSQLException as e:
|
902
|
+
raise snowml_exceptions.SnowflakeMLException(
|
903
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
904
|
+
original_exception=RuntimeError(f"An error occurred during Dataset generation: {e}."),
|
905
|
+
) from e
|
906
|
+
|
907
|
+
@dispatch_decorator()
|
908
|
+
def load_feature_views_from_dataset(self, ds: dataset.Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
|
894
909
|
"""
|
895
910
|
Retrieve FeatureViews used during Dataset construction.
|
896
911
|
|
897
912
|
Args:
|
898
|
-
|
913
|
+
ds: Dataset object created from feature store.
|
899
914
|
|
900
915
|
Returns:
|
901
916
|
List of FeatureViews used during Dataset construction.
|
@@ -903,13 +918,18 @@ class FeatureStore:
|
|
903
918
|
Raises:
|
904
919
|
ValueError: if dataset object is not generated from feature store.
|
905
920
|
"""
|
906
|
-
|
907
|
-
|
908
|
-
|
921
|
+
assert ds.selected_version is not None
|
922
|
+
source_meta = ds.selected_version._get_metadata()
|
923
|
+
if (
|
924
|
+
source_meta is None
|
925
|
+
or not isinstance(source_meta.properties, FeatureStoreMetadata)
|
926
|
+
or source_meta.properties.serialized_feature_views is None
|
927
|
+
):
|
928
|
+
raise ValueError(f"Dataset {ds} does not contain valid feature view information.")
|
909
929
|
|
910
|
-
return self._load_serialized_feature_objects(
|
930
|
+
return self._load_serialized_feature_objects(source_meta.properties.serialized_feature_views)
|
911
931
|
|
912
|
-
@dispatch_decorator(
|
932
|
+
@dispatch_decorator()
|
913
933
|
def clear(self) -> None:
|
914
934
|
"""
|
915
935
|
Clear all feature store internal objects including feature views, entities etc. Note feature store
|
@@ -929,7 +949,11 @@ class FeatureStore:
|
|
929
949
|
if len(result) == 0:
|
930
950
|
return
|
931
951
|
|
932
|
-
|
952
|
+
fs_obj_tag = self._find_object("TAGS", SqlIdentifier(_FEATURE_STORE_OBJECT_TAG))
|
953
|
+
if len(fs_obj_tag) == 0:
|
954
|
+
return
|
955
|
+
|
956
|
+
object_types = ["DYNAMIC TABLES", "DATASETS", "VIEWS", "TASKS"]
|
933
957
|
for obj_type in object_types:
|
934
958
|
all_object_rows = self._find_object(obj_type, None)
|
935
959
|
for row in all_object_rows:
|
@@ -939,9 +963,8 @@ class FeatureStore:
|
|
939
963
|
|
940
964
|
entity_tags = self._find_object("TAGS", SqlIdentifier(_ENTITY_TAG_PREFIX), prefix_match=True)
|
941
965
|
all_tags = [
|
942
|
-
_FEATURE_VIEW_ENTITY_TAG,
|
943
|
-
_FEATURE_VIEW_TS_COL_TAG,
|
944
966
|
_FEATURE_STORE_OBJECT_TAG,
|
967
|
+
_FEATURE_VIEW_METADATA_TAG,
|
945
968
|
] + [SqlIdentifier(row["name"], case_sensitive=True) for row in entity_tags]
|
946
969
|
for tag_name in all_tags:
|
947
970
|
obj_name = self._get_fully_qualified_name(tag_name)
|
@@ -965,37 +988,47 @@ class FeatureStore:
|
|
965
988
|
)
|
966
989
|
return existing_fv
|
967
990
|
|
991
|
+
def _recompose_join_keys(self, join_key: str) -> List[str]:
|
992
|
+
# ALLOWED_VALUES in TAG will follow format ["key_1,key2,..."]
|
993
|
+
# since keys are already resolved following the SQL identifier rule on the write path,
|
994
|
+
# we simply parse the keys back and wrap them with quotes to preserve cases
|
995
|
+
# Example join_key repr from TAG value: "[key1,key2,key3]"
|
996
|
+
join_keys = join_key[2:-2].split(",")
|
997
|
+
res = []
|
998
|
+
for k in join_keys:
|
999
|
+
res.append(f'"{k}"')
|
1000
|
+
return res
|
1001
|
+
|
968
1002
|
def _create_dynamic_table(
|
969
1003
|
self,
|
970
1004
|
feature_view_name: SqlIdentifier,
|
971
1005
|
feature_view: FeatureView,
|
972
1006
|
fully_qualified_name: str,
|
973
1007
|
column_descs: str,
|
974
|
-
|
1008
|
+
tagging_clause: str,
|
975
1009
|
schedule_task: bool,
|
976
1010
|
warehouse: SqlIdentifier,
|
977
|
-
timestamp_col: SqlIdentifier,
|
978
1011
|
block: bool,
|
979
1012
|
override: bool,
|
980
1013
|
) -> None:
|
981
1014
|
# TODO: cluster by join keys once DT supports that
|
982
|
-
override_clause = " OR REPLACE" if override else ""
|
983
|
-
query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
|
984
|
-
TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
|
985
|
-
COMMENT = '{feature_view.desc}'
|
986
|
-
TAG (
|
987
|
-
{self._get_fully_qualified_name(_FEATURE_VIEW_ENTITY_TAG)} = '{entities}',
|
988
|
-
{self._get_fully_qualified_name(_FEATURE_VIEW_TS_COL_TAG)} = '{timestamp_col}',
|
989
|
-
{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} =
|
990
|
-
'{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
|
991
|
-
)
|
992
|
-
WAREHOUSE = {warehouse}
|
993
|
-
AS {feature_view.query}
|
994
|
-
"""
|
995
1015
|
try:
|
1016
|
+
override_clause = " OR REPLACE" if override else ""
|
1017
|
+
query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
|
1018
|
+
TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
|
1019
|
+
COMMENT = '{feature_view.desc}'
|
1020
|
+
TAG (
|
1021
|
+
{tagging_clause}
|
1022
|
+
)
|
1023
|
+
WAREHOUSE = {warehouse}
|
1024
|
+
AS {feature_view.query}
|
1025
|
+
"""
|
996
1026
|
self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
|
997
1027
|
|
998
1028
|
if schedule_task:
|
1029
|
+
task_obj_info = _FeatureStoreObjInfo(
|
1030
|
+
_FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK, snowml_version.VERSION
|
1031
|
+
)
|
999
1032
|
try:
|
1000
1033
|
self._session.sql(
|
1001
1034
|
f"""CREATE{override_clause} TASK {fully_qualified_name}
|
@@ -1007,8 +1040,7 @@ class FeatureStore:
|
|
1007
1040
|
self._session.sql(
|
1008
1041
|
f"""
|
1009
1042
|
ALTER TASK {fully_qualified_name}
|
1010
|
-
SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
|
1011
|
-
='{_FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK.value}'
|
1043
|
+
SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}='{task_obj_info.to_json()}'
|
1012
1044
|
"""
|
1013
1045
|
).collect(statement_params=self._telemetry_stmp)
|
1014
1046
|
self._session.sql(f"ALTER TASK {fully_qualified_name} RESUME").collect(
|
@@ -1049,57 +1081,6 @@ class FeatureStore:
|
|
1049
1081
|
category=UserWarning,
|
1050
1082
|
)
|
1051
1083
|
|
1052
|
-
def _dump_dataset(
|
1053
|
-
self,
|
1054
|
-
df: DataFrame,
|
1055
|
-
table_name: str,
|
1056
|
-
join_keys: List[SqlIdentifier],
|
1057
|
-
spine_timestamp_col: Optional[SqlIdentifier] = None,
|
1058
|
-
) -> None:
|
1059
|
-
if len(df.queries["queries"]) != 1:
|
1060
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1061
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
1062
|
-
original_exception=ValueError(f"Dataset df must contain only one query. Got: {df.queries['queries']}"),
|
1063
|
-
)
|
1064
|
-
schema = ", ".join([f"{c.name} {type_utils.convert_sp_to_sf_type(c.datatype)}" for c in df.schema.fields])
|
1065
|
-
fully_qualified_name = self._get_fully_qualified_name(table_name)
|
1066
|
-
|
1067
|
-
try:
|
1068
|
-
self._session.sql(
|
1069
|
-
f"""CREATE TABLE IF NOT EXISTS {fully_qualified_name} ({schema})
|
1070
|
-
CLUSTER BY ({', '.join(join_keys)})
|
1071
|
-
TAG ({self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '')
|
1072
|
-
"""
|
1073
|
-
).collect(block=True, statement_params=self._telemetry_stmp)
|
1074
|
-
except Exception as e:
|
1075
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1076
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1077
|
-
original_exception=RuntimeError(f"Failed to create table {fully_qualified_name}: {e}."),
|
1078
|
-
) from e
|
1079
|
-
|
1080
|
-
source_query = df.queries["queries"][0]
|
1081
|
-
|
1082
|
-
if spine_timestamp_col is not None:
|
1083
|
-
join_keys.append(spine_timestamp_col)
|
1084
|
-
|
1085
|
-
_, _, dest_alias, _ = identifier.parse_schema_level_object_identifier(fully_qualified_name)
|
1086
|
-
source_alias = f"{dest_alias}_source"
|
1087
|
-
join_cond = " AND ".join([f"{dest_alias}.{k} = {source_alias}.{k}" for k in join_keys])
|
1088
|
-
update_clause = ", ".join([f"{dest_alias}.{c} = {source_alias}.{c}" for c in df.columns])
|
1089
|
-
insert_clause = ", ".join([f"{source_alias}.{c}" for c in df.columns])
|
1090
|
-
query = f"""
|
1091
|
-
MERGE INTO {fully_qualified_name} USING ({source_query}) {source_alias} ON {join_cond}
|
1092
|
-
WHEN MATCHED THEN UPDATE SET {update_clause}
|
1093
|
-
WHEN NOT MATCHED THEN INSERT ({', '.join(df.columns)}) VALUES ({insert_clause})
|
1094
|
-
"""
|
1095
|
-
try:
|
1096
|
-
self._session.sql(query).collect(block=True, statement_params=self._telemetry_stmp)
|
1097
|
-
except Exception as e:
|
1098
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1099
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1100
|
-
original_exception=RuntimeError(f"Failed to create dataset {fully_qualified_name} with merge: {e}."),
|
1101
|
-
) from e
|
1102
|
-
|
1103
1084
|
def _validate_entity_exists(self, name: SqlIdentifier) -> bool:
|
1104
1085
|
full_entity_tag_name = self._get_entity_name(name)
|
1105
1086
|
found_rows = self._find_object("TAGS", full_entity_tag_name)
|
@@ -1150,7 +1131,7 @@ class FeatureStore:
|
|
1150
1131
|
else:
|
1151
1132
|
cols = f.feature_names
|
1152
1133
|
|
1153
|
-
join_keys =
|
1134
|
+
join_keys = list({k for e in f.entities for k in e.join_keys})
|
1154
1135
|
join_keys_str = ", ".join(join_keys)
|
1155
1136
|
assert f.version is not None
|
1156
1137
|
join_table_name = f.fully_qualified_name()
|
@@ -1227,8 +1208,7 @@ class FeatureStore:
|
|
1227
1208
|
for tag_name in to_sql_identifiers(
|
1228
1209
|
[
|
1229
1210
|
_FEATURE_STORE_OBJECT_TAG,
|
1230
|
-
|
1231
|
-
_FEATURE_VIEW_TS_COL_TAG,
|
1211
|
+
_FEATURE_VIEW_METADATA_TAG,
|
1232
1212
|
]
|
1233
1213
|
):
|
1234
1214
|
tag_result = self._find_object("TAGS", tag_name)
|
@@ -1340,7 +1320,8 @@ class FeatureStore:
|
|
1340
1320
|
|
1341
1321
|
# Part 4: join original spine table with window table
|
1342
1322
|
prefix_f_only_cols = to_sql_identifiers(
|
1343
|
-
[f"{temp_prefix}{name.resolved()}" for name in f_only_cols],
|
1323
|
+
[f"{temp_prefix}{name.resolved()}" for name in f_only_cols],
|
1324
|
+
case_sensitive=True,
|
1344
1325
|
)
|
1345
1326
|
last_select = f"""
|
1346
1327
|
SELECT
|
@@ -1373,7 +1354,10 @@ class FeatureStore:
|
|
1373
1354
|
return dynamic_table_results + view_results
|
1374
1355
|
|
1375
1356
|
def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
|
1376
|
-
assert operation in [
|
1357
|
+
assert operation in [
|
1358
|
+
"RESUME",
|
1359
|
+
"SUSPEND",
|
1360
|
+
], f"Operation: {operation} not supported"
|
1377
1361
|
if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
|
1378
1362
|
raise snowml_exceptions.SnowflakeMLException(
|
1379
1363
|
error_code=error_codes.NOT_FOUND,
|
@@ -1397,17 +1381,76 @@ class FeatureStore:
|
|
1397
1381
|
logger.info(f"Successfully {operation} FeatureView {feature_view.name}/{feature_view.version}.")
|
1398
1382
|
return self.get_feature_view(feature_view.name, feature_view.version)
|
1399
1383
|
|
1400
|
-
def
|
1384
|
+
def _optimized_find_feature_views(
|
1401
1385
|
self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]
|
1402
|
-
) ->
|
1386
|
+
) -> DataFrame:
|
1403
1387
|
if not self._validate_entity_exists(entity_name):
|
1404
|
-
return []
|
1388
|
+
return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1405
1389
|
|
1390
|
+
# TODO: this can be optimized further by directly getting all possible FVs and filter by tag
|
1391
|
+
# it's easier to rewrite the code once we can remove the tag_reference path
|
1406
1392
|
all_fvs = self._get_fv_backend_representations(object_name=None)
|
1407
1393
|
fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
|
1408
1394
|
|
1409
1395
|
if len(fv_maps.keys()) == 0:
|
1410
|
-
return []
|
1396
|
+
return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1397
|
+
|
1398
|
+
filter_clause = f"WHERE OBJECT_NAME LIKE '{feature_view_name.resolved()}%'" if feature_view_name else ""
|
1399
|
+
try:
|
1400
|
+
res = self._session.sql(
|
1401
|
+
f"""
|
1402
|
+
SELECT
|
1403
|
+
OBJECT_NAME
|
1404
|
+
FROM TABLE(
|
1405
|
+
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1406
|
+
TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
|
1407
|
+
)
|
1408
|
+
) {filter_clause}"""
|
1409
|
+
).collect(statement_params=self._telemetry_stmp)
|
1410
|
+
except Exception as e:
|
1411
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1412
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
1413
|
+
original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
|
1414
|
+
) from e
|
1415
|
+
|
1416
|
+
output_values: List[List[Any]] = []
|
1417
|
+
for r in res:
|
1418
|
+
row = fv_maps[SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)]
|
1419
|
+
self._extract_feature_view_info(row, output_values)
|
1420
|
+
|
1421
|
+
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1422
|
+
|
1423
|
+
def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
|
1424
|
+
name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1425
|
+
m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
|
1426
|
+
if m is None:
|
1427
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1428
|
+
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
1429
|
+
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
|
1430
|
+
)
|
1431
|
+
|
1432
|
+
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1433
|
+
|
1434
|
+
values: List[Any] = []
|
1435
|
+
values.append(name)
|
1436
|
+
values.append(version)
|
1437
|
+
values.append(row["database_name"])
|
1438
|
+
values.append(row["schema_name"])
|
1439
|
+
values.append(row["created_on"])
|
1440
|
+
values.append(row["owner"])
|
1441
|
+
values.append(row["comment"])
|
1442
|
+
values.append(fv_metadata.entities)
|
1443
|
+
output_values.append(values)
|
1444
|
+
|
1445
|
+
def _find_feature_views(self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]) -> DataFrame:
|
1446
|
+
if not self._validate_entity_exists(entity_name):
|
1447
|
+
return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1448
|
+
|
1449
|
+
all_fvs = self._get_fv_backend_representations(object_name=None)
|
1450
|
+
fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
|
1451
|
+
|
1452
|
+
if len(fv_maps.keys()) == 0:
|
1453
|
+
return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1411
1454
|
|
1412
1455
|
# NOTE: querying INFORMATION_SCHEMA for Entity lineage can be expensive depending on how many active
|
1413
1456
|
# FeatureViews there are. If this ever become an issue, consider exploring improvements.
|
@@ -1424,7 +1467,7 @@ class FeatureStore:
|
|
1424
1467
|
)
|
1425
1468
|
)
|
1426
1469
|
WHERE LEVEL = 'TABLE'
|
1427
|
-
AND TAG_NAME = '{
|
1470
|
+
AND TAG_NAME = '{_FEATURE_VIEW_METADATA_TAG}'
|
1428
1471
|
"""
|
1429
1472
|
for fv_name in fv_maps.keys()
|
1430
1473
|
]
|
@@ -1436,21 +1479,22 @@ class FeatureStore:
|
|
1436
1479
|
original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
|
1437
1480
|
) from e
|
1438
1481
|
|
1439
|
-
|
1440
|
-
outputs = []
|
1482
|
+
output_values: List[List[Any]] = []
|
1441
1483
|
for r in results:
|
1442
|
-
|
1443
|
-
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1484
|
+
fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
|
1485
|
+
for retrieved_entity in fv_metadata.entities:
|
1486
|
+
if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
|
1487
|
+
fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
1488
|
+
fv_name = SqlIdentifier(fv_name, case_sensitive=True)
|
1489
|
+
obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
|
1490
|
+
if feature_view_name is not None:
|
1491
|
+
if fv_name == feature_view_name:
|
1492
|
+
self._extract_feature_view_info(fv_maps[obj_name], output_values)
|
1493
|
+
else:
|
1494
|
+
continue
|
1449
1495
|
else:
|
1450
|
-
|
1451
|
-
|
1452
|
-
outputs.append(self._compose_feature_view(fv_maps[obj_name], entities))
|
1453
|
-
return outputs
|
1496
|
+
self._extract_feature_view_info(fv_maps[obj_name], output_values)
|
1497
|
+
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
1454
1498
|
|
1455
1499
|
def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
|
1456
1500
|
def find_and_compose_entity(name: str) -> Entity:
|
@@ -1459,7 +1503,7 @@ class FeatureStore:
|
|
1459
1503
|
if e["NAME"] == name:
|
1460
1504
|
return Entity(
|
1461
1505
|
name=SqlIdentifier(e["NAME"], case_sensitive=True).identifier(),
|
1462
|
-
join_keys=e["JOIN_KEYS"]
|
1506
|
+
join_keys=self._recompose_join_keys(e["JOIN_KEYS"]),
|
1463
1507
|
desc=e["DESC"],
|
1464
1508
|
)
|
1465
1509
|
raise RuntimeError(f"Cannot find entity {name} from retrieved entity list: {entity_list}")
|
@@ -1477,9 +1521,9 @@ class FeatureStore:
|
|
1477
1521
|
query = m.group("query")
|
1478
1522
|
df = self._session.sql(query)
|
1479
1523
|
desc = m.group("comment")
|
1480
|
-
|
1481
|
-
entities = [find_and_compose_entity(n) for n in
|
1482
|
-
ts_col =
|
1524
|
+
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1525
|
+
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1526
|
+
ts_col = fv_metadata.timestamp_col
|
1483
1527
|
timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
|
1484
1528
|
|
1485
1529
|
fv = FeatureView._construct_feature_view(
|
@@ -1506,9 +1550,9 @@ class FeatureStore:
|
|
1506
1550
|
query = m.group("query")
|
1507
1551
|
df = self._session.sql(query)
|
1508
1552
|
desc = m.group("comment")
|
1509
|
-
|
1510
|
-
entities = [find_and_compose_entity(n) for n in
|
1511
|
-
ts_col =
|
1553
|
+
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1554
|
+
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1555
|
+
ts_col = fv_metadata.timestamp_col
|
1512
1556
|
timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
|
1513
1557
|
|
1514
1558
|
fv = FeatureView._construct_feature_view(
|
@@ -1542,7 +1586,10 @@ class FeatureStore:
|
|
1542
1586
|
return descs
|
1543
1587
|
|
1544
1588
|
def _find_object(
|
1545
|
-
self,
|
1589
|
+
self,
|
1590
|
+
object_type: str,
|
1591
|
+
object_name: Optional[SqlIdentifier],
|
1592
|
+
prefix_match: bool = False,
|
1546
1593
|
) -> List[Row]:
|
1547
1594
|
"""Try to find an object by given type and name pattern.
|
1548
1595
|
|
@@ -1569,7 +1616,7 @@ class FeatureStore:
|
|
1569
1616
|
search_space, obj_domain = self._obj_search_spaces[object_type]
|
1570
1617
|
all_rows = []
|
1571
1618
|
fs_tag_objects = []
|
1572
|
-
tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES"]
|
1619
|
+
tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES", "DATASETS"]
|
1573
1620
|
try:
|
1574
1621
|
search_scope = f"IN {search_space}" if search_space is not None else ""
|
1575
1622
|
all_rows = self._session.sql(f"SHOW {object_type} LIKE '{match_name}' {search_scope}").collect(
|
@@ -1577,25 +1624,41 @@ class FeatureStore:
|
|
1577
1624
|
)
|
1578
1625
|
# There could be none-FS objects under FS schema, thus filter on objects with FS special tag.
|
1579
1626
|
if object_type not in tag_free_object_types and len(all_rows) > 0:
|
1580
|
-
|
1581
|
-
|
1582
|
-
|
1583
|
-
|
1584
|
-
|
1585
|
-
|
1586
|
-
|
1587
|
-
|
1588
|
-
|
1627
|
+
if self._use_optimized_tag_ref:
|
1628
|
+
fs_obj_rows = self._session.sql(
|
1629
|
+
f"""
|
1630
|
+
SELECT
|
1631
|
+
OBJECT_NAME
|
1632
|
+
FROM TABLE(
|
1633
|
+
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1634
|
+
TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
|
1635
|
+
)
|
1589
1636
|
)
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1637
|
+
WHERE DOMAIN='{obj_domain}'
|
1638
|
+
"""
|
1639
|
+
).collect(statement_params=self._telemetry_stmp)
|
1640
|
+
else:
|
1641
|
+
# TODO: remove this after tag_ref_internal rollout
|
1642
|
+
# Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
|
1643
|
+
# use double quotes to make it case-sensitive.
|
1644
|
+
queries = [
|
1645
|
+
f"""
|
1646
|
+
SELECT OBJECT_NAME
|
1647
|
+
FROM TABLE(
|
1648
|
+
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
|
1649
|
+
'{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
|
1650
|
+
'{obj_domain}'
|
1651
|
+
)
|
1652
|
+
)
|
1653
|
+
WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
|
1654
|
+
AND TAG_SCHEMA = '{self._config.schema.resolved()}'
|
1655
|
+
"""
|
1656
|
+
for row in all_rows
|
1657
|
+
]
|
1658
|
+
fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
|
1659
|
+
statement_params=self._telemetry_stmp
|
1660
|
+
)
|
1661
|
+
|
1599
1662
|
fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
|
1600
1663
|
except Exception as e:
|
1601
1664
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1641,3 +1704,66 @@ class FeatureStore:
|
|
1641
1704
|
),
|
1642
1705
|
)
|
1643
1706
|
return cast(DataFrame, df.drop(exclude_columns))
|
1707
|
+
|
1708
|
+
def _tag_ref_internal_enabled(self) -> bool:
|
1709
|
+
try:
|
1710
|
+
self._session.sql(
|
1711
|
+
f"""
|
1712
|
+
SELECT * FROM TABLE(
|
1713
|
+
INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1714
|
+
TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
|
1715
|
+
)
|
1716
|
+
) LIMIT 1;
|
1717
|
+
"""
|
1718
|
+
).collect()
|
1719
|
+
return True
|
1720
|
+
except Exception:
|
1721
|
+
return False
|
1722
|
+
|
1723
|
+
def _check_feature_store_object_versions(self) -> None:
|
1724
|
+
versions = self._collapse_object_versions()
|
1725
|
+
if len(versions) > 0 and pkg_version.parse(snowml_version.VERSION) < versions[0]:
|
1726
|
+
warnings.warn(
|
1727
|
+
"The current snowflake-ml-python version out of date, package upgrade recommended "
|
1728
|
+
+ f"(current={snowml_version.VERSION}, recommended>={str(versions[0])})",
|
1729
|
+
stacklevel=2,
|
1730
|
+
category=UserWarning,
|
1731
|
+
)
|
1732
|
+
|
1733
|
+
def _collapse_object_versions(self) -> List[pkg_version.Version]:
|
1734
|
+
if not self._use_optimized_tag_ref:
|
1735
|
+
return []
|
1736
|
+
|
1737
|
+
query = f"""
|
1738
|
+
SELECT
|
1739
|
+
TAG_VALUE
|
1740
|
+
FROM TABLE(
|
1741
|
+
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1742
|
+
TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
|
1743
|
+
)
|
1744
|
+
)
|
1745
|
+
"""
|
1746
|
+
try:
|
1747
|
+
res = self._session.sql(query).collect(statement_params=self._telemetry_stmp)
|
1748
|
+
except Exception:
|
1749
|
+
# since this is a best effort user warning to upgrade pkg versions
|
1750
|
+
# we are treating failures as benign error
|
1751
|
+
return []
|
1752
|
+
versions = set()
|
1753
|
+
compatibility_breakage_detected = False
|
1754
|
+
for r in res:
|
1755
|
+
info = _FeatureStoreObjInfo.from_json(r["TAG_VALUE"])
|
1756
|
+
if info.type == _FeatureStoreObjTypes.UNKNOWN:
|
1757
|
+
compatibility_breakage_detected = True
|
1758
|
+
versions.add(pkg_version.parse(info.pkg_version))
|
1759
|
+
|
1760
|
+
sorted_versions = sorted(versions, reverse=True)
|
1761
|
+
if compatibility_breakage_detected:
|
1762
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1763
|
+
error_code=error_codes.SNOWML_PACKAGE_OUTDATED,
|
1764
|
+
original_exception=RuntimeError(
|
1765
|
+
f"The current snowflake-ml-python version {snowml_version.VERSION} is out of date, "
|
1766
|
+
+ f"please upgrade to at least {sorted_versions[0]}."
|
1767
|
+
),
|
1768
|
+
)
|
1769
|
+
return sorted_versions
|