snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. snowflake/ml/_internal/env_utils.py +77 -32
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
  7. snowflake/ml/_internal/utils/identifier.py +3 -1
  8. snowflake/ml/_internal/utils/sql_identifier.py +2 -6
  9. snowflake/ml/dataset/__init__.py +10 -0
  10. snowflake/ml/dataset/dataset.py +454 -129
  11. snowflake/ml/dataset/dataset_factory.py +53 -0
  12. snowflake/ml/dataset/dataset_metadata.py +103 -0
  13. snowflake/ml/dataset/dataset_reader.py +202 -0
  14. snowflake/ml/feature_store/feature_store.py +531 -332
  15. snowflake/ml/feature_store/feature_view.py +40 -23
  16. snowflake/ml/fileset/embedded_stage_fs.py +146 -0
  17. snowflake/ml/fileset/sfcfs.py +56 -54
  18. snowflake/ml/fileset/snowfs.py +159 -0
  19. snowflake/ml/fileset/stage_fs.py +49 -17
  20. snowflake/ml/model/__init__.py +2 -2
  21. snowflake/ml/model/_api.py +16 -1
  22. snowflake/ml/model/_client/model/model_impl.py +27 -0
  23. snowflake/ml/model/_client/model/model_version_impl.py +137 -50
  24. snowflake/ml/model/_client/ops/model_ops.py +159 -40
  25. snowflake/ml/model/_client/sql/model.py +25 -2
  26. snowflake/ml/model/_client/sql/model_version.py +131 -2
  27. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  28. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  29. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  30. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  31. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  32. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  33. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
  34. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
  35. snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
  36. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  37. snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
  38. snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
  39. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
  40. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  41. snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
  42. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
  43. snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
  44. snowflake/ml/model/_packager/model_packager.py +2 -5
  45. snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
  46. snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
  47. snowflake/ml/model/type_hints.py +21 -2
  48. snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
  49. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
  50. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  51. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  52. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  53. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  54. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
  55. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
  56. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
  57. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
  58. snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
  59. snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
  60. snowflake/ml/modeling/cluster/birch.py +248 -175
  61. snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
  62. snowflake/ml/modeling/cluster/dbscan.py +246 -175
  63. snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
  64. snowflake/ml/modeling/cluster/k_means.py +248 -175
  65. snowflake/ml/modeling/cluster/mean_shift.py +246 -175
  66. snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
  67. snowflake/ml/modeling/cluster/optics.py +246 -175
  68. snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
  69. snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
  70. snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
  71. snowflake/ml/modeling/compose/column_transformer.py +248 -175
  72. snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
  73. snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
  74. snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
  75. snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
  76. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
  77. snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
  78. snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
  79. snowflake/ml/modeling/covariance/oas.py +246 -175
  80. snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
  81. snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
  82. snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
  83. snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
  84. snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
  85. snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
  86. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
  87. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
  88. snowflake/ml/modeling/decomposition/pca.py +248 -175
  89. snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
  90. snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
  91. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
  92. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
  93. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
  94. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
  95. snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
  96. snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
  97. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
  98. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
  99. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
  100. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
  101. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
  102. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
  103. snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
  104. snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
  105. snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
  106. snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
  107. snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
  108. snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
  109. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
  110. snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
  111. snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
  112. snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
  113. snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
  114. snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
  115. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
  116. snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
  117. snowflake/ml/modeling/framework/_utils.py +8 -1
  118. snowflake/ml/modeling/framework/base.py +72 -37
  119. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
  120. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
  121. snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
  122. snowflake/ml/modeling/impute/knn_imputer.py +248 -175
  123. snowflake/ml/modeling/impute/missing_indicator.py +248 -175
  124. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
  125. snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
  126. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
  127. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
  128. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
  129. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
  130. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
  131. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
  132. snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
  133. snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
  134. snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
  135. snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
  136. snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
  137. snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
  138. snowflake/ml/modeling/linear_model/lars.py +246 -175
  139. snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
  140. snowflake/ml/modeling/linear_model/lasso.py +246 -175
  141. snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
  142. snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
  143. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
  144. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
  145. snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
  146. snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
  147. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
  148. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
  149. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
  150. snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
  151. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
  152. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
  153. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
  154. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
  155. snowflake/ml/modeling/linear_model/perceptron.py +246 -175
  156. snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
  157. snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
  158. snowflake/ml/modeling/linear_model/ridge.py +246 -175
  159. snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
  160. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
  161. snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
  162. snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
  163. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
  164. snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
  165. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
  166. snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
  167. snowflake/ml/modeling/manifold/isomap.py +248 -175
  168. snowflake/ml/modeling/manifold/mds.py +248 -175
  169. snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
  170. snowflake/ml/modeling/manifold/tsne.py +248 -175
  171. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
  172. snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
  173. snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
  174. snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
  175. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
  176. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
  177. snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
  178. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
  179. snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
  180. snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
  181. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
  182. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
  183. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
  184. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
  185. snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
  186. snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
  187. snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
  188. snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
  189. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
  190. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
  191. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
  192. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
  193. snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
  194. snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
  195. snowflake/ml/modeling/pipeline/pipeline.py +517 -35
  196. snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
  197. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
  198. snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
  199. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
  200. snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
  201. snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
  202. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
  203. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
  204. snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
  205. snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
  206. snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
  207. snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
  208. snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
  209. snowflake/ml/modeling/svm/linear_svc.py +246 -175
  210. snowflake/ml/modeling/svm/linear_svr.py +246 -175
  211. snowflake/ml/modeling/svm/nu_svc.py +246 -175
  212. snowflake/ml/modeling/svm/nu_svr.py +246 -175
  213. snowflake/ml/modeling/svm/svc.py +246 -175
  214. snowflake/ml/modeling/svm/svr.py +246 -175
  215. snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
  216. snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
  217. snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
  218. snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
  219. snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
  220. snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
  221. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
  222. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
  223. snowflake/ml/registry/model_registry.py +3 -149
  224. snowflake/ml/registry/registry.py +1 -1
  225. snowflake/ml/version.py +1 -1
  226. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
  227. snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
  228. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
  229. snowflake/ml/registry/_artifact_manager.py +0 -156
  230. snowflake/ml/registry/artifact.py +0 -46
  231. snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
  232. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
  233. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
  234. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -8,13 +8,17 @@ import re
8
8
  import warnings
9
9
  from dataclasses import dataclass
10
10
  from enum import Enum
11
- from typing import Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
11
+ from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
12
12
 
13
+ import packaging.version as pkg_version
14
+ import snowflake.ml.version as snowml_version
13
15
  from pytimeparse.timeparse import timeparse
14
16
  from typing_extensions import Concatenate, ParamSpec
15
17
 
18
+ from snowflake.ml import dataset
16
19
  from snowflake.ml._internal import telemetry
17
20
  from snowflake.ml._internal.exceptions import (
21
+ dataset_errors,
18
22
  error_codes,
19
23
  exceptions as snowml_exceptions,
20
24
  )
@@ -23,12 +27,8 @@ from snowflake.ml._internal.utils.sql_identifier import (
23
27
  SqlIdentifier,
24
28
  to_sql_identifiers,
25
29
  )
26
- from snowflake.ml.dataset.dataset import Dataset, FeatureStoreMetadata
27
- from snowflake.ml.feature_store.entity import (
28
- _ENTITY_NAME_LENGTH_LIMIT,
29
- _FEATURE_VIEW_ENTITY_TAG_DELIMITER,
30
- Entity,
31
- )
30
+ from snowflake.ml.dataset.dataset_metadata import FeatureStoreMetadata
31
+ from snowflake.ml.feature_store.entity import _ENTITY_NAME_LENGTH_LIMIT, Entity
32
32
  from snowflake.ml.feature_store.feature_view import (
33
33
  _FEATURE_OBJ_TYPE,
34
34
  _FEATURE_VIEW_NAME_DELIMITER,
@@ -37,11 +37,17 @@ from snowflake.ml.feature_store.feature_view import (
37
37
  FeatureViewSlice,
38
38
  FeatureViewStatus,
39
39
  FeatureViewVersion,
40
+ _FeatureViewMetadata,
40
41
  )
41
42
  from snowflake.snowpark import DataFrame, Row, Session, functions as F
42
- from snowflake.snowpark._internal import type_utils, utils as snowpark_utils
43
43
  from snowflake.snowpark.exceptions import SnowparkSQLException
44
- from snowflake.snowpark.types import StructField
44
+ from snowflake.snowpark.types import (
45
+ ArrayType,
46
+ StringType,
47
+ StructField,
48
+ StructType,
49
+ TimestampType,
50
+ )
45
51
 
46
52
  _Args = ParamSpec("_Args")
47
53
  _RT = TypeVar("_RT")
@@ -49,29 +55,80 @@ _RT = TypeVar("_RT")
49
55
  logger = logging.getLogger(__name__)
50
56
 
51
57
  _ENTITY_TAG_PREFIX = "SNOWML_FEATURE_STORE_ENTITY_"
52
- _FEATURE_VIEW_ENTITY_TAG = "SNOWML_FEATURE_STORE_FV_ENTITIES"
53
- _FEATURE_VIEW_TS_COL_TAG = "SNOWML_FEATURE_STORE_FV_TS_COL"
54
58
  _FEATURE_STORE_OBJECT_TAG = "SNOWML_FEATURE_STORE_OBJECT"
59
+ _FEATURE_VIEW_METADATA_TAG = "SNOWML_FEATURE_VIEW_METADATA"
60
+
61
+
62
+ @dataclass(frozen=True)
63
+ class _FeatureStoreObjInfo:
64
+ type: _FeatureStoreObjTypes
65
+ pkg_version: str
66
+
67
+ def to_json(self) -> str:
68
+ state_dict = self.__dict__.copy()
69
+ state_dict["type"] = state_dict["type"].value
70
+ return json.dumps(state_dict)
71
+
72
+ @classmethod
73
+ def from_json(cls, json_str: str) -> _FeatureStoreObjInfo:
74
+ json_dict = json.loads(json_str)
75
+ # since we may introduce new fields in the json blob in the future,
76
+ # in order to guarantee compatibility, we need to select ones that can be
77
+ # decoded in the current version
78
+ state_dict = {}
79
+ state_dict["type"] = _FeatureStoreObjTypes.parse(json_dict["type"])
80
+ state_dict["pkg_version"] = json_dict["pkg_version"]
81
+ return cls(**state_dict) # type: ignore[arg-type]
82
+
83
+
84
+ # TODO: remove "" after dataset is updated
85
+ class _FeatureStoreObjTypes(Enum):
86
+ UNKNOWN = "UNKNOWN" # for forward compatibility
87
+ MANAGED_FEATURE_VIEW = "MANAGED_FEATURE_VIEW"
88
+ EXTERNAL_FEATURE_VIEW = "EXTERNAL_FEATURE_VIEW"
89
+ FEATURE_VIEW_REFRESH_TASK = "FEATURE_VIEW_REFRESH_TASK"
90
+ TRAINING_DATA = ""
91
+
92
+ @classmethod
93
+ def parse(cls, val: str) -> _FeatureStoreObjTypes:
94
+ try:
95
+ return cls(val)
96
+ except ValueError:
97
+ return cls.UNKNOWN
98
+
99
+
55
100
  _PROJECT = "FeatureStore"
56
101
  _DT_OR_VIEW_QUERY_PATTERN = re.compile(
57
102
  r"""CREATE\ (OR\ REPLACE\ )?(?P<obj_type>(DYNAMIC\ TABLE|VIEW))\ .*
58
103
  COMMENT\ =\ '(?P<comment>.*)'\s*
59
- TAG.*?{entity_tag}\ =\ '(?P<entities>.*?)',\n
60
- .*?{ts_col_tag}\ =\ '(?P<ts_col>.*?)',?.*?
104
+ TAG.*?{fv_metadata_tag}\ =\ '(?P<fv_metadata>.*?)',?.*?
61
105
  AS\ (?P<query>.*)
62
106
  """.format(
63
- entity_tag=_FEATURE_VIEW_ENTITY_TAG, ts_col_tag=_FEATURE_VIEW_TS_COL_TAG
107
+ fv_metadata_tag=_FEATURE_VIEW_METADATA_TAG,
64
108
  ),
65
109
  flags=re.DOTALL | re.IGNORECASE | re.X,
66
110
  )
67
111
 
112
+ _LIST_FEATURE_VIEW_SCHEMA = StructType(
113
+ [
114
+ StructField("name", StringType()),
115
+ StructField("version", StringType()),
116
+ StructField("database_name", StringType()),
117
+ StructField("schema_name", StringType()),
118
+ StructField("created_on", TimestampType()),
119
+ StructField("owner", StringType()),
120
+ StructField("desc", StringType()),
121
+ StructField("entities", ArrayType(StringType())),
122
+ ]
123
+ )
124
+
68
125
 
69
126
  class CreationMode(Enum):
70
127
  FAIL_IF_NOT_EXIST = 1
71
128
  CREATE_IF_NOT_EXIST = 2
72
129
 
73
130
 
74
- @dataclass
131
+ @dataclass(frozen=True)
75
132
  class _FeatureStoreConfig:
76
133
  database: SqlIdentifier
77
134
  schema: SqlIdentifier
@@ -102,14 +159,14 @@ def switch_warehouse(
102
159
  return wrapper
103
160
 
104
161
 
105
- def dispatch_decorator(
106
- prpr_version: str,
107
- ) -> Callable[[Callable[Concatenate[FeatureStore, _Args], _RT]], Callable[Concatenate[FeatureStore, _Args], _RT],]:
162
+ def dispatch_decorator() -> Callable[
163
+ [Callable[Concatenate[FeatureStore, _Args], _RT]],
164
+ Callable[Concatenate[FeatureStore, _Args], _RT],
165
+ ]:
108
166
  def decorator(
109
167
  f: Callable[Concatenate[FeatureStore, _Args], _RT]
110
168
  ) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
111
169
  @telemetry.send_api_usage_telemetry(project=_PROJECT)
112
- @snowpark_utils.private_preview(version=prpr_version)
113
170
  @switch_warehouse
114
171
  @functools.wraps(f)
115
172
  def wrap(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
@@ -126,7 +183,6 @@ class FeatureStore:
126
183
  """
127
184
 
128
185
  @telemetry.send_api_usage_telemetry(project=_PROJECT)
129
- @snowpark_utils.private_preview(version="1.0.8")
130
186
  def __init__(
131
187
  self,
132
188
  session: Session,
@@ -143,14 +199,17 @@ class FeatureStore:
143
199
  database: Database to create the FeatureStore instance.
144
200
  name: Target FeatureStore name, maps to a schema in the database.
145
201
  default_warehouse: Default warehouse for feature store compute.
146
- creation_mode: Create new backend or fail if not exist upon feature store creation.
202
+ creation_mode: If FAIL_IF_NOT_EXIST, feature store throws when required resources not already exist; If
203
+ CREATE_IF_NOT_EXIST, feature store will create required resources if they not already exist. Required
204
+ resources include schema and tags. Note database must already exist in either mode.
147
205
 
148
206
  Raises:
149
207
  SnowflakeMLException: [ValueError] default_warehouse does not exist.
150
- SnowflakeMLException: [ValueError] FAIL_IF_NOT_EXIST is set and feature store not exists.
208
+ SnowflakeMLException: [ValueError] Required resources not exist when mode is FAIL_IF_NOT_EXIST.
151
209
  SnowflakeMLException: [RuntimeError] Failed to find resources.
152
210
  SnowflakeMLException: [RuntimeError] Failed to create feature store.
153
211
  """
212
+
154
213
  database = SqlIdentifier(database)
155
214
  name = SqlIdentifier(name)
156
215
 
@@ -166,7 +225,7 @@ class FeatureStore:
166
225
  # search space used in query "SHOW <object_TYPE> LIKE <object_name> IN <search_space>"
167
226
  # object domain used in query "TAG_REFERENCE(<object_name>, <object_domain>)"
168
227
  self._obj_search_spaces = {
169
- "TABLES": (self._config.full_schema_path, "TABLE"),
228
+ "DATASETS": (self._config.full_schema_path, "DATASET"),
170
229
  "DYNAMIC TABLES": (self._config.full_schema_path, "TABLE"),
171
230
  "VIEWS": (self._config.full_schema_path, "TABLE"),
172
231
  "SCHEMAS": (f"DATABASE {self._config.database}", "SCHEMA"),
@@ -177,31 +236,27 @@ class FeatureStore:
177
236
 
178
237
  self.update_default_warehouse(default_warehouse)
179
238
 
239
+ self._check_database_exists_or_throw()
180
240
  if creation_mode == CreationMode.FAIL_IF_NOT_EXIST:
181
- schema_result = self._find_object("SCHEMAS", self._config.schema)
182
- if len(schema_result) == 0:
183
- raise snowml_exceptions.SnowflakeMLException(
184
- error_code=error_codes.NOT_FOUND,
185
- original_exception=ValueError(f"Feature store {name} does not exist."),
186
- )
241
+ self._check_internal_objects_exist_or_throw()
242
+
187
243
  else:
188
244
  try:
189
- self._session.sql(f"CREATE DATABASE IF NOT EXISTS {self._config.database}").collect(
190
- statement_params=self._telemetry_stmp
191
- )
192
245
  self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
193
246
  statement_params=self._telemetry_stmp
194
247
  )
195
248
  for tag in to_sql_identifiers(
196
249
  [
197
- _FEATURE_VIEW_ENTITY_TAG,
198
- _FEATURE_VIEW_TS_COL_TAG,
199
- _FEATURE_STORE_OBJECT_TAG,
250
+ _FEATURE_VIEW_METADATA_TAG,
200
251
  ]
201
252
  ):
202
253
  self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
203
254
  statement_params=self._telemetry_stmp
204
255
  )
256
+
257
+ self._session.sql(
258
+ f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}"
259
+ ).collect(statement_params=self._telemetry_stmp)
205
260
  except Exception as e:
206
261
  self.clear()
207
262
  raise snowml_exceptions.SnowflakeMLException(
@@ -209,10 +264,12 @@ class FeatureStore:
209
264
  original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
210
265
  )
211
266
 
267
+ # TODO: remove this after tag_ref_internal rollout
268
+ self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
269
+ self._check_feature_store_object_versions()
212
270
  logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
213
271
 
214
272
  @telemetry.send_api_usage_telemetry(project=_PROJECT)
215
- @snowpark_utils.private_preview(version="1.0.12")
216
273
  def update_default_warehouse(self, warehouse_name: str) -> None:
217
274
  """Update default warehouse for feature store.
218
275
 
@@ -232,7 +289,7 @@ class FeatureStore:
232
289
 
233
290
  self._default_warehouse = warehouse
234
291
 
235
- @dispatch_decorator(prpr_version="1.0.8")
292
+ @dispatch_decorator()
236
293
  def register_entity(self, entity: Entity) -> Entity:
237
294
  """
238
295
  Register Entity in the FeatureStore.
@@ -244,26 +301,27 @@ class FeatureStore:
244
301
  A registered entity object.
245
302
 
246
303
  Raises:
247
- SnowflakeMLException: [ValueError] Entity with same name is already registered.
248
304
  SnowflakeMLException: [RuntimeError] Failed to find resources.
249
305
  """
306
+
250
307
  tag_name = self._get_entity_name(entity.name)
251
308
  found_rows = self._find_object("TAGS", tag_name)
252
309
  if len(found_rows) > 0:
253
- raise snowml_exceptions.SnowflakeMLException(
254
- error_code=error_codes.OBJECT_ALREADY_EXISTS,
255
- original_exception=ValueError(f"Entity {entity.name} already exists."),
256
- suppress_source_trace=True,
310
+ warnings.warn(
311
+ f"Entity {entity.name} already exists. Skip registration.",
312
+ stacklevel=2,
313
+ category=UserWarning,
257
314
  )
315
+ return entity
258
316
 
259
317
  # allowed_values will add double-quotes around each value, thus use resolved str here.
260
- join_keys = [f"'{key.resolved()}'" for key in entity.join_keys]
318
+ join_keys = [f"{key.resolved()}" for key in entity.join_keys]
261
319
  join_keys_str = ",".join(join_keys)
262
320
  full_tag_name = self._get_fully_qualified_name(tag_name)
263
321
  try:
264
322
  self._session.sql(
265
323
  f"""CREATE TAG IF NOT EXISTS {full_tag_name}
266
- ALLOWED_VALUES {join_keys_str}
324
+ ALLOWED_VALUES '{join_keys_str}'
267
325
  COMMENT = '{entity.desc}'
268
326
  """
269
327
  ).collect(statement_params=self._telemetry_stmp)
@@ -278,13 +336,13 @@ class FeatureStore:
278
336
  return self.get_entity(entity.name)
279
337
 
280
338
  # TODO: add support to update column desc once SNOW-894249 is fixed
281
- @dispatch_decorator(prpr_version="1.0.8")
339
+ @dispatch_decorator()
282
340
  def register_feature_view(
283
341
  self,
284
342
  feature_view: FeatureView,
285
343
  version: str,
286
344
  block: bool = True,
287
- override: bool = False,
345
+ overwrite: bool = False,
288
346
  ) -> FeatureView:
289
347
  """
290
348
  Materialize a FeatureView to Snowflake backend.
@@ -304,16 +362,14 @@ class FeatureStore:
304
362
  NOTE: Version only accepts letters, numbers and underscore. Also version will be capitalized.
305
363
  block: Specify whether the FeatureView backend materialization should be blocking or not. If blocking then
306
364
  the API will wait until the initial FeatureView data is generated. Default to true.
307
- override: Override the existing FeatureView with same version. This is the same as dropping the FeatureView
308
- first then recreate. NOTE: there will be backfill cost associated if the FeatureView is being
309
- continuously maintained.
365
+ overwrite: Overwrite the existing FeatureView with same version. This is the same as dropping the
366
+ FeatureView first then recreate. NOTE: there will be backfill cost associated if the FeatureView is
367
+ being continuously maintained.
310
368
 
311
369
  Returns:
312
370
  A materialized FeatureView object.
313
371
 
314
372
  Raises:
315
- SnowflakeMLException: [ValueError] FeatureView is already registered, or duplicate name and version
316
- are detected.
317
373
  SnowflakeMLException: [ValueError] FeatureView entity has not been registered.
318
374
  SnowflakeMLException: [ValueError] Warehouse or default warehouse is not specified.
319
375
  SnowflakeMLException: [RuntimeError] Failed to create dynamic table, task, or view.
@@ -322,15 +378,17 @@ class FeatureStore:
322
378
  version = FeatureViewVersion(version)
323
379
 
324
380
  if feature_view.status != FeatureViewStatus.DRAFT:
325
- warnings.warn(
326
- f"FeatureView {feature_view.name}/{feature_view.version} has already been registered. "
327
- + "Skipping registration.",
328
- stacklevel=2,
329
- category=UserWarning,
330
- )
331
- return feature_view
381
+ try:
382
+ return self._get_feature_view_if_exists(feature_view.name, str(version))
383
+ except Exception:
384
+ raise snowml_exceptions.SnowflakeMLException(
385
+ error_code=error_codes.NOT_FOUND,
386
+ original_exception=ValueError(
387
+ f"FeatureView {feature_view.name}/{feature_view.version} status is {feature_view.status}, "
388
+ + "but it doesn't exist."
389
+ ),
390
+ )
332
391
 
333
- # TODO: ideally we should move this to FeatureView creation time
334
392
  for e in feature_view.entities:
335
393
  if not self._validate_entity_exists(e.name):
336
394
  raise snowml_exceptions.SnowflakeMLException(
@@ -339,23 +397,30 @@ class FeatureStore:
339
397
  )
340
398
 
341
399
  feature_view_name = FeatureView._get_physical_name(feature_view.name, version)
342
- if not override:
343
- dynamic_table_results = self._find_object("DYNAMIC TABLES", feature_view_name)
344
- view_results = self._find_object("VIEWS", feature_view_name)
345
- if len(dynamic_table_results) > 0 or len(view_results) > 0:
346
- raise snowml_exceptions.SnowflakeMLException(
347
- error_code=error_codes.OBJECT_ALREADY_EXISTS,
348
- original_exception=ValueError(f"FeatureView {feature_view.name}/{version} already exists."),
349
- suppress_source_trace=True,
350
- )
400
+ if not overwrite:
401
+ try:
402
+ return self._get_feature_view_if_exists(feature_view.name, str(version))
403
+ except Exception:
404
+ pass
351
405
 
352
406
  fully_qualified_name = self._get_fully_qualified_name(feature_view_name)
353
- entities = _FEATURE_VIEW_ENTITY_TAG_DELIMITER.join([e.name for e in feature_view.entities])
354
- timestamp_col = (
355
- feature_view.timestamp_col
356
- if feature_view.timestamp_col is not None
357
- else SqlIdentifier(_TIMESTAMP_COL_PLACEHOLDER)
358
- )
407
+ refresh_freq = feature_view.refresh_freq
408
+
409
+ if refresh_freq is not None:
410
+ obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.MANAGED_FEATURE_VIEW, snowml_version.VERSION)
411
+ else:
412
+ obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW, snowml_version.VERSION)
413
+
414
+ tagging_clause = [
415
+ f"{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '{obj_info.to_json()}'",
416
+ f"{self._get_fully_qualified_name(_FEATURE_VIEW_METADATA_TAG)} = '{feature_view._metadata().to_json()}'",
417
+ ]
418
+ for e in feature_view.entities:
419
+ join_keys = [f"{key.resolved()}" for key in e.join_keys]
420
+ tagging_clause.append(
421
+ f"{self._get_fully_qualified_name(self._get_entity_name(e.name))} = '{','.join(join_keys)}'"
422
+ )
423
+ tagging_clause_str = ",\n".join(tagging_clause)
359
424
 
360
425
  def create_col_desc(col: StructField) -> str:
361
426
  desc = feature_view.feature_descs.get(SqlIdentifier(col.name), None)
@@ -363,7 +428,6 @@ class FeatureStore:
363
428
  return f"{col.name} {desc}"
364
429
 
365
430
  column_descs = ", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
366
- refresh_freq = feature_view.refresh_freq
367
431
 
368
432
  if refresh_freq is not None:
369
433
  schedule_task = refresh_freq != "DOWNSTREAM" and timeparse(refresh_freq) is None
@@ -372,22 +436,19 @@ class FeatureStore:
372
436
  feature_view,
373
437
  fully_qualified_name,
374
438
  column_descs,
375
- entities,
439
+ tagging_clause_str,
376
440
  schedule_task,
377
441
  self._default_warehouse,
378
- timestamp_col,
379
442
  block,
380
- override,
443
+ overwrite,
381
444
  )
382
445
  else:
383
446
  try:
384
- override_clause = " OR REPLACE" if override else ""
385
- query = f"""CREATE{override_clause} VIEW {fully_qualified_name} ({column_descs})
447
+ overwrite_clause = " OR REPLACE" if overwrite else ""
448
+ query = f"""CREATE{overwrite_clause} VIEW {fully_qualified_name} ({column_descs})
386
449
  COMMENT = '{feature_view.desc}'
387
450
  TAG (
388
- {_FEATURE_VIEW_ENTITY_TAG} = '{entities}',
389
- {_FEATURE_VIEW_TS_COL_TAG} = '{timestamp_col}',
390
- {_FEATURE_STORE_OBJECT_TAG} = ''
451
+ {tagging_clause_str}
391
452
  )
392
453
  AS {feature_view.query}
393
454
  """
@@ -398,47 +459,57 @@ class FeatureStore:
398
459
  original_exception=RuntimeError(f"Create view {fully_qualified_name} [\n{query}\n] failed: {e}"),
399
460
  ) from e
400
461
 
401
- logger.info(f"Registered FeatureView {feature_view.name}/{version}.")
462
+ logger.info(f"Registered FeatureView {feature_view.name}/{version} successfully.")
402
463
  return self.get_feature_view(feature_view.name, str(version))
403
464
 
404
- @dispatch_decorator(prpr_version="1.1.0")
405
- def update_feature_view(self, feature_view: FeatureView) -> None:
465
+ @dispatch_decorator()
466
+ def update_feature_view(
467
+ self, name: str, version: str, refresh_freq: Optional[str] = None, warehouse: Optional[str] = None
468
+ ) -> FeatureView:
406
469
  """Update a registered feature view.
407
470
  Check feature_view.py for which fields are allowed to be updated after registration.
408
471
 
409
472
  Args:
410
- feature_view: The feature view to be updated.
473
+ name: name of the FeatureView to be updated.
474
+ version: version of the FeatureView to be updated.
475
+ refresh_freq: updated refresh frequency.
476
+ warehouse: updated warehouse.
477
+
478
+ Returns:
479
+ Updated FeatureView.
411
480
 
412
481
  Raises:
413
- SnowflakeMLException: [RuntimeError] Feature view must be registered before updating.
482
+ SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
414
483
  SnowflakeMLException: [RuntimeError] Failed to update feature view.
415
484
  """
416
- if feature_view.status == FeatureViewStatus.DRAFT or feature_view.status == FeatureViewStatus.STATIC:
485
+ feature_view = self.get_feature_view(name=name, version=version)
486
+ if refresh_freq is not None and feature_view.status == FeatureViewStatus.STATIC:
417
487
  full_name = f"{feature_view.name}/{feature_view.version}"
418
488
  raise snowml_exceptions.SnowflakeMLException(
419
489
  error_code=error_codes.INVALID_ARGUMENT,
420
- original_exception=RuntimeError(
421
- f"Feature view {full_name} must be registered and non-static so that can be updated."
422
- ),
490
+ original_exception=RuntimeError(f"Feature view {full_name} must be non-static so that can be updated."),
423
491
  )
424
492
 
425
- if feature_view.refresh_freq is not None:
426
- try:
427
- self._session.sql(
428
- f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
429
- TARGET_LAG = '{feature_view.refresh_freq}'
430
- WAREHOUSE = {feature_view.warehouse}
431
- """
432
- ).collect()
433
- except Exception as e:
434
- raise snowml_exceptions.SnowflakeMLException(
435
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
436
- original_exception=RuntimeError(
437
- f"Update feature view {feature_view.name}/{feature_view.version} failed: {e}"
438
- ),
439
- ) from e
493
+ warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
494
+
495
+ # TODO(@wezhou): we need to properly handle cron expr
496
+ try:
497
+ self._session.sql(
498
+ f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
499
+ TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
500
+ WAREHOUSE = {warehouse}
501
+ """
502
+ ).collect(statement_params=self._telemetry_stmp)
503
+ except Exception as e:
504
+ raise snowml_exceptions.SnowflakeMLException(
505
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
506
+ original_exception=RuntimeError(
507
+ f"Update feature view {feature_view.name}/{feature_view.version} failed: {e}"
508
+ ),
509
+ ) from e
510
+ return self.get_feature_view(name=name, version=version)
440
511
 
441
- @dispatch_decorator(prpr_version="1.0.8")
512
+ @dispatch_decorator()
442
513
  def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
443
514
  """
444
515
  Read FeatureView data.
@@ -460,13 +531,12 @@ class FeatureStore:
460
531
 
461
532
  return self._session.sql(f"SELECT * FROM {feature_view.fully_qualified_name()}")
462
533
 
463
- @dispatch_decorator(prpr_version="1.0.8")
534
+ @dispatch_decorator()
464
535
  def list_feature_views(
465
536
  self,
466
537
  entity_name: Optional[str] = None,
467
538
  feature_view_name: Optional[str] = None,
468
- as_dataframe: bool = True,
469
- ) -> Union[Optional[DataFrame], List[FeatureView]]:
539
+ ) -> DataFrame:
470
540
  """
471
541
  List FeatureViews in the FeatureStore.
472
542
  If entity_name is specified, FeatureViews associated with that Entity will be listed.
@@ -475,34 +545,26 @@ class FeatureStore:
475
545
  Args:
476
546
  entity_name: Entity name.
477
547
  feature_view_name: FeatureView name.
478
- as_dataframe: whether the return type should be a DataFrame.
479
548
 
480
549
  Returns:
481
- List of FeatureViews or in a DataFrame representation.
550
+ FeatureViews information as a Snowpark DataFrame.
482
551
  """
483
- if entity_name is not None:
484
- entity_name = SqlIdentifier(entity_name)
485
552
  if feature_view_name is not None:
486
553
  feature_view_name = SqlIdentifier(feature_view_name)
487
554
 
488
555
  if entity_name is not None:
489
- fvs = self._find_feature_views(entity_name, feature_view_name)
556
+ entity_name = SqlIdentifier(entity_name)
557
+ if self._use_optimized_tag_ref:
558
+ return self._optimized_find_feature_views(entity_name, feature_view_name)
559
+ else:
560
+ return self._find_feature_views(entity_name, feature_view_name)
490
561
  else:
491
- fvs = []
492
- entities = self.list_entities().collect()
562
+ output_values: List[List[Any]] = []
493
563
  for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
494
- fvs.append(self._compose_feature_view(row, entities))
495
-
496
- if as_dataframe:
497
- result = None
498
- for fv in fvs:
499
- fv_df = fv.to_df(self._session)
500
- result = fv_df if result is None else result.union(fv_df) # type: ignore[attr-defined]
501
- return result
502
- else:
503
- return fvs
564
+ self._extract_feature_view_info(row, output_values)
565
+ return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
504
566
 
505
- @dispatch_decorator(prpr_version="1.0.8")
567
+ @dispatch_decorator()
506
568
  def get_feature_view(self, name: str, version: str) -> FeatureView:
507
569
  """
508
570
  Retrieve previously registered FeatureView.
@@ -531,7 +593,7 @@ class FeatureStore:
531
593
 
532
594
  return self._compose_feature_view(results[0], self.list_entities().collect())
533
595
 
534
- @dispatch_decorator(prpr_version="1.0.8")
596
+ @dispatch_decorator()
535
597
  def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
536
598
  """
537
599
  Resume a previously suspended FeatureView.
@@ -544,7 +606,7 @@ class FeatureStore:
544
606
  """
545
607
  return self._update_feature_view_status(feature_view, "RESUME")
546
608
 
547
- @dispatch_decorator(prpr_version="1.0.8")
609
+ @dispatch_decorator()
548
610
  def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
549
611
  """
550
612
  Suspend an active FeatureView.
@@ -557,7 +619,7 @@ class FeatureStore:
557
619
  """
558
620
  return self._update_feature_view_status(feature_view, "SUSPEND")
559
621
 
560
- @dispatch_decorator(prpr_version="1.0.8")
622
+ @dispatch_decorator()
561
623
  def delete_feature_view(self, feature_view: FeatureView) -> None:
562
624
  """
563
625
  Delete a FeatureView.
@@ -568,6 +630,8 @@ class FeatureStore:
568
630
  Raises:
569
631
  SnowflakeMLException: [ValueError] FeatureView is not registered.
570
632
  """
633
+ # TODO: we should leverage lineage graph to check downstream deps, and block the deletion
634
+ # if there're other FVs depending on this
571
635
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
572
636
  raise snowml_exceptions.SnowflakeMLException(
573
637
  error_code=error_codes.NOT_FOUND,
@@ -590,7 +654,7 @@ class FeatureStore:
590
654
 
591
655
  logger.info(f"Deleted FeatureView {feature_view.name}/{feature_view.version}.")
592
656
 
593
- @dispatch_decorator(prpr_version="1.0.8")
657
+ @dispatch_decorator()
594
658
  def list_entities(self) -> DataFrame:
595
659
  """
596
660
  List all Entities in the FeatureStore.
@@ -611,7 +675,7 @@ class FeatureStore:
611
675
  ),
612
676
  )
613
677
 
614
- @dispatch_decorator(prpr_version="1.0.8")
678
+ @dispatch_decorator()
615
679
  def get_entity(self, name: str) -> Entity:
616
680
  """
617
681
  Retrieve previously registered Entity object.
@@ -641,8 +705,7 @@ class FeatureStore:
641
705
  original_exception=ValueError(f"Cannot find Entity with name: {name}."),
642
706
  )
643
707
 
644
- raw_join_keys = result[0]["JOIN_KEYS"]
645
- join_keys = raw_join_keys.strip("[]").split(",")
708
+ join_keys = self._recompose_join_keys(result[0]["JOIN_KEYS"])
646
709
 
647
710
  return Entity._construct_entity(
648
711
  name=SqlIdentifier(result[0]["NAME"], case_sensitive=True).identifier(),
@@ -651,7 +714,7 @@ class FeatureStore:
651
714
  owner=result[0]["OWNER"],
652
715
  )
653
716
 
654
- @dispatch_decorator(prpr_version="1.0.8")
717
+ @dispatch_decorator()
655
718
  def delete_entity(self, name: str) -> None:
656
719
  """
657
720
  Delete a previously registered Entity.
@@ -672,13 +735,13 @@ class FeatureStore:
672
735
  original_exception=ValueError(f"Entity {name} does not exist."),
673
736
  )
674
737
 
675
- active_feature_views = cast(List[FeatureView], self.list_feature_views(entity_name=name, as_dataframe=False))
738
+ active_feature_views = self.list_feature_views(entity_name=name).collect(statement_params=self._telemetry_stmp)
739
+
676
740
  if len(active_feature_views) > 0:
741
+ active_fvs = [r["NAME"] for r in active_feature_views]
677
742
  raise snowml_exceptions.SnowflakeMLException(
678
743
  error_code=error_codes.SNOWML_DELETE_FAILED,
679
- original_exception=ValueError(
680
- f"Cannot delete Entity {name} due to active FeatureViews: {[f.name for f in active_feature_views]}."
681
- ),
744
+ original_exception=ValueError(f"Cannot delete Entity {name} due to active FeatureViews: {active_fvs}."),
682
745
  )
683
746
 
684
747
  tag_name = self._get_fully_qualified_name(self._get_entity_name(name))
@@ -691,7 +754,7 @@ class FeatureStore:
691
754
  ) from e
692
755
  logger.info(f"Deleted Entity {name}.")
693
756
 
694
- @dispatch_decorator(prpr_version="1.0.8")
757
+ @dispatch_decorator()
695
758
  def retrieve_feature_values(
696
759
  self,
697
760
  spine_df: DataFrame,
@@ -739,39 +802,35 @@ class FeatureStore:
739
802
 
740
803
  return df
741
804
 
742
- @dispatch_decorator(prpr_version="1.0.8")
805
+ @dispatch_decorator()
743
806
  def generate_dataset(
744
807
  self,
808
+ name: str,
745
809
  spine_df: DataFrame,
746
810
  features: List[Union[FeatureView, FeatureViewSlice]],
747
- materialized_table: Optional[str] = None,
811
+ version: Optional[str] = None,
748
812
  spine_timestamp_col: Optional[str] = None,
749
813
  spine_label_cols: Optional[List[str]] = None,
750
814
  exclude_columns: Optional[List[str]] = None,
751
- save_mode: str = "errorifexists",
752
815
  include_feature_view_timestamp_col: bool = False,
753
816
  desc: str = "",
754
- ) -> Dataset:
817
+ ) -> dataset.Dataset:
755
818
  """
756
819
  Generate dataset by given source table and feature views.
757
820
 
758
821
  Args:
822
+ name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
823
+ by their name and version.
759
824
  spine_df: The fact table contains the raw dataset.
760
825
  features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
761
- materialized_table: The destination table where produced result will be stored. If it's none, then result
762
- won't be registered. If materialized_table is provided, then produced result will be written into
763
- the provided table. Note result dataset will be a snowflake clone of registered table.
764
- New data can append on same registered table and previously generated dataset won't be affected.
765
- Default result table name will be a concatenation of materialized_table name and current timestamp.
826
+ version: The version of the Dataset to be generated. If none specified, the current timestamp
827
+ will be used instead.
766
828
  spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
767
829
  time-series features. If spine_timestamp_col is not none, the input features also must have
768
830
  timestamp_col.
769
831
  spine_label_cols: Name of column(s) in spine_df that contains labels.
770
832
  exclude_columns: Column names to exclude from the result dataframe.
771
833
  The underlying storage will still contain the columns.
772
- save_mode: How new data is saved. currently support:
773
- errorifexists: Raise error if registered table already exists.
774
- merge: Merge new data if registered table already exists.
775
834
  include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
776
835
  (if feature view has timestamp column) if set true. Default to false.
777
836
  desc: A description about this dataset.
@@ -780,10 +839,8 @@ class FeatureStore:
780
839
  A Dataset object.
781
840
 
782
841
  Raises:
783
- SnowflakeMLException: [ValueError] save_mode is invalid.
784
842
  SnowflakeMLException: [ValueError] spine_df contains more than one query.
785
- SnowflakeMLException: [ValueError] Materialized_table contains invalid char `.`.
786
- SnowflakeMLException: [ValueError] Materialized_table already exists with save_mode `errorifexists`.
843
+ SnowflakeMLException: [ValueError] Dataset name/version already exists
787
844
  SnowflakeMLException: [ValueError] Snapshot creation failed.
788
845
  SnowflakeMLException: [RuntimeError] Failed to create clone from table.
789
846
  SnowflakeMLException: [RuntimeError] Failed to find resources.
@@ -793,15 +850,6 @@ class FeatureStore:
793
850
  if spine_label_cols is not None:
794
851
  spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
795
852
 
796
- allowed_save_mode = {"errorifexists", "merge"}
797
- if save_mode.lower() not in allowed_save_mode:
798
- raise snowml_exceptions.SnowflakeMLException(
799
- error_code=error_codes.INVALID_ARGUMENT,
800
- original_exception=ValueError(
801
- f"'{save_mode}' is not supported. Current supported save modes: {','.join(allowed_save_mode)}"
802
- ),
803
- )
804
-
805
853
  if len(spine_df.queries["queries"]) != 1:
806
854
  raise snowml_exceptions.SnowflakeMLException(
807
855
  error_code=error_codes.INVALID_ARGUMENT,
@@ -814,70 +862,55 @@ class FeatureStore:
814
862
  spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
815
863
  )
816
864
 
817
- snapshot_table = None
818
- if materialized_table is not None:
819
- if "." in materialized_table:
820
- raise snowml_exceptions.SnowflakeMLException(
821
- error_code=error_codes.INVALID_ARGUMENT,
822
- original_exception=ValueError(f"materialized_table {materialized_table} contains invalid char `.`"),
823
- )
824
-
825
- # TODO (wezhou) change materialized_table to SqlIdentifier
826
- found_rows = self._find_object("TABLES", SqlIdentifier(materialized_table))
827
- if save_mode.lower() == "errorifexists" and len(found_rows) > 0:
828
- raise snowml_exceptions.SnowflakeMLException(
829
- error_code=error_codes.OBJECT_ALREADY_EXISTS,
830
- original_exception=ValueError(f"Dataset table {materialized_table} already exists."),
831
- )
832
-
833
- self._dump_dataset(result_df, materialized_table, join_keys, spine_timestamp_col)
834
-
835
- snapshot_table = f"{materialized_table}_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
836
- snapshot_table = self._get_fully_qualified_name(snapshot_table)
837
- materialized_table = self._get_fully_qualified_name(materialized_table)
838
-
839
- try:
840
- self._session.sql(f"CREATE TABLE {snapshot_table} CLONE {materialized_table}").collect(
841
- statement_params=self._telemetry_stmp
842
- )
843
- except Exception as e:
844
- raise snowml_exceptions.SnowflakeMLException(
845
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
846
- original_exception=RuntimeError(
847
- f"Failed to create clone {materialized_table} from table {snapshot_table}: {e}."
848
- ),
849
- ) from e
850
-
851
- result_df = self._session.sql(f"SELECT * FROM {snapshot_table}")
865
+ # Convert name to fully qualified name if not already fully qualified
866
+ db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
867
+ name = "{}.{}.{}".format(
868
+ db_name or self._config.database,
869
+ schema_name or self._config.schema,
870
+ object_name,
871
+ )
872
+ version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
852
873
 
853
874
  if exclude_columns is not None:
854
875
  result_df = self._exclude_columns(result_df, exclude_columns)
855
876
 
856
877
  fs_meta = FeatureStoreMetadata(
857
878
  spine_query=spine_df.queries["queries"][0],
858
- connection_params=vars(self._config),
859
- features=[fv.to_json() for fv in features],
879
+ serialized_feature_views=[fv.to_json() for fv in features],
880
+ spine_timestamp_col=spine_timestamp_col,
860
881
  )
861
882
 
862
- dataset = Dataset(
863
- self._session,
864
- df=result_df,
865
- materialized_table=materialized_table,
866
- snapshot_table=snapshot_table,
867
- timestamp_col=spine_timestamp_col,
868
- label_cols=spine_label_cols,
869
- feature_store_metadata=fs_meta,
870
- desc=desc,
871
- )
872
- return dataset
883
+ try:
884
+ ds: dataset.Dataset = dataset.create_from_dataframe(
885
+ self._session,
886
+ name,
887
+ version,
888
+ input_dataframe=result_df,
889
+ exclude_cols=[spine_timestamp_col],
890
+ label_cols=spine_label_cols,
891
+ properties=fs_meta,
892
+ comment=desc,
893
+ )
894
+ return ds
873
895
 
874
- @dispatch_decorator(prpr_version="1.0.8")
875
- def load_feature_views_from_dataset(self, dataset: Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
896
+ except dataset_errors.DatasetExistError as e:
897
+ raise snowml_exceptions.SnowflakeMLException(
898
+ error_code=error_codes.OBJECT_ALREADY_EXISTS,
899
+ original_exception=ValueError(str(e)),
900
+ ) from e
901
+ except SnowparkSQLException as e:
902
+ raise snowml_exceptions.SnowflakeMLException(
903
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
904
+ original_exception=RuntimeError(f"An error occurred during Dataset generation: {e}."),
905
+ ) from e
906
+
907
+ @dispatch_decorator()
908
+ def load_feature_views_from_dataset(self, ds: dataset.Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
876
909
  """
877
910
  Retrieve FeatureViews used during Dataset construction.
878
911
 
879
912
  Args:
880
- dataset: Dataset object created from feature store.
913
+ ds: Dataset object created from feature store.
881
914
 
882
915
  Returns:
883
916
  List of FeatureViews used during Dataset construction.
@@ -885,13 +918,18 @@ class FeatureStore:
885
918
  Raises:
886
919
  ValueError: if dataset object is not generated from feature store.
887
920
  """
888
- serialized_objs = dataset.load_features()
889
- if serialized_objs is None:
890
- raise ValueError(f"Dataset {dataset} does not contain valid feature view information.")
891
-
892
- return self._load_serialized_feature_objects(serialized_objs)
893
-
894
- @dispatch_decorator(prpr_version="1.0.8")
921
+ assert ds.selected_version is not None
922
+ source_meta = ds.selected_version._get_metadata()
923
+ if (
924
+ source_meta is None
925
+ or not isinstance(source_meta.properties, FeatureStoreMetadata)
926
+ or source_meta.properties.serialized_feature_views is None
927
+ ):
928
+ raise ValueError(f"Dataset {ds} does not contain valid feature view information.")
929
+
930
+ return self._load_serialized_feature_objects(source_meta.properties.serialized_feature_views)
931
+
932
+ @dispatch_decorator()
895
933
  def clear(self) -> None:
896
934
  """
897
935
  Clear all feature store internal objects including feature views, entities etc. Note feature store
@@ -911,7 +949,11 @@ class FeatureStore:
911
949
  if len(result) == 0:
912
950
  return
913
951
 
914
- object_types = ["DYNAMIC TABLES", "TABLES", "VIEWS", "TASKS"]
952
+ fs_obj_tag = self._find_object("TAGS", SqlIdentifier(_FEATURE_STORE_OBJECT_TAG))
953
+ if len(fs_obj_tag) == 0:
954
+ return
955
+
956
+ object_types = ["DYNAMIC TABLES", "DATASETS", "VIEWS", "TASKS"]
915
957
  for obj_type in object_types:
916
958
  all_object_rows = self._find_object(obj_type, None)
917
959
  for row in all_object_rows:
@@ -921,9 +963,8 @@ class FeatureStore:
921
963
 
922
964
  entity_tags = self._find_object("TAGS", SqlIdentifier(_ENTITY_TAG_PREFIX), prefix_match=True)
923
965
  all_tags = [
924
- _FEATURE_VIEW_ENTITY_TAG,
925
- _FEATURE_VIEW_TS_COL_TAG,
926
966
  _FEATURE_STORE_OBJECT_TAG,
967
+ _FEATURE_VIEW_METADATA_TAG,
927
968
  ] + [SqlIdentifier(row["name"], case_sensitive=True) for row in entity_tags]
928
969
  for tag_name in all_tags:
929
970
  obj_name = self._get_fully_qualified_name(tag_name)
@@ -937,16 +978,36 @@ class FeatureStore:
937
978
  ) from e
938
979
  logger.info(f"Feature store {self._config.full_schema_path} has been cleared.")
939
980
 
981
+ def _get_feature_view_if_exists(self, name: str, version: str) -> FeatureView:
982
+ existing_fv = self.get_feature_view(name, version)
983
+ warnings.warn(
984
+ f"FeatureView {name}/{version} already exists. Skip registration."
985
+ + " Set `overwrite` to True if you want to replace existing FeatureView.",
986
+ stacklevel=2,
987
+ category=UserWarning,
988
+ )
989
+ return existing_fv
990
+
991
+ def _recompose_join_keys(self, join_key: str) -> List[str]:
992
+ # ALLOWED_VALUES in TAG will follow format ["key_1,key2,..."]
993
+ # since keys are already resolved following the SQL identifier rule on the write path,
994
+ # we simply parse the keys back and wrap them with quotes to preserve cases
995
+ # Example join_key repr from TAG value: "[key1,key2,key3]"
996
+ join_keys = join_key[2:-2].split(",")
997
+ res = []
998
+ for k in join_keys:
999
+ res.append(f'"{k}"')
1000
+ return res
1001
+
940
1002
  def _create_dynamic_table(
941
1003
  self,
942
1004
  feature_view_name: SqlIdentifier,
943
1005
  feature_view: FeatureView,
944
1006
  fully_qualified_name: str,
945
1007
  column_descs: str,
946
- entities: str,
1008
+ tagging_clause: str,
947
1009
  schedule_task: bool,
948
1010
  warehouse: SqlIdentifier,
949
- timestamp_col: SqlIdentifier,
950
1011
  block: bool,
951
1012
  override: bool,
952
1013
  ) -> None:
@@ -957,9 +1018,7 @@ class FeatureStore:
957
1018
  TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
958
1019
  COMMENT = '{feature_view.desc}'
959
1020
  TAG (
960
- {self._get_fully_qualified_name(_FEATURE_VIEW_ENTITY_TAG)} = '{entities}',
961
- {self._get_fully_qualified_name(_FEATURE_VIEW_TS_COL_TAG)} = '{timestamp_col}',
962
- {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = ''
1021
+ {tagging_clause}
963
1022
  )
964
1023
  WAREHOUSE = {warehouse}
965
1024
  AS {feature_view.query}
@@ -967,6 +1026,9 @@ class FeatureStore:
967
1026
  self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
968
1027
 
969
1028
  if schedule_task:
1029
+ task_obj_info = _FeatureStoreObjInfo(
1030
+ _FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK, snowml_version.VERSION
1031
+ )
970
1032
  try:
971
1033
  self._session.sql(
972
1034
  f"""CREATE{override_clause} TASK {fully_qualified_name}
@@ -978,7 +1040,7 @@ class FeatureStore:
978
1040
  self._session.sql(
979
1041
  f"""
980
1042
  ALTER TASK {fully_qualified_name}
981
- SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = ''
1043
+ SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}='{task_obj_info.to_json()}'
982
1044
  """
983
1045
  ).collect(statement_params=self._telemetry_stmp)
984
1046
  self._session.sql(f"ALTER TASK {fully_qualified_name} RESUME").collect(
@@ -988,6 +1050,9 @@ class FeatureStore:
988
1050
  self._session.sql(f"DROP DYNAMIC TABLE IF EXISTS {fully_qualified_name}").collect(
989
1051
  statement_params=self._telemetry_stmp
990
1052
  )
1053
+ self._session.sql(f"DROP TASK IF EXISTS {fully_qualified_name}").collect(
1054
+ statement_params=self._telemetry_stmp
1055
+ )
991
1056
  raise
992
1057
  except Exception as e:
993
1058
  raise snowml_exceptions.SnowflakeMLException(
@@ -1016,57 +1081,6 @@ class FeatureStore:
1016
1081
  category=UserWarning,
1017
1082
  )
1018
1083
 
1019
- def _dump_dataset(
1020
- self,
1021
- df: DataFrame,
1022
- table_name: str,
1023
- join_keys: List[SqlIdentifier],
1024
- spine_timestamp_col: Optional[SqlIdentifier] = None,
1025
- ) -> None:
1026
- if len(df.queries["queries"]) != 1:
1027
- raise snowml_exceptions.SnowflakeMLException(
1028
- error_code=error_codes.INVALID_ARGUMENT,
1029
- original_exception=ValueError(f"Dataset df must contain only one query. Got: {df.queries['queries']}"),
1030
- )
1031
- schema = ", ".join([f"{c.name} {type_utils.convert_sp_to_sf_type(c.datatype)}" for c in df.schema.fields])
1032
- fully_qualified_name = self._get_fully_qualified_name(table_name)
1033
-
1034
- try:
1035
- self._session.sql(
1036
- f"""CREATE TABLE IF NOT EXISTS {fully_qualified_name} ({schema})
1037
- CLUSTER BY ({', '.join(join_keys)})
1038
- TAG ({self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '')
1039
- """
1040
- ).collect(block=True, statement_params=self._telemetry_stmp)
1041
- except Exception as e:
1042
- raise snowml_exceptions.SnowflakeMLException(
1043
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1044
- original_exception=RuntimeError(f"Failed to create table {fully_qualified_name}: {e}."),
1045
- ) from e
1046
-
1047
- source_query = df.queries["queries"][0]
1048
-
1049
- if spine_timestamp_col is not None:
1050
- join_keys.append(spine_timestamp_col)
1051
-
1052
- _, _, dest_alias, _ = identifier.parse_schema_level_object_identifier(fully_qualified_name)
1053
- source_alias = f"{dest_alias}_source"
1054
- join_cond = " AND ".join([f"{dest_alias}.{k} = {source_alias}.{k}" for k in join_keys])
1055
- update_clause = ", ".join([f"{dest_alias}.{c} = {source_alias}.{c}" for c in df.columns])
1056
- insert_clause = ", ".join([f"{source_alias}.{c}" for c in df.columns])
1057
- query = f"""
1058
- MERGE INTO {fully_qualified_name} USING ({source_query}) {source_alias} ON {join_cond}
1059
- WHEN MATCHED THEN UPDATE SET {update_clause}
1060
- WHEN NOT MATCHED THEN INSERT ({', '.join(df.columns)}) VALUES ({insert_clause})
1061
- """
1062
- try:
1063
- self._session.sql(query).collect(block=True, statement_params=self._telemetry_stmp)
1064
- except Exception as e:
1065
- raise snowml_exceptions.SnowflakeMLException(
1066
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1067
- original_exception=RuntimeError(f"Failed to create dataset {fully_qualified_name} with merge: {e}."),
1068
- ) from e
1069
-
1070
1084
  def _validate_entity_exists(self, name: SqlIdentifier) -> bool:
1071
1085
  full_entity_tag_name = self._get_entity_name(name)
1072
1086
  found_rows = self._find_object("TAGS", full_entity_tag_name)
@@ -1117,7 +1131,7 @@ class FeatureStore:
1117
1131
  else:
1118
1132
  cols = f.feature_names
1119
1133
 
1120
- join_keys = [k for e in f.entities for k in e.join_keys]
1134
+ join_keys = list({k for e in f.entities for k in e.join_keys})
1121
1135
  join_keys_str = ", ".join(join_keys)
1122
1136
  assert f.version is not None
1123
1137
  join_table_name = f.fully_qualified_name()
@@ -1168,6 +1182,45 @@ class FeatureStore:
1168
1182
 
1169
1183
  return self._session.sql(query), join_keys
1170
1184
 
1185
+ def _check_database_exists_or_throw(self) -> None:
1186
+ resolved_db_name = self._config.database.resolved()
1187
+ dbs = self._session.sql(
1188
+ f"""
1189
+ SHOW DATABASES LIKE '{resolved_db_name}' STARTS WITH '{resolved_db_name}'
1190
+ """
1191
+ ).collect(statement_params=self._telemetry_stmp)
1192
+ if len(dbs) == 0:
1193
+ raise snowml_exceptions.SnowflakeMLException(
1194
+ error_code=error_codes.NOT_FOUND,
1195
+ original_exception=ValueError(f"Database {resolved_db_name} does not exist."),
1196
+ )
1197
+
1198
+ def _check_internal_objects_exist_or_throw(self) -> None:
1199
+ schema_result = self._find_object("SCHEMAS", self._config.schema)
1200
+ if len(schema_result) == 0:
1201
+ raise snowml_exceptions.SnowflakeMLException(
1202
+ error_code=error_codes.NOT_FOUND,
1203
+ original_exception=ValueError(
1204
+ f"Feature store schema {self._config.schema} does not exist. "
1205
+ "Use CreationMode.CREATE_IF_NOT_EXIST mode instead if you want to create one."
1206
+ ),
1207
+ )
1208
+ for tag_name in to_sql_identifiers(
1209
+ [
1210
+ _FEATURE_STORE_OBJECT_TAG,
1211
+ _FEATURE_VIEW_METADATA_TAG,
1212
+ ]
1213
+ ):
1214
+ tag_result = self._find_object("TAGS", tag_name)
1215
+ if len(tag_result) == 0:
1216
+ raise snowml_exceptions.SnowflakeMLException(
1217
+ error_code=error_codes.NOT_FOUND,
1218
+ original_exception=ValueError(
1219
+ f"Feature store internal tag {tag_name} does not exist. "
1220
+ "Use CreationMode.CREATE_IF_NOT_EXIST mode instead if you want to create one."
1221
+ ),
1222
+ )
1223
+
1171
1224
  def _is_asof_join_enabled(self) -> bool:
1172
1225
  result = None
1173
1226
  try:
@@ -1267,7 +1320,8 @@ class FeatureStore:
1267
1320
 
1268
1321
  # Part 4: join original spine table with window table
1269
1322
  prefix_f_only_cols = to_sql_identifiers(
1270
- [f"{temp_prefix}{name.resolved()}" for name in f_only_cols], case_sensitive=True
1323
+ [f"{temp_prefix}{name.resolved()}" for name in f_only_cols],
1324
+ case_sensitive=True,
1271
1325
  )
1272
1326
  last_select = f"""
1273
1327
  SELECT
@@ -1300,7 +1354,10 @@ class FeatureStore:
1300
1354
  return dynamic_table_results + view_results
1301
1355
 
1302
1356
  def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
1303
- assert operation in ["RESUME", "SUSPEND"], f"Operation: {operation} not supported"
1357
+ assert operation in [
1358
+ "RESUME",
1359
+ "SUSPEND",
1360
+ ], f"Operation: {operation} not supported"
1304
1361
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
1305
1362
  raise snowml_exceptions.SnowflakeMLException(
1306
1363
  error_code=error_codes.NOT_FOUND,
@@ -1324,17 +1381,76 @@ class FeatureStore:
1324
1381
  logger.info(f"Successfully {operation} FeatureView {feature_view.name}/{feature_view.version}.")
1325
1382
  return self.get_feature_view(feature_view.name, feature_view.version)
1326
1383
 
1327
- def _find_feature_views(
1384
+ def _optimized_find_feature_views(
1328
1385
  self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]
1329
- ) -> List[FeatureView]:
1386
+ ) -> DataFrame:
1330
1387
  if not self._validate_entity_exists(entity_name):
1331
- return []
1388
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1332
1389
 
1390
+ # TODO: this can be optimized further by directly getting all possible FVs and filter by tag
1391
+ # it's easier to rewrite the code once we can remove the tag_reference path
1333
1392
  all_fvs = self._get_fv_backend_representations(object_name=None)
1334
1393
  fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1335
1394
 
1336
1395
  if len(fv_maps.keys()) == 0:
1337
- return []
1396
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1397
+
1398
+ filter_clause = f"WHERE OBJECT_NAME LIKE '{feature_view_name.resolved()}%'" if feature_view_name else ""
1399
+ try:
1400
+ res = self._session.sql(
1401
+ f"""
1402
+ SELECT
1403
+ OBJECT_NAME
1404
+ FROM TABLE(
1405
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1406
+ TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
1407
+ )
1408
+ ) {filter_clause}"""
1409
+ ).collect(statement_params=self._telemetry_stmp)
1410
+ except Exception as e:
1411
+ raise snowml_exceptions.SnowflakeMLException(
1412
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1413
+ original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
1414
+ ) from e
1415
+
1416
+ output_values: List[List[Any]] = []
1417
+ for r in res:
1418
+ row = fv_maps[SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)]
1419
+ self._extract_feature_view_info(row, output_values)
1420
+
1421
+ return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1422
+
1423
def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
    """Append one listing record for a feature view backend row to ``output_values``.

    Args:
        row: Backend row describing a feature view. The ``name``, ``text``,
            ``database_name``, ``schema_name``, ``created_on``, ``owner`` and ``comment``
            fields are read.
        output_values: Accumulator that is mutated in place; one list of values is
            appended per call.

    Raises:
        SnowflakeMLException: With error code ``INTERNAL_SNOWML_ERROR`` when ``row["text"]``
            does not match ``_DT_OR_VIEW_QUERY_PATTERN``.
    """
    # The stored object name is "<name><delimiter><version>".
    fv_name, fv_version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)

    parsed = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
    if parsed is None:
        raise snowml_exceptions.SnowflakeMLException(
            error_code=error_codes.INTERNAL_SNOWML_ERROR,
            original_exception=RuntimeError(
                f"Failed to parse query text for FeatureView {fv_name}/{fv_version}: {row}."
            ),
        )

    metadata = _FeatureViewMetadata.from_json(parsed.group("fv_metadata"))

    # NOTE(review): callers build a DataFrame from these records with _LIST_FEATURE_VIEW_SCHEMA,
    # so the value order presumably has to match that schema - confirm before reordering.
    output_values.append(
        [
            fv_name,
            fv_version,
            row["database_name"],
            row["schema_name"],
            row["created_on"],
            row["owner"],
            row["comment"],
            metadata.entities,
        ]
    )
1444
+
1445
+ def _find_feature_views(self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]) -> DataFrame:
1446
+ if not self._validate_entity_exists(entity_name):
1447
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1448
+
1449
+ all_fvs = self._get_fv_backend_representations(object_name=None)
1450
+ fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1451
+
1452
+ if len(fv_maps.keys()) == 0:
1453
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1338
1454
 
1339
1455
  # NOTE: querying INFORMATION_SCHEMA for Entity lineage can be expensive depending on how many active
1340
1456
  # FeatureViews there are. If this ever become an issue, consider exploring improvements.
@@ -1351,7 +1467,7 @@ class FeatureStore:
1351
1467
  )
1352
1468
  )
1353
1469
  WHERE LEVEL = 'TABLE'
1354
- AND TAG_NAME = '{_FEATURE_VIEW_ENTITY_TAG}'
1470
+ AND TAG_NAME = '{_FEATURE_VIEW_METADATA_TAG}'
1355
1471
  """
1356
1472
  for fv_name in fv_maps.keys()
1357
1473
  ]
@@ -1363,21 +1479,22 @@ class FeatureStore:
1363
1479
  original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
1364
1480
  ) from e
1365
1481
 
1366
- entities = self.list_entities().collect()
1367
- outputs = []
1482
+ output_values: List[List[Any]] = []
1368
1483
  for r in results:
1369
- if entity_name == SqlIdentifier(r["TAG_VALUE"], case_sensitive=True):
1370
- fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
1371
- fv_name = SqlIdentifier(fv_name, case_sensitive=True)
1372
- obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
1373
- if feature_view_name is not None:
1374
- if fv_name == feature_view_name:
1375
- outputs.append(self._compose_feature_view(fv_maps[obj_name], entities))
1484
+ fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
1485
+ for retrieved_entity in fv_metadata.entities:
1486
+ if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
1487
+ fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
1488
+ fv_name = SqlIdentifier(fv_name, case_sensitive=True)
1489
+ obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
1490
+ if feature_view_name is not None:
1491
+ if fv_name == feature_view_name:
1492
+ self._extract_feature_view_info(fv_maps[obj_name], output_values)
1493
+ else:
1494
+ continue
1376
1495
  else:
1377
- continue
1378
- else:
1379
- outputs.append(self._compose_feature_view(fv_maps[obj_name], entities))
1380
- return outputs
1496
+ self._extract_feature_view_info(fv_maps[obj_name], output_values)
1497
+ return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1381
1498
 
1382
1499
  def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
1383
1500
  def find_and_compose_entity(name: str) -> Entity:
@@ -1386,7 +1503,7 @@ class FeatureStore:
1386
1503
  if e["NAME"] == name:
1387
1504
  return Entity(
1388
1505
  name=SqlIdentifier(e["NAME"], case_sensitive=True).identifier(),
1389
- join_keys=e["JOIN_KEYS"].strip("[]").split(","),
1506
+ join_keys=self._recompose_join_keys(e["JOIN_KEYS"]),
1390
1507
  desc=e["DESC"],
1391
1508
  )
1392
1509
  raise RuntimeError(f"Cannot find entity {name} from retrieved entity list: {entity_list}")
@@ -1404,9 +1521,9 @@ class FeatureStore:
1404
1521
  query = m.group("query")
1405
1522
  df = self._session.sql(query)
1406
1523
  desc = m.group("comment")
1407
- entity_names = m.group("entities")
1408
- entities = [find_and_compose_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
1409
- ts_col = m.group("ts_col")
1524
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1525
+ entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1526
+ ts_col = fv_metadata.timestamp_col
1410
1527
  timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
1411
1528
 
1412
1529
  fv = FeatureView._construct_feature_view(
@@ -1433,9 +1550,9 @@ class FeatureStore:
1433
1550
  query = m.group("query")
1434
1551
  df = self._session.sql(query)
1435
1552
  desc = m.group("comment")
1436
- entity_names = m.group("entities")
1437
- entities = [find_and_compose_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
1438
- ts_col = m.group("ts_col")
1553
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1554
+ entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1555
+ ts_col = fv_metadata.timestamp_col
1439
1556
  timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
1440
1557
 
1441
1558
  fv = FeatureView._construct_feature_view(
@@ -1469,7 +1586,10 @@ class FeatureStore:
1469
1586
  return descs
1470
1587
 
1471
1588
  def _find_object(
1472
- self, object_type: str, object_name: Optional[SqlIdentifier], prefix_match: bool = False
1589
+ self,
1590
+ object_type: str,
1591
+ object_name: Optional[SqlIdentifier],
1592
+ prefix_match: bool = False,
1473
1593
  ) -> List[Row]:
1474
1594
  """Try to find an object by given type and name pattern.
1475
1595
 
@@ -1496,7 +1616,7 @@ class FeatureStore:
1496
1616
  search_space, obj_domain = self._obj_search_spaces[object_type]
1497
1617
  all_rows = []
1498
1618
  fs_tag_objects = []
1499
- tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES"]
1619
+ tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES", "DATASETS"]
1500
1620
  try:
1501
1621
  search_scope = f"IN {search_space}" if search_space is not None else ""
1502
1622
  all_rows = self._session.sql(f"SHOW {object_type} LIKE '{match_name}' {search_scope}").collect(
@@ -1504,25 +1624,41 @@ class FeatureStore:
1504
1624
  )
1505
1625
  # There could be none-FS objects under FS schema, thus filter on objects with FS special tag.
1506
1626
  if object_type not in tag_free_object_types and len(all_rows) > 0:
1507
- # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
1508
- # use double quotes to make it case-sensitive.
1509
- queries = [
1510
- f"""
1511
- SELECT OBJECT_NAME
1512
- FROM TABLE(
1513
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1514
- '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
1515
- '{obj_domain}'
1627
+ if self._use_optimized_tag_ref:
1628
+ fs_obj_rows = self._session.sql(
1629
+ f"""
1630
+ SELECT
1631
+ OBJECT_NAME
1632
+ FROM TABLE(
1633
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1634
+ TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1635
+ )
1516
1636
  )
1517
- )
1518
- WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
1519
- AND TAG_SCHEMA = '{self._config.schema.resolved()}'
1520
- """
1521
- for row in all_rows
1522
- ]
1523
- fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
1524
- statement_params=self._telemetry_stmp
1525
- )
1637
+ WHERE DOMAIN='{obj_domain}'
1638
+ """
1639
+ ).collect(statement_params=self._telemetry_stmp)
1640
+ else:
1641
+ # TODO: remove this after tag_ref_internal rollout
1642
+ # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
1643
+ # use double quotes to make it case-sensitive.
1644
+ queries = [
1645
+ f"""
1646
+ SELECT OBJECT_NAME
1647
+ FROM TABLE(
1648
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1649
+ '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
1650
+ '{obj_domain}'
1651
+ )
1652
+ )
1653
+ WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
1654
+ AND TAG_SCHEMA = '{self._config.schema.resolved()}'
1655
+ """
1656
+ for row in all_rows
1657
+ ]
1658
+ fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
1659
+ statement_params=self._telemetry_stmp
1660
+ )
1661
+
1526
1662
  fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
1527
1663
  except Exception as e:
1528
1664
  raise snowml_exceptions.SnowflakeMLException(
@@ -1568,3 +1704,66 @@ class FeatureStore:
1568
1704
  ),
1569
1705
  )
1570
1706
  return cast(DataFrame, df.drop(exclude_columns))
1707
+
1708
def _tag_ref_internal_enabled(self) -> bool:
    """Probe whether ``INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL`` can be queried.

    Returns:
        True when a one-row probe query against the table function runs without raising,
        False when building or running the probe raises any exception.
    """
    try:
        probe_query = f"""
            SELECT * FROM TABLE(
                INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
                    TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
                )
            ) LIMIT 1;
        """
        self._session.sql(probe_query).collect()
    except Exception:
        # Any failure is interpreted as "the function is not available".
        return False
    else:
        return True
1722
+
1723
+ def _check_feature_store_object_versions(self) -> None:
1724
+ versions = self._collapse_object_versions()
1725
+ if len(versions) > 0 and pkg_version.parse(snowml_version.VERSION) < versions[0]:
1726
+ warnings.warn(
1727
+ "The current snowflake-ml-python version out of date, package upgrade recommended "
1728
+ + f"(current={snowml_version.VERSION}, recommended>={str(versions[0])})",
1729
+ stacklevel=2,
1730
+ category=UserWarning,
1731
+ )
1732
+
1733
+ def _collapse_object_versions(self) -> List[pkg_version.Version]:
1734
+ if not self._use_optimized_tag_ref:
1735
+ return []
1736
+
1737
+ query = f"""
1738
+ SELECT
1739
+ TAG_VALUE
1740
+ FROM TABLE(
1741
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1742
+ TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1743
+ )
1744
+ )
1745
+ """
1746
+ try:
1747
+ res = self._session.sql(query).collect(statement_params=self._telemetry_stmp)
1748
+ except Exception:
1749
+ # since this is a best effort user warning to upgrade pkg versions
1750
+ # we are treating failures as benign error
1751
+ return []
1752
+ versions = set()
1753
+ compatibility_breakage_detected = False
1754
+ for r in res:
1755
+ info = _FeatureStoreObjInfo.from_json(r["TAG_VALUE"])
1756
+ if info.type == _FeatureStoreObjTypes.UNKNOWN:
1757
+ compatibility_breakage_detected = True
1758
+ versions.add(pkg_version.parse(info.pkg_version))
1759
+
1760
+ sorted_versions = sorted(versions, reverse=True)
1761
+ if compatibility_breakage_detected:
1762
+ raise snowml_exceptions.SnowflakeMLException(
1763
+ error_code=error_codes.SNOWML_PACKAGE_OUTDATED,
1764
+ original_exception=RuntimeError(
1765
+ f"The current snowflake-ml-python version {snowml_version.VERSION} is out of date, "
1766
+ + f"please upgrade to at least {sorted_versions[0]}."
1767
+ ),
1768
+ )
1769
+ return sorted_versions