snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. snowflake/ml/_internal/env_utils.py +72 -31
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  7. snowflake/ml/_internal/telemetry.py +1 -0
  8. snowflake/ml/_internal/utils/identifier.py +1 -1
  9. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  10. snowflake/ml/dataset/__init__.py +11 -0
  11. snowflake/ml/dataset/dataset.py +455 -129
  12. snowflake/ml/dataset/dataset_factory.py +53 -0
  13. snowflake/ml/dataset/dataset_metadata.py +103 -0
  14. snowflake/ml/dataset/dataset_reader.py +199 -0
  15. snowflake/ml/feature_store/__init__.py +6 -0
  16. snowflake/ml/feature_store/access_manager.py +279 -0
  17. snowflake/ml/feature_store/feature_store.py +544 -358
  18. snowflake/ml/feature_store/feature_view.py +55 -16
  19. snowflake/ml/fileset/embedded_stage_fs.py +149 -0
  20. snowflake/ml/fileset/sfcfs.py +0 -4
  21. snowflake/ml/fileset/snowfs.py +160 -0
  22. snowflake/ml/fileset/stage_fs.py +25 -10
  23. snowflake/ml/model/__init__.py +2 -2
  24. snowflake/ml/model/_api.py +16 -1
  25. snowflake/ml/model/_client/model/model_impl.py +65 -31
  26. snowflake/ml/model/_client/model/model_version_impl.py +159 -2
  27. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  28. snowflake/ml/model/_client/ops/model_ops.py +268 -83
  29. snowflake/ml/model/_client/sql/_base.py +34 -0
  30. snowflake/ml/model/_client/sql/model.py +42 -47
  31. snowflake/ml/model/_client/sql/model_version.py +164 -39
  32. snowflake/ml/model/_client/sql/stage.py +6 -32
  33. snowflake/ml/model/_client/sql/tag.py +32 -56
  34. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  35. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  36. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  37. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  38. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  39. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  40. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
  41. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
  42. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  43. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  44. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
  45. snowflake/ml/model/_packager/model_packager.py +0 -3
  46. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  47. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  48. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  49. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  50. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +50 -21
  51. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
  52. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +340 -17
  53. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
  54. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
  55. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
  56. snowflake/ml/modeling/cluster/birch.py +53 -52
  57. snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
  58. snowflake/ml/modeling/cluster/dbscan.py +51 -52
  59. snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
  60. snowflake/ml/modeling/cluster/k_means.py +53 -52
  61. snowflake/ml/modeling/cluster/mean_shift.py +51 -52
  62. snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
  63. snowflake/ml/modeling/cluster/optics.py +51 -52
  64. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
  65. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
  66. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
  67. snowflake/ml/modeling/compose/column_transformer.py +53 -52
  68. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
  69. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
  70. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
  71. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
  72. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
  73. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
  74. snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
  75. snowflake/ml/modeling/covariance/oas.py +51 -52
  76. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
  77. snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
  78. snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
  79. snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
  80. snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
  81. snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
  82. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
  83. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
  84. snowflake/ml/modeling/decomposition/pca.py +53 -52
  85. snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
  86. snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
  87. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
  88. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
  89. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
  90. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
  91. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
  92. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
  93. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
  94. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
  95. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
  96. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
  97. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
  98. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
  99. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
  100. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
  101. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
  102. snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
  103. snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
  104. snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
  105. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
  106. snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
  107. snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
  108. snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
  109. snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
  110. snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
  111. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
  112. snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
  113. snowflake/ml/modeling/framework/base.py +64 -36
  114. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
  115. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
  116. snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
  117. snowflake/ml/modeling/impute/knn_imputer.py +53 -52
  118. snowflake/ml/modeling/impute/missing_indicator.py +53 -52
  119. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
  120. snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
  121. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
  122. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
  123. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
  124. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
  125. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
  126. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
  127. snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
  128. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
  129. snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
  130. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
  131. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
  132. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
  133. snowflake/ml/modeling/linear_model/lars.py +51 -52
  134. snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
  135. snowflake/ml/modeling/linear_model/lasso.py +51 -52
  136. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
  137. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
  138. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
  139. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
  140. snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
  141. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
  142. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
  143. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
  144. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
  145. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
  146. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
  147. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
  148. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
  149. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
  150. snowflake/ml/modeling/linear_model/perceptron.py +51 -52
  151. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
  152. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
  153. snowflake/ml/modeling/linear_model/ridge.py +51 -52
  154. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
  155. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
  156. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
  157. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
  158. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
  159. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
  160. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
  161. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
  162. snowflake/ml/modeling/manifold/isomap.py +53 -52
  163. snowflake/ml/modeling/manifold/mds.py +53 -52
  164. snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
  165. snowflake/ml/modeling/manifold/tsne.py +53 -52
  166. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
  167. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
  168. snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
  169. snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
  170. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
  171. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
  172. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
  173. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
  174. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
  175. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
  176. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
  177. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
  178. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
  179. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
  180. snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
  181. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
  182. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
  183. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
  184. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
  185. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
  186. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
  187. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
  188. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
  189. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
  190. snowflake/ml/modeling/pipeline/pipeline.py +538 -36
  191. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
  192. snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
  193. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
  194. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
  195. snowflake/ml/modeling/svm/linear_svc.py +51 -52
  196. snowflake/ml/modeling/svm/linear_svr.py +51 -52
  197. snowflake/ml/modeling/svm/nu_svc.py +51 -52
  198. snowflake/ml/modeling/svm/nu_svr.py +51 -52
  199. snowflake/ml/modeling/svm/svc.py +51 -52
  200. snowflake/ml/modeling/svm/svr.py +51 -52
  201. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
  202. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
  203. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
  204. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
  205. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
  206. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
  207. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
  208. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
  209. snowflake/ml/registry/_manager/model_manager.py +36 -7
  210. snowflake/ml/registry/model_registry.py +3 -149
  211. snowflake/ml/version.py +1 -1
  212. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA +112 -7
  213. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/RECORD +216 -206
  214. snowflake/ml/registry/_artifact_manager.py +0 -156
  215. snowflake/ml/registry/artifact.py +0 -46
  216. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/LICENSE.txt +0 -0
  217. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/WHEEL +0 -0
  218. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/top_level.txt +0 -0
@@ -8,13 +8,29 @@ import re
8
8
  import warnings
9
9
  from dataclasses import dataclass
10
10
  from enum import Enum
11
- from typing import Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Dict,
15
+ List,
16
+ Literal,
17
+ Optional,
18
+ Tuple,
19
+ TypeVar,
20
+ Union,
21
+ cast,
22
+ overload,
23
+ )
12
24
 
25
+ import packaging.version as pkg_version
26
+ import snowflake.ml.version as snowml_version
13
27
  from pytimeparse.timeparse import timeparse
14
28
  from typing_extensions import Concatenate, ParamSpec
15
29
 
30
+ from snowflake.ml import dataset
16
31
  from snowflake.ml._internal import telemetry
17
32
  from snowflake.ml._internal.exceptions import (
33
+ dataset_errors,
18
34
  error_codes,
19
35
  exceptions as snowml_exceptions,
20
36
  )
@@ -23,25 +39,27 @@ from snowflake.ml._internal.utils.sql_identifier import (
23
39
  SqlIdentifier,
24
40
  to_sql_identifiers,
25
41
  )
26
- from snowflake.ml.dataset.dataset import Dataset, FeatureStoreMetadata
27
- from snowflake.ml.feature_store.entity import (
28
- _ENTITY_NAME_LENGTH_LIMIT,
29
- _FEATURE_VIEW_ENTITY_TAG_DELIMITER,
30
- Entity,
31
- )
42
+ from snowflake.ml.dataset.dataset_metadata import FeatureStoreMetadata
43
+ from snowflake.ml.feature_store.entity import _ENTITY_NAME_LENGTH_LIMIT, Entity
32
44
  from snowflake.ml.feature_store.feature_view import (
33
45
  _FEATURE_OBJ_TYPE,
34
46
  _FEATURE_VIEW_NAME_DELIMITER,
35
- _TIMESTAMP_COL_PLACEHOLDER,
47
+ _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS,
36
48
  FeatureView,
37
49
  FeatureViewSlice,
38
50
  FeatureViewStatus,
39
51
  FeatureViewVersion,
52
+ _FeatureViewMetadata,
40
53
  )
41
54
  from snowflake.snowpark import DataFrame, Row, Session, functions as F
42
- from snowflake.snowpark._internal import type_utils, utils as snowpark_utils
43
55
  from snowflake.snowpark.exceptions import SnowparkSQLException
44
- from snowflake.snowpark.types import StructField
56
+ from snowflake.snowpark.types import (
57
+ ArrayType,
58
+ StringType,
59
+ StructField,
60
+ StructType,
61
+ TimestampType,
62
+ )
45
63
 
46
64
  _Args = ParamSpec("_Args")
47
65
  _RT = TypeVar("_RT")
@@ -49,38 +67,80 @@ _RT = TypeVar("_RT")
49
67
  logger = logging.getLogger(__name__)
50
68
 
51
69
  _ENTITY_TAG_PREFIX = "SNOWML_FEATURE_STORE_ENTITY_"
52
- _FEATURE_VIEW_ENTITY_TAG = "SNOWML_FEATURE_STORE_FV_ENTITIES"
53
- _FEATURE_VIEW_TS_COL_TAG = "SNOWML_FEATURE_STORE_FV_TS_COL"
54
70
  _FEATURE_STORE_OBJECT_TAG = "SNOWML_FEATURE_STORE_OBJECT"
71
+ _FEATURE_VIEW_METADATA_TAG = "SNOWML_FEATURE_VIEW_METADATA"
72
+
73
+
74
+ @dataclass(frozen=True)
75
+ class _FeatureStoreObjInfo:
76
+ type: _FeatureStoreObjTypes
77
+ pkg_version: str
78
+
79
+ def to_json(self) -> str:
80
+ state_dict = self.__dict__.copy()
81
+ state_dict["type"] = state_dict["type"].value
82
+ return json.dumps(state_dict)
83
+
84
+ @classmethod
85
+ def from_json(cls, json_str: str) -> _FeatureStoreObjInfo:
86
+ json_dict = json.loads(json_str)
87
+ # since we may introduce new fields in the json blob in the future,
88
+ # in order to guarantee compatibility, we need to select ones that can be
89
+ # decoded in the current version
90
+ state_dict = {}
91
+ state_dict["type"] = _FeatureStoreObjTypes.parse(json_dict["type"])
92
+ state_dict["pkg_version"] = json_dict["pkg_version"]
93
+ return cls(**state_dict) # type: ignore[arg-type]
55
94
 
56
95
 
57
96
  # TODO: remove "" after dataset is updated
58
97
  class _FeatureStoreObjTypes(Enum):
59
- FEATURE_VIEW = "FEATURE_VIEW"
98
+ UNKNOWN = "UNKNOWN" # for forward compatibility
99
+ MANAGED_FEATURE_VIEW = "MANAGED_FEATURE_VIEW"
100
+ EXTERNAL_FEATURE_VIEW = "EXTERNAL_FEATURE_VIEW"
60
101
  FEATURE_VIEW_REFRESH_TASK = "FEATURE_VIEW_REFRESH_TASK"
61
102
  TRAINING_DATA = ""
62
103
 
104
+ @classmethod
105
+ def parse(cls, val: str) -> _FeatureStoreObjTypes:
106
+ try:
107
+ return cls(val)
108
+ except ValueError:
109
+ return cls.UNKNOWN
110
+
63
111
 
64
112
  _PROJECT = "FeatureStore"
65
113
  _DT_OR_VIEW_QUERY_PATTERN = re.compile(
66
114
  r"""CREATE\ (OR\ REPLACE\ )?(?P<obj_type>(DYNAMIC\ TABLE|VIEW))\ .*
67
115
  COMMENT\ =\ '(?P<comment>.*)'\s*
68
- TAG.*?{entity_tag}\ =\ '(?P<entities>.*?)',\n
69
- .*?{ts_col_tag}\ =\ '(?P<ts_col>.*?)',?.*?
116
+ TAG.*?{fv_metadata_tag}\ =\ '(?P<fv_metadata>.*?)',?.*?
70
117
  AS\ (?P<query>.*)
71
118
  """.format(
72
- entity_tag=_FEATURE_VIEW_ENTITY_TAG, ts_col_tag=_FEATURE_VIEW_TS_COL_TAG
119
+ fv_metadata_tag=_FEATURE_VIEW_METADATA_TAG,
73
120
  ),
74
121
  flags=re.DOTALL | re.IGNORECASE | re.X,
75
122
  )
76
123
 
124
+ _LIST_FEATURE_VIEW_SCHEMA = StructType(
125
+ [
126
+ StructField("name", StringType()),
127
+ StructField("version", StringType()),
128
+ StructField("database_name", StringType()),
129
+ StructField("schema_name", StringType()),
130
+ StructField("created_on", TimestampType()),
131
+ StructField("owner", StringType()),
132
+ StructField("desc", StringType()),
133
+ StructField("entities", ArrayType(StringType())),
134
+ ]
135
+ )
136
+
77
137
 
78
138
  class CreationMode(Enum):
79
139
  FAIL_IF_NOT_EXIST = 1
80
140
  CREATE_IF_NOT_EXIST = 2
81
141
 
82
142
 
83
- @dataclass
143
+ @dataclass(frozen=True)
84
144
  class _FeatureStoreConfig:
85
145
  database: SqlIdentifier
86
146
  schema: SqlIdentifier
@@ -111,14 +171,14 @@ def switch_warehouse(
111
171
  return wrapper
112
172
 
113
173
 
114
- def dispatch_decorator(
115
- prpr_version: str,
116
- ) -> Callable[[Callable[Concatenate[FeatureStore, _Args], _RT]], Callable[Concatenate[FeatureStore, _Args], _RT],]:
174
+ def dispatch_decorator() -> Callable[
175
+ [Callable[Concatenate[FeatureStore, _Args], _RT]],
176
+ Callable[Concatenate[FeatureStore, _Args], _RT],
177
+ ]:
117
178
  def decorator(
118
179
  f: Callable[Concatenate[FeatureStore, _Args], _RT]
119
180
  ) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
120
181
  @telemetry.send_api_usage_telemetry(project=_PROJECT)
121
- @snowpark_utils.private_preview(version=prpr_version)
122
182
  @switch_warehouse
123
183
  @functools.wraps(f)
124
184
  def wrap(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
@@ -135,7 +195,6 @@ class FeatureStore:
135
195
  """
136
196
 
137
197
  @telemetry.send_api_usage_telemetry(project=_PROJECT)
138
- @snowpark_utils.private_preview(version="1.0.8")
139
198
  def __init__(
140
199
  self,
141
200
  session: Session,
@@ -178,7 +237,7 @@ class FeatureStore:
178
237
  # search space used in query "SHOW <object_TYPE> LIKE <object_name> IN <search_space>"
179
238
  # object domain used in query "TAG_REFERENCE(<object_name>, <object_domain>)"
180
239
  self._obj_search_spaces = {
181
- "TABLES": (self._config.full_schema_path, "TABLE"),
240
+ "DATASETS": (self._config.full_schema_path, "DATASET"),
182
241
  "DYNAMIC TABLES": (self._config.full_schema_path, "TABLE"),
183
242
  "VIEWS": (self._config.full_schema_path, "TABLE"),
184
243
  "SCHEMAS": (f"DATABASE {self._config.database}", "SCHEMA"),
@@ -195,34 +254,27 @@ class FeatureStore:
195
254
 
196
255
  else:
197
256
  try:
198
- self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
199
- statement_params=self._telemetry_stmp
200
- )
201
- for tag in to_sql_identifiers(
202
- [
203
- _FEATURE_VIEW_ENTITY_TAG,
204
- _FEATURE_VIEW_TS_COL_TAG,
205
- ]
206
- ):
257
+ # Explicitly check if schema exists first since we may not have CREATE SCHEMA privilege
258
+ if len(self._find_object("SCHEMAS", self._config.schema)) == 0:
259
+ self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
260
+ statement_params=self._telemetry_stmp
261
+ )
262
+ for tag in to_sql_identifiers([_FEATURE_VIEW_METADATA_TAG, _FEATURE_STORE_OBJECT_TAG]):
207
263
  self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
208
264
  statement_params=self._telemetry_stmp
209
265
  )
210
-
211
- self._session.sql(
212
- f"""CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
213
- ALLOWED_VALUES {','.join([f"'{v.value}'" for v in _FeatureStoreObjTypes])}"""
214
- ).collect(statement_params=self._telemetry_stmp)
215
266
  except Exception as e:
216
- self.clear()
217
267
  raise snowml_exceptions.SnowflakeMLException(
218
268
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
219
269
  original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
220
270
  )
221
271
 
272
+ # TODO: remove this after tag_ref_internal rollout
273
+ self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
274
+ self._check_feature_store_object_versions()
222
275
  logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
223
276
 
224
277
  @telemetry.send_api_usage_telemetry(project=_PROJECT)
225
- @snowpark_utils.private_preview(version="1.0.12")
226
278
  def update_default_warehouse(self, warehouse_name: str) -> None:
227
279
  """Update default warehouse for feature store.
228
280
 
@@ -242,7 +294,7 @@ class FeatureStore:
242
294
 
243
295
  self._default_warehouse = warehouse
244
296
 
245
- @dispatch_decorator(prpr_version="1.0.8")
297
+ @dispatch_decorator()
246
298
  def register_entity(self, entity: Entity) -> Entity:
247
299
  """
248
300
  Register Entity in the FeatureStore.
@@ -268,13 +320,13 @@ class FeatureStore:
268
320
  return entity
269
321
 
270
322
  # allowed_values will add double-quotes around each value, thus use resolved str here.
271
- join_keys = [f"'{key.resolved()}'" for key in entity.join_keys]
323
+ join_keys = [f"{key.resolved()}" for key in entity.join_keys]
272
324
  join_keys_str = ",".join(join_keys)
273
325
  full_tag_name = self._get_fully_qualified_name(tag_name)
274
326
  try:
275
327
  self._session.sql(
276
328
  f"""CREATE TAG IF NOT EXISTS {full_tag_name}
277
- ALLOWED_VALUES {join_keys_str}
329
+ ALLOWED_VALUES '{join_keys_str}'
278
330
  COMMENT = '{entity.desc}'
279
331
  """
280
332
  ).collect(statement_params=self._telemetry_stmp)
@@ -289,7 +341,7 @@ class FeatureStore:
289
341
  return self.get_entity(entity.name)
290
342
 
291
343
  # TODO: add support to update column desc once SNOW-894249 is fixed
292
- @dispatch_decorator(prpr_version="1.0.8")
344
+ @dispatch_decorator()
293
345
  def register_feature_view(
294
346
  self,
295
347
  feature_view: FeatureView,
@@ -342,7 +394,6 @@ class FeatureStore:
342
394
  ),
343
395
  )
344
396
 
345
- # TODO: ideally we should move this to FeatureView creation time
346
397
  for e in feature_view.entities:
347
398
  if not self._validate_entity_exists(e.name):
348
399
  raise snowml_exceptions.SnowflakeMLException(
@@ -358,12 +409,23 @@ class FeatureStore:
358
409
  pass
359
410
 
360
411
  fully_qualified_name = self._get_fully_qualified_name(feature_view_name)
361
- entities = _FEATURE_VIEW_ENTITY_TAG_DELIMITER.join([e.name for e in feature_view.entities])
362
- timestamp_col = (
363
- feature_view.timestamp_col
364
- if feature_view.timestamp_col is not None
365
- else SqlIdentifier(_TIMESTAMP_COL_PLACEHOLDER)
366
- )
412
+ refresh_freq = feature_view.refresh_freq
413
+
414
+ if refresh_freq is not None:
415
+ obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.MANAGED_FEATURE_VIEW, snowml_version.VERSION)
416
+ else:
417
+ obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW, snowml_version.VERSION)
418
+
419
+ tagging_clause = [
420
+ f"{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '{obj_info.to_json()}'",
421
+ f"{self._get_fully_qualified_name(_FEATURE_VIEW_METADATA_TAG)} = '{feature_view._metadata().to_json()}'",
422
+ ]
423
+ for e in feature_view.entities:
424
+ join_keys = [f"{key.resolved()}" for key in e.join_keys]
425
+ tagging_clause.append(
426
+ f"{self._get_fully_qualified_name(self._get_entity_name(e.name))} = '{','.join(join_keys)}'"
427
+ )
428
+ tagging_clause_str = ",\n".join(tagging_clause)
367
429
 
368
430
  def create_col_desc(col: StructField) -> str:
369
431
  desc = feature_view.feature_descs.get(SqlIdentifier(col.name), None)
@@ -371,7 +433,6 @@ class FeatureStore:
371
433
  return f"{col.name} {desc}"
372
434
 
373
435
  column_descs = ", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
374
- refresh_freq = feature_view.refresh_freq
375
436
 
376
437
  if refresh_freq is not None:
377
438
  schedule_task = refresh_freq != "DOWNSTREAM" and timeparse(refresh_freq) is None
@@ -380,10 +441,9 @@ class FeatureStore:
380
441
  feature_view,
381
442
  fully_qualified_name,
382
443
  column_descs,
383
- entities,
444
+ tagging_clause_str,
384
445
  schedule_task,
385
446
  self._default_warehouse,
386
- timestamp_col,
387
447
  block,
388
448
  overwrite,
389
449
  )
@@ -393,9 +453,7 @@ class FeatureStore:
393
453
  query = f"""CREATE{overwrite_clause} VIEW {fully_qualified_name} ({column_descs})
394
454
  COMMENT = '{feature_view.desc}'
395
455
  TAG (
396
- {_FEATURE_VIEW_ENTITY_TAG} = '{entities}',
397
- {_FEATURE_VIEW_TS_COL_TAG} = '{timestamp_col}',
398
- {_FEATURE_STORE_OBJECT_TAG} = '{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
456
+ {tagging_clause_str}
399
457
  )
400
458
  AS {feature_view.query}
401
459
  """
@@ -406,10 +464,10 @@ class FeatureStore:
406
464
  original_exception=RuntimeError(f"Create view {fully_qualified_name} [\n{query}\n] failed: {e}"),
407
465
  ) from e
408
466
 
409
- logger.info(f"Registered FeatureView {feature_view.name}/{version}.")
467
+ logger.info(f"Registered FeatureView {feature_view.name}/{version} successfully.")
410
468
  return self.get_feature_view(feature_view.name, str(version))
411
469
 
412
- @dispatch_decorator(prpr_version="1.1.0")
470
+ @dispatch_decorator()
413
471
  def update_feature_view(
414
472
  self, name: str, version: str, refresh_freq: Optional[str] = None, warehouse: Optional[str] = None
415
473
  ) -> FeatureView:
@@ -456,7 +514,7 @@ class FeatureStore:
456
514
  ) from e
457
515
  return self.get_feature_view(name=name, version=version)
458
516
 
459
- @dispatch_decorator(prpr_version="1.0.8")
517
+ @dispatch_decorator()
460
518
  def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
461
519
  """
462
520
  Read FeatureView data.
@@ -478,13 +536,12 @@ class FeatureStore:
478
536
 
479
537
  return self._session.sql(f"SELECT * FROM {feature_view.fully_qualified_name()}")
480
538
 
481
- @dispatch_decorator(prpr_version="1.0.8")
539
+ @dispatch_decorator()
482
540
  def list_feature_views(
483
541
  self,
484
542
  entity_name: Optional[str] = None,
485
543
  feature_view_name: Optional[str] = None,
486
- as_dataframe: bool = True,
487
- ) -> Union[Optional[DataFrame], List[FeatureView]]:
544
+ ) -> DataFrame:
488
545
  """
489
546
  List FeatureViews in the FeatureStore.
490
547
  If entity_name is specified, FeatureViews associated with that Entity will be listed.
@@ -493,34 +550,26 @@ class FeatureStore:
493
550
  Args:
494
551
  entity_name: Entity name.
495
552
  feature_view_name: FeatureView name.
496
- as_dataframe: whether the return type should be a DataFrame.
497
553
 
498
554
  Returns:
499
- List of FeatureViews or in a DataFrame representation.
555
+ FeatureViews information as a Snowpark DataFrame.
500
556
  """
501
- if entity_name is not None:
502
- entity_name = SqlIdentifier(entity_name)
503
557
  if feature_view_name is not None:
504
558
  feature_view_name = SqlIdentifier(feature_view_name)
505
559
 
506
560
  if entity_name is not None:
507
- fvs = self._find_feature_views(entity_name, feature_view_name)
561
+ entity_name = SqlIdentifier(entity_name)
562
+ if self._use_optimized_tag_ref:
563
+ return self._optimized_find_feature_views(entity_name, feature_view_name)
564
+ else:
565
+ return self._find_feature_views(entity_name, feature_view_name)
508
566
  else:
509
- fvs = []
510
- entities = self.list_entities().collect()
567
+ output_values: List[List[Any]] = []
511
568
  for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
512
- fvs.append(self._compose_feature_view(row, entities))
513
-
514
- if as_dataframe:
515
- result = None
516
- for fv in fvs:
517
- fv_df = fv.to_df(self._session)
518
- result = fv_df if result is None else result.union(fv_df) # type: ignore[attr-defined]
519
- return result
520
- else:
521
- return fvs
569
+ self._extract_feature_view_info(row, output_values)
570
+ return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
522
571
 
523
- @dispatch_decorator(prpr_version="1.0.8")
572
+ @dispatch_decorator()
524
573
  def get_feature_view(self, name: str, version: str) -> FeatureView:
525
574
  """
526
575
  Retrieve previously registered FeatureView.
@@ -549,7 +598,7 @@ class FeatureStore:
549
598
 
550
599
  return self._compose_feature_view(results[0], self.list_entities().collect())
551
600
 
552
- @dispatch_decorator(prpr_version="1.0.8")
601
+ @dispatch_decorator()
553
602
  def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
554
603
  """
555
604
  Resume a previously suspended FeatureView.
@@ -562,7 +611,7 @@ class FeatureStore:
562
611
  """
563
612
  return self._update_feature_view_status(feature_view, "RESUME")
564
613
 
565
- @dispatch_decorator(prpr_version="1.0.8")
614
+ @dispatch_decorator()
566
615
  def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
567
616
  """
568
617
  Suspend an active FeatureView.
@@ -575,7 +624,7 @@ class FeatureStore:
575
624
  """
576
625
  return self._update_feature_view_status(feature_view, "SUSPEND")
577
626
 
578
- @dispatch_decorator(prpr_version="1.0.8")
627
+ @dispatch_decorator()
579
628
  def delete_feature_view(self, feature_view: FeatureView) -> None:
580
629
  """
581
630
  Delete a FeatureView.
@@ -586,6 +635,8 @@ class FeatureStore:
586
635
  Raises:
587
636
  SnowflakeMLException: [ValueError] FeatureView is not registered.
588
637
  """
638
+ # TODO: we should leverage lineage graph to check downstream deps, and block the deletion
639
+ # if there're other FVs depending on this
589
640
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
590
641
  raise snowml_exceptions.SnowflakeMLException(
591
642
  error_code=error_codes.NOT_FOUND,
@@ -608,7 +659,7 @@ class FeatureStore:
608
659
 
609
660
  logger.info(f"Deleted FeatureView {feature_view.name}/{feature_view.version}.")
610
661
 
611
- @dispatch_decorator(prpr_version="1.0.8")
662
+ @dispatch_decorator()
612
663
  def list_entities(self) -> DataFrame:
613
664
  """
614
665
  List all Entities in the FeatureStore.
@@ -629,7 +680,7 @@ class FeatureStore:
629
680
  ),
630
681
  )
631
682
 
632
- @dispatch_decorator(prpr_version="1.0.8")
683
+ @dispatch_decorator()
633
684
  def get_entity(self, name: str) -> Entity:
634
685
  """
635
686
  Retrieve previously registered Entity object.
@@ -659,8 +710,7 @@ class FeatureStore:
659
710
  original_exception=ValueError(f"Cannot find Entity with name: {name}."),
660
711
  )
661
712
 
662
- raw_join_keys = result[0]["JOIN_KEYS"]
663
- join_keys = raw_join_keys.strip("[]").split(",")
713
+ join_keys = self._recompose_join_keys(result[0]["JOIN_KEYS"])
664
714
 
665
715
  return Entity._construct_entity(
666
716
  name=SqlIdentifier(result[0]["NAME"], case_sensitive=True).identifier(),
@@ -669,7 +719,7 @@ class FeatureStore:
669
719
  owner=result[0]["OWNER"],
670
720
  )
671
721
 
672
- @dispatch_decorator(prpr_version="1.0.8")
722
+ @dispatch_decorator()
673
723
  def delete_entity(self, name: str) -> None:
674
724
  """
675
725
  Delete a previously registered Entity.
@@ -690,13 +740,13 @@ class FeatureStore:
690
740
  original_exception=ValueError(f"Entity {name} does not exist."),
691
741
  )
692
742
 
693
- active_feature_views = cast(List[FeatureView], self.list_feature_views(entity_name=name, as_dataframe=False))
743
+ active_feature_views = self.list_feature_views(entity_name=name).collect(statement_params=self._telemetry_stmp)
744
+
694
745
  if len(active_feature_views) > 0:
746
+ active_fvs = [r["NAME"] for r in active_feature_views]
695
747
  raise snowml_exceptions.SnowflakeMLException(
696
748
  error_code=error_codes.SNOWML_DELETE_FAILED,
697
- original_exception=ValueError(
698
- f"Cannot delete Entity {name} due to active FeatureViews: {[f.name for f in active_feature_views]}."
699
- ),
749
+ original_exception=ValueError(f"Cannot delete Entity {name} due to active FeatureViews: {active_fvs}."),
700
750
  )
701
751
 
702
752
  tag_name = self._get_fully_qualified_name(self._get_entity_name(name))
@@ -705,11 +755,11 @@ class FeatureStore:
705
755
  except Exception as e:
706
756
  raise snowml_exceptions.SnowflakeMLException(
707
757
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
708
- original_exception=RuntimeError(f"Failed to alter schema or drop tag: {e}."),
758
+ original_exception=RuntimeError(f"Failed to delete entity: {e}."),
709
759
  ) from e
710
760
  logger.info(f"Deleted Entity {name}.")
711
761
 
712
- @dispatch_decorator(prpr_version="1.0.8")
762
+ @dispatch_decorator()
713
763
  def retrieve_feature_values(
714
764
  self,
715
765
  spine_df: DataFrame,
@@ -757,145 +807,163 @@ class FeatureStore:
757
807
 
758
808
  return df
759
809
 
760
- @dispatch_decorator(prpr_version="1.0.8")
810
+ @overload
811
+ def generate_dataset(
812
+ self,
813
+ name: str,
814
+ spine_df: DataFrame,
815
+ features: List[Union[FeatureView, FeatureViewSlice]],
816
+ version: Optional[str] = None,
817
+ spine_timestamp_col: Optional[str] = None,
818
+ spine_label_cols: Optional[List[str]] = None,
819
+ exclude_columns: Optional[List[str]] = None,
820
+ include_feature_view_timestamp_col: bool = False,
821
+ desc: str = "",
822
+ output_type: Literal["dataset"] = "dataset",
823
+ ) -> dataset.Dataset:
824
+ ...
825
+
826
+ @overload
761
827
  def generate_dataset(
762
828
  self,
829
+ name: str,
763
830
  spine_df: DataFrame,
764
831
  features: List[Union[FeatureView, FeatureViewSlice]],
765
- materialized_table: Optional[str] = None,
832
+ output_type: Literal["table"],
833
+ version: Optional[str] = None,
766
834
  spine_timestamp_col: Optional[str] = None,
767
835
  spine_label_cols: Optional[List[str]] = None,
768
836
  exclude_columns: Optional[List[str]] = None,
769
- save_mode: str = "errorifexists",
770
837
  include_feature_view_timestamp_col: bool = False,
771
838
  desc: str = "",
772
- ) -> Dataset:
839
+ ) -> DataFrame:
840
+ ...
841
+
842
+ @dispatch_decorator() # type: ignore[misc]
843
+ def generate_dataset(
844
+ self,
845
+ name: str,
846
+ spine_df: DataFrame,
847
+ features: List[Union[FeatureView, FeatureViewSlice]],
848
+ version: Optional[str] = None,
849
+ spine_timestamp_col: Optional[str] = None,
850
+ spine_label_cols: Optional[List[str]] = None,
851
+ exclude_columns: Optional[List[str]] = None,
852
+ include_feature_view_timestamp_col: bool = False,
853
+ desc: str = "",
854
+ output_type: Literal["dataset", "table"] = "dataset",
855
+ ) -> Union[dataset.Dataset, DataFrame]:
773
856
  """
774
857
  Generate dataset by given source table and feature views.
775
858
 
776
859
  Args:
860
+ name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
861
+ by their name and version.
777
862
  spine_df: The fact table contains the raw dataset.
778
863
  features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
779
- materialized_table: The destination table where produced result will be stored. If it's none, then result
780
- won't be registered. If materialized_table is provided, then produced result will be written into
781
- the provided table. Note result dataset will be a snowflake clone of registered table.
782
- New data can append on same registered table and previously generated dataset won't be affected.
783
- Default result table name will be a concatenation of materialized_table name and current timestamp.
864
+ version: The version of the Dataset to be generated. If none specified, the current timestamp
865
+ will be used instead.
784
866
  spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
785
867
  time-series features. If spine_timestamp_col is not none, the input features also must have
786
868
  timestamp_col.
787
869
  spine_label_cols: Name of column(s) in spine_df that contains labels.
788
870
  exclude_columns: Column names to exclude from the result dataframe.
789
871
  The underlying storage will still contain the columns.
790
- save_mode: How new data is saved. currently support:
791
- errorifexists: Raise error if registered table already exists.
792
- merge: Merge new data if registered table already exists.
793
872
  include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
794
873
  (if feature view has timestamp column) if set true. Default to false.
795
874
  desc: A description about this dataset.
875
+ output_type: The type of Snowflake storage to use for the generated training data: "dataset" (default) or "table".
796
876
 
797
877
  Returns:
798
- A Dataset object.
878
+ If output_type is "dataset" (default), returns a Dataset object.
879
+ If output_type is "table", returns a Snowpark DataFrame representing the table.
799
880
 
800
881
  Raises:
801
- SnowflakeMLException: [ValueError] save_mode is invalid.
802
- SnowflakeMLException: [ValueError] spine_df contains more than one query.
803
- SnowflakeMLException: [ValueError] Materialized_table contains invalid char `.`.
804
- SnowflakeMLException: [ValueError] Materialized_table already exists with save_mode `errorifexists`.
882
+ SnowflakeMLException: [ValueError] Dataset name/version already exists
805
883
  SnowflakeMLException: [ValueError] Snapshot creation failed.
884
+ SnowflakeMLException: [ValueError] Invalid output_type specified.
806
885
  SnowflakeMLException: [RuntimeError] Failed to create clone from table.
807
886
  SnowflakeMLException: [RuntimeError] Failed to find resources.
808
887
  """
888
+ if output_type not in {"table", "dataset"}:
889
+ raise snowml_exceptions.SnowflakeMLException(
890
+ error_code=error_codes.INVALID_ARGUMENT,
891
+ original_exception=ValueError(f"Invalid output_type: {output_type}."),
892
+ )
809
893
  if spine_timestamp_col is not None:
810
894
  spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
811
895
  if spine_label_cols is not None:
812
896
  spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
813
897
 
814
- allowed_save_mode = {"errorifexists", "merge"}
815
- if save_mode.lower() not in allowed_save_mode:
816
- raise snowml_exceptions.SnowflakeMLException(
817
- error_code=error_codes.INVALID_ARGUMENT,
818
- original_exception=ValueError(
819
- f"'{save_mode}' is not supported. Current supported save modes: {','.join(allowed_save_mode)}"
820
- ),
821
- )
822
-
823
- if len(spine_df.queries["queries"]) != 1:
824
- raise snowml_exceptions.SnowflakeMLException(
825
- error_code=error_codes.INVALID_ARGUMENT,
826
- original_exception=ValueError(
827
- f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
828
- ),
829
- )
830
-
831
898
  result_df, join_keys = self._join_features(
832
899
  spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
833
900
  )
834
901
 
835
- snapshot_table = None
836
- if materialized_table is not None:
837
- if "." in materialized_table:
838
- raise snowml_exceptions.SnowflakeMLException(
839
- error_code=error_codes.INVALID_ARGUMENT,
840
- original_exception=ValueError(f"materialized_table {materialized_table} contains invalid char `.`"),
841
- )
842
-
843
- # TODO (wezhou) change materialized_table to SqlIdentifier
844
- found_rows = self._find_object("TABLES", SqlIdentifier(materialized_table))
845
- if save_mode.lower() == "errorifexists" and len(found_rows) > 0:
846
- raise snowml_exceptions.SnowflakeMLException(
847
- error_code=error_codes.OBJECT_ALREADY_EXISTS,
848
- original_exception=ValueError(f"Dataset table {materialized_table} already exists."),
849
- )
850
-
851
- self._dump_dataset(result_df, materialized_table, join_keys, spine_timestamp_col)
852
-
853
- snapshot_table = f"{materialized_table}_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
854
- snapshot_table = self._get_fully_qualified_name(snapshot_table)
855
- materialized_table = self._get_fully_qualified_name(materialized_table)
856
-
857
- try:
858
- self._session.sql(f"CREATE TABLE {snapshot_table} CLONE {materialized_table}").collect(
859
- statement_params=self._telemetry_stmp
860
- )
861
- except Exception as e:
862
- raise snowml_exceptions.SnowflakeMLException(
863
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
864
- original_exception=RuntimeError(
865
- f"Failed to create clone {materialized_table} from table {snapshot_table}: {e}."
866
- ),
867
- ) from e
868
-
869
- result_df = self._session.sql(f"SELECT * FROM {snapshot_table}")
902
+ # Convert name to fully qualified name if not already fully qualified
903
+ db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
904
+ name = "{}.{}.{}".format(
905
+ db_name or self._config.database,
906
+ schema_name or self._config.schema,
907
+ object_name,
908
+ )
909
+ version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
870
910
 
871
911
  if exclude_columns is not None:
872
912
  result_df = self._exclude_columns(result_df, exclude_columns)
873
913
 
874
914
  fs_meta = FeatureStoreMetadata(
875
- spine_query=spine_df.queries["queries"][0],
876
- connection_params=vars(self._config),
877
- features=[fv.to_json() for fv in features],
915
+ spine_query=spine_df.queries["queries"][-1],
916
+ serialized_feature_views=[fv.to_json() for fv in features],
917
+ spine_timestamp_col=spine_timestamp_col,
878
918
  )
879
919
 
880
- dataset = Dataset(
881
- self._session,
882
- df=result_df,
883
- materialized_table=materialized_table,
884
- snapshot_table=snapshot_table,
885
- timestamp_col=spine_timestamp_col,
886
- label_cols=spine_label_cols,
887
- feature_store_metadata=fs_meta,
888
- desc=desc,
889
- )
890
- return dataset
920
+ try:
921
+ if output_type == "table":
922
+ table_name = f"{name}_{version}"
923
+ result_df.write.mode("errorifexists").save_as_table(table_name) # type: ignore[call-overload]
924
+ ds_df = self._session.table(table_name)
925
+ return ds_df
926
+ else:
927
+ assert output_type == "dataset"
928
+ if not self._is_dataset_enabled():
929
+ raise snowml_exceptions.SnowflakeMLException(
930
+ error_code=error_codes.SNOWML_CREATE_FAILED,
931
+ original_exception=RuntimeError(
932
+ "Dataset is not enabled in your account. Ask your account admin to set"
933
+ ' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
934
+ " as a Snowflake Table instead."
935
+ ),
936
+ )
937
+ ds: dataset.Dataset = dataset.create_from_dataframe(
938
+ self._session,
939
+ name,
940
+ version,
941
+ input_dataframe=result_df,
942
+ exclude_cols=[spine_timestamp_col],
943
+ label_cols=spine_label_cols,
944
+ properties=fs_meta,
945
+ comment=desc,
946
+ )
947
+ return ds
948
+
949
+ except dataset_errors.DatasetExistError as e:
950
+ raise snowml_exceptions.SnowflakeMLException(
951
+ error_code=error_codes.OBJECT_ALREADY_EXISTS,
952
+ original_exception=RuntimeError(str(e)),
953
+ ) from e
954
+ except SnowparkSQLException as e:
955
+ raise snowml_exceptions.SnowflakeMLException(
956
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
957
+ original_exception=RuntimeError(f"An error occurred during dataset generation: {e}."),
958
+ ) from e
891
959
 
892
- @dispatch_decorator(prpr_version="1.0.8")
893
- def load_feature_views_from_dataset(self, dataset: Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
960
+ @dispatch_decorator()
961
+ def load_feature_views_from_dataset(self, ds: dataset.Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
894
962
  """
895
963
  Retrieve FeatureViews used during Dataset construction.
896
964
 
897
965
  Args:
898
- dataset: Dataset object created from feature store.
966
+ ds: Dataset object created from feature store.
899
967
 
900
968
  Returns:
901
969
  List of FeatureViews used during Dataset construction.
@@ -903,56 +971,59 @@ class FeatureStore:
903
971
  Raises:
904
972
  ValueError: if dataset object is not generated from feature store.
905
973
  """
906
- serialized_objs = dataset.load_features()
907
- if serialized_objs is None:
908
- raise ValueError(f"Dataset {dataset} does not contain valid feature view information.")
974
+ assert ds.selected_version is not None
975
+ source_meta = ds.selected_version._get_metadata()
976
+ if (
977
+ source_meta is None
978
+ or not isinstance(source_meta.properties, FeatureStoreMetadata)
979
+ or source_meta.properties.serialized_feature_views is None
980
+ ):
981
+ raise ValueError(f"Dataset {ds} does not contain valid feature view information.")
909
982
 
910
- return self._load_serialized_feature_objects(serialized_objs)
983
+ return self._load_serialized_feature_objects(source_meta.properties.serialized_feature_views)
911
984
 
912
- @dispatch_decorator(prpr_version="1.0.8")
913
- def clear(self) -> None:
985
+ @dispatch_decorator()
986
+ def _clear(self, dryrun: bool = True) -> None:
914
987
  """
915
- Clear all feature store internal objects including feature views, entities etc. Note feature store
916
- instance (snowflake schema) won't be deleted. Use snowflake to delete feature store instance.
988
+ Clear all feature views and entities. Note Feature Store schema and metadata will NOT be purged
989
+ together. Use SQL to delete schema and metadata instead.
917
990
 
918
- Raises:
919
- SnowflakeMLException: [RuntimeError] Failed to clear feature store.
991
+ Args:
992
+ dryrun: When true, print the list of objects that would be deleted without actually performing the deletion.
920
993
  """
921
- try:
922
- result = self._session.sql(
923
- f"""
924
- SELECT *
925
- FROM {self._config.database}.INFORMATION_SCHEMA.SCHEMATA
926
- WHERE SCHEMA_NAME = '{self._config.schema.resolved()}'
927
- """
928
- ).collect()
929
- if len(result) == 0:
930
- return
931
-
932
- object_types = ["DYNAMIC TABLES", "TABLES", "VIEWS", "TASKS"]
933
- for obj_type in object_types:
934
- all_object_rows = self._find_object(obj_type, None)
935
- for row in all_object_rows:
936
- obj_name = self._get_fully_qualified_name(SqlIdentifier(row["name"], case_sensitive=True))
937
- self._session.sql(f"DROP {obj_type[:-1]} {obj_name}").collect()
938
- logger.info(f"Deleted {obj_type[:-1]}: {obj_name}.")
939
-
940
- entity_tags = self._find_object("TAGS", SqlIdentifier(_ENTITY_TAG_PREFIX), prefix_match=True)
941
- all_tags = [
942
- _FEATURE_VIEW_ENTITY_TAG,
943
- _FEATURE_VIEW_TS_COL_TAG,
944
- _FEATURE_STORE_OBJECT_TAG,
945
- ] + [SqlIdentifier(row["name"], case_sensitive=True) for row in entity_tags]
946
- for tag_name in all_tags:
947
- obj_name = self._get_fully_qualified_name(tag_name)
948
- self._session.sql(f"DROP TAG IF EXISTS {obj_name}").collect()
949
- logger.info(f"Deleted TAG: {obj_name}.")
994
+ warnings.warn(
995
+ "It will clear ALL feature views and entities in this Feature Store. Make sure your role"
996
+ " has sufficient access to all feature views and entities. Insufficient access to some feature"
997
+ " views or entities will leave Feature Store in an incomplete state.",
998
+ stacklevel=2,
999
+ category=UserWarning,
1000
+ )
1001
+
1002
+ all_fvs_df = self.list_feature_views()
1003
+ all_entities_df = self.list_entities()
1004
+ all_fvs_rows = all_fvs_df.collect()
1005
+ all_entities_rows = all_entities_df.collect()
1006
+
1007
+ if dryrun:
1008
+ logger.info(
1009
+ "Following feature views and entities will be deleted."
1010
+ + " Set 'dryrun=False' to perform the actual deletion."
1011
+ )
1012
+ logger.info(f"Total {len(all_fvs_rows)} Feature views to be deleted:")
1013
+ all_fvs_df.show(n=len(all_fvs_rows))
1014
+ logger.info(f"\nTotal {len(all_entities_rows)} entities to be deleted:")
1015
+ all_entities_df.show(n=len(all_entities_rows))
1016
+ return
1017
+
1018
+ for fv_row in all_fvs_rows:
1019
+ fv = self.get_feature_view(
1020
+ SqlIdentifier(fv_row["NAME"], case_sensitive=True).identifier(), fv_row["VERSION"]
1021
+ )
1022
+ self.delete_feature_view(fv)
1023
+
1024
+ for entity_row in all_entities_rows:
1025
+ self.delete_entity(SqlIdentifier(entity_row["NAME"], case_sensitive=True).identifier())
950
1026
 
951
- except Exception as e:
952
- raise snowml_exceptions.SnowflakeMLException(
953
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
954
- original_exception=RuntimeError(f"Failed to clear feature store {self._config.full_schema_path}: {e}."),
955
- ) from e
956
1027
  logger.info(f"Feature store {self._config.full_schema_path} has been cleared.")
957
1028
 
958
1029
  def _get_feature_view_if_exists(self, name: str, version: str) -> FeatureView:
@@ -965,37 +1036,47 @@ class FeatureStore:
965
1036
  )
966
1037
  return existing_fv
967
1038
 
1039
+ def _recompose_join_keys(self, join_key: str) -> List[str]:
1040
+ # ALLOWED_VALUES in TAG will follow format ["key_1,key2,..."]
1041
+ # since keys are already resolved following the SQL identifier rule on the write path,
1042
+ # we simply parse the keys back and wrap them with quotes to preserve cases
1043
+ # Example join_key repr from TAG value: '["key1,key2,key3"]' (the leading '["' and trailing '"]' are sliced off below)
1044
+ join_keys = join_key[2:-2].split(",")
1045
+ res = []
1046
+ for k in join_keys:
1047
+ res.append(f'"{k}"')
1048
+ return res
1049
+
968
1050
  def _create_dynamic_table(
969
1051
  self,
970
1052
  feature_view_name: SqlIdentifier,
971
1053
  feature_view: FeatureView,
972
1054
  fully_qualified_name: str,
973
1055
  column_descs: str,
974
- entities: str,
1056
+ tagging_clause: str,
975
1057
  schedule_task: bool,
976
1058
  warehouse: SqlIdentifier,
977
- timestamp_col: SqlIdentifier,
978
1059
  block: bool,
979
1060
  override: bool,
980
1061
  ) -> None:
981
1062
  # TODO: cluster by join keys once DT supports that
982
- override_clause = " OR REPLACE" if override else ""
983
- query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
984
- TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
985
- COMMENT = '{feature_view.desc}'
986
- TAG (
987
- {self._get_fully_qualified_name(_FEATURE_VIEW_ENTITY_TAG)} = '{entities}',
988
- {self._get_fully_qualified_name(_FEATURE_VIEW_TS_COL_TAG)} = '{timestamp_col}',
989
- {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} =
990
- '{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
991
- )
992
- WAREHOUSE = {warehouse}
993
- AS {feature_view.query}
994
- """
995
1063
  try:
1064
+ override_clause = " OR REPLACE" if override else ""
1065
+ query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
1066
+ TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
1067
+ COMMENT = '{feature_view.desc}'
1068
+ TAG (
1069
+ {tagging_clause}
1070
+ )
1071
+ WAREHOUSE = {warehouse}
1072
+ AS {feature_view.query}
1073
+ """
996
1074
  self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
997
1075
 
998
1076
  if schedule_task:
1077
+ task_obj_info = _FeatureStoreObjInfo(
1078
+ _FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK, snowml_version.VERSION
1079
+ )
999
1080
  try:
1000
1081
  self._session.sql(
1001
1082
  f"""CREATE{override_clause} TASK {fully_qualified_name}
@@ -1007,8 +1088,7 @@ class FeatureStore:
1007
1088
  self._session.sql(
1008
1089
  f"""
1009
1090
  ALTER TASK {fully_qualified_name}
1010
- SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
1011
- ='{_FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK.value}'
1091
+ SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}='{task_obj_info.to_json()}'
1012
1092
  """
1013
1093
  ).collect(statement_params=self._telemetry_stmp)
1014
1094
  self._session.sql(f"ALTER TASK {fully_qualified_name} RESUME").collect(
@@ -1049,57 +1129,6 @@ class FeatureStore:
1049
1129
  category=UserWarning,
1050
1130
  )
1051
1131
 
1052
- def _dump_dataset(
1053
- self,
1054
- df: DataFrame,
1055
- table_name: str,
1056
- join_keys: List[SqlIdentifier],
1057
- spine_timestamp_col: Optional[SqlIdentifier] = None,
1058
- ) -> None:
1059
- if len(df.queries["queries"]) != 1:
1060
- raise snowml_exceptions.SnowflakeMLException(
1061
- error_code=error_codes.INVALID_ARGUMENT,
1062
- original_exception=ValueError(f"Dataset df must contain only one query. Got: {df.queries['queries']}"),
1063
- )
1064
- schema = ", ".join([f"{c.name} {type_utils.convert_sp_to_sf_type(c.datatype)}" for c in df.schema.fields])
1065
- fully_qualified_name = self._get_fully_qualified_name(table_name)
1066
-
1067
- try:
1068
- self._session.sql(
1069
- f"""CREATE TABLE IF NOT EXISTS {fully_qualified_name} ({schema})
1070
- CLUSTER BY ({', '.join(join_keys)})
1071
- TAG ({self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '')
1072
- """
1073
- ).collect(block=True, statement_params=self._telemetry_stmp)
1074
- except Exception as e:
1075
- raise snowml_exceptions.SnowflakeMLException(
1076
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1077
- original_exception=RuntimeError(f"Failed to create table {fully_qualified_name}: {e}."),
1078
- ) from e
1079
-
1080
- source_query = df.queries["queries"][0]
1081
-
1082
- if spine_timestamp_col is not None:
1083
- join_keys.append(spine_timestamp_col)
1084
-
1085
- _, _, dest_alias, _ = identifier.parse_schema_level_object_identifier(fully_qualified_name)
1086
- source_alias = f"{dest_alias}_source"
1087
- join_cond = " AND ".join([f"{dest_alias}.{k} = {source_alias}.{k}" for k in join_keys])
1088
- update_clause = ", ".join([f"{dest_alias}.{c} = {source_alias}.{c}" for c in df.columns])
1089
- insert_clause = ", ".join([f"{source_alias}.{c}" for c in df.columns])
1090
- query = f"""
1091
- MERGE INTO {fully_qualified_name} USING ({source_query}) {source_alias} ON {join_cond}
1092
- WHEN MATCHED THEN UPDATE SET {update_clause}
1093
- WHEN NOT MATCHED THEN INSERT ({', '.join(df.columns)}) VALUES ({insert_clause})
1094
- """
1095
- try:
1096
- self._session.sql(query).collect(block=True, statement_params=self._telemetry_stmp)
1097
- except Exception as e:
1098
- raise snowml_exceptions.SnowflakeMLException(
1099
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1100
- original_exception=RuntimeError(f"Failed to create dataset {fully_qualified_name} with merge: {e}."),
1101
- ) from e
1102
-
1103
1132
  def _validate_entity_exists(self, name: SqlIdentifier) -> bool:
1104
1133
  full_entity_tag_name = self._get_entity_name(name)
1105
1134
  found_rows = self._find_object("TAGS", full_entity_tag_name)
@@ -1112,14 +1141,6 @@ class FeatureStore:
1112
1141
  spine_timestamp_col: Optional[SqlIdentifier],
1113
1142
  include_feature_view_timestamp_col: bool,
1114
1143
  ) -> Tuple[DataFrame, List[SqlIdentifier]]:
1115
- if len(spine_df.queries["queries"]) != 1:
1116
- raise snowml_exceptions.SnowflakeMLException(
1117
- error_code=error_codes.INVALID_ARGUMENT,
1118
- original_exception=ValueError(
1119
- f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
1120
- ),
1121
- )
1122
-
1123
1144
  for f in features:
1124
1145
  f = f.feature_view_ref if isinstance(f, FeatureViewSlice) else f
1125
1146
  if f.status == FeatureViewStatus.DRAFT:
@@ -1141,7 +1162,7 @@ class FeatureStore:
1141
1162
  self._asof_join_enabled = self._is_asof_join_enabled()
1142
1163
 
1143
1164
  # TODO: leverage Snowpark dataframe for more concise syntax once it supports AsOfJoin
1144
- query = spine_df.queries["queries"][0]
1165
+ query = spine_df.queries["queries"][-1]
1145
1166
  layer = 0
1146
1167
  for f in features:
1147
1168
  if isinstance(f, FeatureViewSlice):
@@ -1150,7 +1171,7 @@ class FeatureStore:
1150
1171
  else:
1151
1172
  cols = f.feature_names
1152
1173
 
1153
- join_keys = [k for e in f.entities for k in e.join_keys]
1174
+ join_keys = list({k for e in f.entities for k in e.join_keys})
1154
1175
  join_keys_str = ", ".join(join_keys)
1155
1176
  assert f.version is not None
1156
1177
  join_table_name = f.fully_qualified_name()
@@ -1199,7 +1220,15 @@ class FeatureStore:
1199
1220
  """
1200
1221
  layer += 1
1201
1222
 
1202
- return self._session.sql(query), join_keys
1223
+ # TODO: construct result dataframe with dataframe APIs once ASOF join is supported natively.
1224
+ # The code below manually constructs the result dataframe from private members of the spine dataframe, which
1225
+ # will likely cause unintended issues. This step is needed because spine_df might contain
1226
+ # prerequisite queries and post actions that must be carried over to result dataframe.
1227
+ result_df = self._session.sql(query)
1228
+ result_df._plan.queries = spine_df._plan.queries[:-1] + result_df._plan.queries
1229
+ result_df._plan.post_actions = spine_df._plan.post_actions
1230
+
1231
+ return result_df, join_keys
1203
1232
 
1204
1233
  def _check_database_exists_or_throw(self) -> None:
1205
1234
  resolved_db_name = self._config.database.resolved()
@@ -1227,8 +1256,7 @@ class FeatureStore:
1227
1256
  for tag_name in to_sql_identifiers(
1228
1257
  [
1229
1258
  _FEATURE_STORE_OBJECT_TAG,
1230
- _FEATURE_VIEW_ENTITY_TAG,
1231
- _FEATURE_VIEW_TS_COL_TAG,
1259
+ _FEATURE_VIEW_METADATA_TAG,
1232
1260
  ]
1233
1261
  ):
1234
1262
  tag_result = self._find_object("TAGS", tag_name)
@@ -1340,7 +1368,8 @@ class FeatureStore:
1340
1368
 
1341
1369
  # Part 4: join original spine table with window table
1342
1370
  prefix_f_only_cols = to_sql_identifiers(
1343
- [f"{temp_prefix}{name.resolved()}" for name in f_only_cols], case_sensitive=True
1371
+ [f"{temp_prefix}{name.resolved()}" for name in f_only_cols],
1372
+ case_sensitive=True,
1344
1373
  )
1345
1374
  last_select = f"""
1346
1375
  SELECT
@@ -1373,7 +1402,10 @@ class FeatureStore:
1373
1402
  return dynamic_table_results + view_results
1374
1403
 
1375
1404
  def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
1376
- assert operation in ["RESUME", "SUSPEND"], f"Operation: {operation} not supported"
1405
+ assert operation in [
1406
+ "RESUME",
1407
+ "SUSPEND",
1408
+ ], f"Operation: {operation} not supported"
1377
1409
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
1378
1410
  raise snowml_exceptions.SnowflakeMLException(
1379
1411
  error_code=error_codes.NOT_FOUND,
@@ -1397,17 +1429,76 @@ class FeatureStore:
1397
1429
  logger.info(f"Successfully {operation} FeatureView {feature_view.name}/{feature_view.version}.")
1398
1430
  return self.get_feature_view(feature_view.name, feature_view.version)
1399
1431
 
1400
- def _find_feature_views(
1432
+ def _optimized_find_feature_views(
1401
1433
  self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]
1402
- ) -> List[FeatureView]:
1434
+ ) -> DataFrame:
1403
1435
  if not self._validate_entity_exists(entity_name):
1404
- return []
1436
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1405
1437
 
1438
+ # TODO: this can be optimized further by directly getting all possible FVs and filtering by tag
1439
+ # it's easier to rewrite the code once we can remove the tag_reference path
1406
1440
  all_fvs = self._get_fv_backend_representations(object_name=None)
1407
1441
  fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1408
1442
 
1409
1443
  if len(fv_maps.keys()) == 0:
1410
- return []
1444
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1445
+
1446
+ filter_clause = f"WHERE OBJECT_NAME LIKE '{feature_view_name.resolved()}%'" if feature_view_name else ""
1447
+ try:
1448
+ res = self._session.sql(
1449
+ f"""
1450
+ SELECT
1451
+ OBJECT_NAME
1452
+ FROM TABLE(
1453
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1454
+ TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
1455
+ )
1456
+ ) {filter_clause}"""
1457
+ ).collect(statement_params=self._telemetry_stmp)
1458
+ except Exception as e:
1459
+ raise snowml_exceptions.SnowflakeMLException(
1460
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1461
+ original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
1462
+ ) from e
1463
+
1464
+ output_values: List[List[Any]] = []
1465
+ for r in res:
1466
+ row = fv_maps[SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)]
1467
+ self._extract_feature_view_info(row, output_values)
1468
+
1469
+ return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1470
+
1471
+ def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
1472
+ name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
1473
+ m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
1474
+ if m is None:
1475
+ raise snowml_exceptions.SnowflakeMLException(
1476
+ error_code=error_codes.INTERNAL_SNOWML_ERROR,
1477
+ original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1478
+ )
1479
+
1480
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1481
+
1482
+ values: List[Any] = []
1483
+ values.append(name)
1484
+ values.append(version)
1485
+ values.append(row["database_name"])
1486
+ values.append(row["schema_name"])
1487
+ values.append(row["created_on"])
1488
+ values.append(row["owner"])
1489
+ values.append(row["comment"])
1490
+ values.append(fv_metadata.entities)
1491
+ output_values.append(values)
1492
+
1493
+ def _find_feature_views(self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]) -> DataFrame:
1494
+ if not self._validate_entity_exists(entity_name):
1495
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1496
+
1497
+ all_fvs = self._get_fv_backend_representations(object_name=None)
1498
+ fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1499
+
1500
+ if len(fv_maps.keys()) == 0:
1501
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1411
1502
 
1412
1503
  # NOTE: querying INFORMATION_SCHEMA for Entity lineage can be expensive depending on how many active
1413
1504
  # FeatureViews there are. If this ever become an issue, consider exploring improvements.
@@ -1424,7 +1515,7 @@ class FeatureStore:
1424
1515
  )
1425
1516
  )
1426
1517
  WHERE LEVEL = 'TABLE'
1427
- AND TAG_NAME = '{_FEATURE_VIEW_ENTITY_TAG}'
1518
+ AND TAG_NAME = '{_FEATURE_VIEW_METADATA_TAG}'
1428
1519
  """
1429
1520
  for fv_name in fv_maps.keys()
1430
1521
  ]
@@ -1436,21 +1527,22 @@ class FeatureStore:
1436
1527
  original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
1437
1528
  ) from e
1438
1529
 
1439
- entities = self.list_entities().collect()
1440
- outputs = []
1530
+ output_values: List[List[Any]] = []
1441
1531
  for r in results:
1442
- if entity_name == SqlIdentifier(r["TAG_VALUE"], case_sensitive=True):
1443
- fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
1444
- fv_name = SqlIdentifier(fv_name, case_sensitive=True)
1445
- obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
1446
- if feature_view_name is not None:
1447
- if fv_name == feature_view_name:
1448
- outputs.append(self._compose_feature_view(fv_maps[obj_name], entities))
1532
+ fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
1533
+ for retrieved_entity in fv_metadata.entities:
1534
+ if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
1535
+ fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
1536
+ fv_name = SqlIdentifier(fv_name, case_sensitive=True)
1537
+ obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
1538
+ if feature_view_name is not None:
1539
+ if fv_name == feature_view_name:
1540
+ self._extract_feature_view_info(fv_maps[obj_name], output_values)
1541
+ else:
1542
+ continue
1449
1543
  else:
1450
- continue
1451
- else:
1452
- outputs.append(self._compose_feature_view(fv_maps[obj_name], entities))
1453
- return outputs
1544
+ self._extract_feature_view_info(fv_maps[obj_name], output_values)
1545
+ return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1454
1546
 
1455
1547
  def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
1456
1548
  def find_and_compose_entity(name: str) -> Entity:
@@ -1459,7 +1551,7 @@ class FeatureStore:
1459
1551
  if e["NAME"] == name:
1460
1552
  return Entity(
1461
1553
  name=SqlIdentifier(e["NAME"], case_sensitive=True).identifier(),
1462
- join_keys=e["JOIN_KEYS"].strip("[]").split(","),
1554
+ join_keys=self._recompose_join_keys(e["JOIN_KEYS"]),
1463
1555
  desc=e["DESC"],
1464
1556
  )
1465
1557
  raise RuntimeError(f"Cannot find entity {name} from retrieved entity list: {entity_list}")
@@ -1473,14 +1565,17 @@ class FeatureStore:
1473
1565
  original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1474
1566
  )
1475
1567
 
1568
+ fv_name = FeatureView._get_physical_name(name, version)
1569
+ infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
1570
+
1476
1571
  if m.group("obj_type") == "DYNAMIC TABLE":
1477
1572
  query = m.group("query")
1478
1573
  df = self._session.sql(query)
1479
1574
  desc = m.group("comment")
1480
- entity_names = m.group("entities")
1481
- entities = [find_and_compose_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
1482
- ts_col = m.group("ts_col")
1483
- timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
1575
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1576
+ entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1577
+ ts_col = fv_metadata.timestamp_col
1578
+ timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
1484
1579
 
1485
1580
  fv = FeatureView._construct_feature_view(
1486
1581
  name=name,
@@ -1490,9 +1585,7 @@ class FeatureStore:
1490
1585
  desc=desc,
1491
1586
  version=version,
1492
1587
  status=FeatureViewStatus(row["scheduling_state"]),
1493
- feature_descs=self._fetch_column_descs(
1494
- "DYNAMIC TABLE", SqlIdentifier(row["name"], case_sensitive=True)
1495
- ),
1588
+ feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
1496
1589
  refresh_freq=row["target_lag"],
1497
1590
  database=self._config.database.identifier(),
1498
1591
  schema=self._config.schema.identifier(),
@@ -1500,16 +1593,17 @@ class FeatureStore:
1500
1593
  refresh_mode=row["refresh_mode"],
1501
1594
  refresh_mode_reason=row["refresh_mode_reason"],
1502
1595
  owner=row["owner"],
1596
+ infer_schema_df=infer_schema_df,
1503
1597
  )
1504
1598
  return fv
1505
1599
  else:
1506
1600
  query = m.group("query")
1507
1601
  df = self._session.sql(query)
1508
1602
  desc = m.group("comment")
1509
- entity_names = m.group("entities")
1510
- entities = [find_and_compose_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
1511
- ts_col = m.group("ts_col")
1512
- timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
1603
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1604
+ entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1605
+ ts_col = fv_metadata.timestamp_col
1606
+ timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
1513
1607
 
1514
1608
  fv = FeatureView._construct_feature_view(
1515
1609
  name=name,
@@ -1519,7 +1613,7 @@ class FeatureStore:
1519
1613
  desc=desc,
1520
1614
  version=version,
1521
1615
  status=FeatureViewStatus.STATIC,
1522
- feature_descs=self._fetch_column_descs("VIEW", SqlIdentifier(row["name"], case_sensitive=True)),
1616
+ feature_descs=self._fetch_column_descs("VIEW", fv_name),
1523
1617
  refresh_freq=None,
1524
1618
  database=self._config.database.identifier(),
1525
1619
  schema=self._config.schema.identifier(),
@@ -1527,6 +1621,7 @@ class FeatureStore:
1527
1621
  refresh_mode=None,
1528
1622
  refresh_mode_reason=None,
1529
1623
  owner=row["owner"],
1624
+ infer_schema_df=infer_schema_df,
1530
1625
  )
1531
1626
  return fv
1532
1627
 
@@ -1542,7 +1637,10 @@ class FeatureStore:
1542
1637
  return descs
1543
1638
 
1544
1639
  def _find_object(
1545
- self, object_type: str, object_name: Optional[SqlIdentifier], prefix_match: bool = False
1640
+ self,
1641
+ object_type: str,
1642
+ object_name: Optional[SqlIdentifier],
1643
+ prefix_match: bool = False,
1546
1644
  ) -> List[Row]:
1547
1645
  """Try to find an object by given type and name pattern.
1548
1646
 
@@ -1569,7 +1667,7 @@ class FeatureStore:
1569
1667
  search_space, obj_domain = self._obj_search_spaces[object_type]
1570
1668
  all_rows = []
1571
1669
  fs_tag_objects = []
1572
- tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES"]
1670
+ tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES", "DATASETS"]
1573
1671
  try:
1574
1672
  search_scope = f"IN {search_space}" if search_space is not None else ""
1575
1673
  all_rows = self._session.sql(f"SHOW {object_type} LIKE '{match_name}' {search_scope}").collect(
@@ -1577,25 +1675,41 @@ class FeatureStore:
1577
1675
  )
1578
1676
  # There could be non-FS objects under the FS schema, thus filter on objects with the FS special tag.
1579
1677
  if object_type not in tag_free_object_types and len(all_rows) > 0:
1580
- # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
1581
- # use double quotes to make it case-sensitive.
1582
- queries = [
1583
- f"""
1584
- SELECT OBJECT_NAME
1585
- FROM TABLE(
1586
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1587
- '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
1588
- '{obj_domain}'
1678
+ if self._use_optimized_tag_ref:
1679
+ fs_obj_rows = self._session.sql(
1680
+ f"""
1681
+ SELECT
1682
+ OBJECT_NAME
1683
+ FROM TABLE(
1684
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1685
+ TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1686
+ )
1589
1687
  )
1590
- )
1591
- WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
1592
- AND TAG_SCHEMA = '{self._config.schema.resolved()}'
1593
- """
1594
- for row in all_rows
1595
- ]
1596
- fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
1597
- statement_params=self._telemetry_stmp
1598
- )
1688
+ WHERE DOMAIN='{obj_domain}'
1689
+ """
1690
+ ).collect(statement_params=self._telemetry_stmp)
1691
+ else:
1692
+ # TODO: remove this after tag_ref_internal rollout
1693
+ # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
1694
+ # use double quotes to make it case-sensitive.
1695
+ queries = [
1696
+ f"""
1697
+ SELECT OBJECT_NAME
1698
+ FROM TABLE(
1699
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1700
+ '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
1701
+ '{obj_domain}'
1702
+ )
1703
+ )
1704
+ WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
1705
+ AND TAG_SCHEMA = '{self._config.schema.resolved()}'
1706
+ """
1707
+ for row in all_rows
1708
+ ]
1709
+ fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
1710
+ statement_params=self._telemetry_stmp
1711
+ )
1712
+
1599
1713
  fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
1600
1714
  except Exception as e:
1601
1715
  raise snowml_exceptions.SnowflakeMLException(
@@ -1641,3 +1755,75 @@ class FeatureStore:
1641
1755
  ),
1642
1756
  )
1643
1757
  return cast(DataFrame, df.drop(exclude_columns))
1758
+
1759
def _tag_ref_internal_enabled(self) -> bool:
    """Probe whether ``INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL`` can be queried.

    A one-row probe query against the table function is executed on the session.

    Returns:
        True when the probe query runs without raising; False when anything inside the
        probe raises an ``Exception``.
    """
    try:
        probe = self._session.sql(
            f"""
            SELECT * FROM TABLE(
                INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
                    TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
                )
            ) LIMIT 1;
            """
        )
        probe.collect()
    except Exception:
        # Any failure of the probe is reported as "not enabled" rather than propagated.
        return False
    else:
        return True
1773
+
1774
def _is_dataset_enabled(self) -> bool:
    """Check whether ``SHOW DATASETS`` works in the configured schema.

    Returns:
        True when ``SHOW DATASETS IN SCHEMA <full_schema_path>`` runs without raising;
        False when it raises a ``SnowparkSQLException`` whose message contains
        ``'DATASETS' does not exist``.

    Raises:
        SnowparkSQLException: Re-raised unchanged when the failure message does not
            contain ``'DATASETS' does not exist``.
    """
    try:
        self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
    except SnowparkSQLException as e:
        # Only the "DATASETS does not exist" failure is treated as "disabled";
        # every other SQL failure is propagated to the caller.
        if "'DATASETS' does not exist" not in e.message:
            raise
        return False
    return True
1782
+
1783
def _check_feature_store_object_versions(self) -> None:
    """Warn when the installed package is older than the newest version found on objects.

    The versions come from ``_collapse_object_versions()``; its first element is compared
    against ``snowml_version.VERSION``. A ``UserWarning`` is emitted when the installed
    version is lower. Nothing happens when no versions are returned.
    """
    versions = self._collapse_object_versions()
    if not versions:
        return

    newest = versions[0]
    if pkg_version.parse(snowml_version.VERSION) < newest:
        warnings.warn(
            "The current snowflake-ml-python version out of date, package upgrade recommended "
            f"(current={snowml_version.VERSION}, recommended>={newest!s})",
            category=UserWarning,
            stacklevel=2,
        )
1792
+
1793
def _collapse_object_versions(self) -> List[pkg_version.Version]:
    """Collect the distinct package versions recorded in feature store object tags.

    The values of the ``_FEATURE_STORE_OBJECT_TAG`` tag are read through
    ``TAG_REFERENCES_INTERNAL``, decoded with ``_FeatureStoreObjInfo.from_json`` and their
    ``pkg_version`` fields parsed.

    Returns:
        The distinct versions sorted from highest to lowest. An empty list is returned when
        ``self._use_optimized_tag_ref`` is falsy or when the tag query itself fails.

    Raises:
        SnowflakeMLException: [RuntimeError] If any decoded tag value has the type
            ``_FeatureStoreObjTypes.UNKNOWN``.
    """
    if not self._use_optimized_tag_ref:
        return []

    query = f"""
        SELECT
            TAG_VALUE
        FROM TABLE(
            {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
                TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
            )
        )
    """
    try:
        tag_rows = self._session.sql(query).collect(statement_params=self._telemetry_stmp)
    except Exception:
        # since this is a best effort user warning to upgrade pkg versions
        # we are treating failures as benign error
        return []

    seen_versions = set()
    unknown_type_found = False
    for tag_row in tag_rows:
        obj_info = _FeatureStoreObjInfo.from_json(tag_row["TAG_VALUE"])
        if obj_info.type == _FeatureStoreObjTypes.UNKNOWN:
            unknown_type_found = True
        seen_versions.add(pkg_version.parse(obj_info.pkg_version))

    newest_first = sorted(seen_versions, reverse=True)
    if not unknown_type_found:
        return newest_first

    # The flag is only set inside the loop, right before a version is added, so
    # ``newest_first`` has at least one element here.
    raise snowml_exceptions.SnowflakeMLException(
        error_code=error_codes.SNOWML_PACKAGE_OUTDATED,
        original_exception=RuntimeError(
            f"The current snowflake-ml-python version {snowml_version.VERSION} is out of date, "
            f"please upgrade to at least {newest_first[0]}."
        ),
    )