snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. snowflake/ml/_internal/env_utils.py +66 -31
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
  7. snowflake/ml/dataset/__init__.py +10 -0
  8. snowflake/ml/dataset/dataset.py +454 -129
  9. snowflake/ml/dataset/dataset_factory.py +53 -0
  10. snowflake/ml/dataset/dataset_metadata.py +103 -0
  11. snowflake/ml/dataset/dataset_reader.py +202 -0
  12. snowflake/ml/feature_store/feature_store.py +408 -282
  13. snowflake/ml/feature_store/feature_view.py +37 -8
  14. snowflake/ml/fileset/embedded_stage_fs.py +146 -0
  15. snowflake/ml/fileset/sfcfs.py +0 -4
  16. snowflake/ml/fileset/snowfs.py +159 -0
  17. snowflake/ml/fileset/stage_fs.py +1 -4
  18. snowflake/ml/model/__init__.py +2 -2
  19. snowflake/ml/model/_api.py +16 -1
  20. snowflake/ml/model/_client/model/model_impl.py +27 -0
  21. snowflake/ml/model/_client/model/model_version_impl.py +135 -0
  22. snowflake/ml/model/_client/ops/model_ops.py +137 -67
  23. snowflake/ml/model/_client/sql/model.py +16 -14
  24. snowflake/ml/model/_client/sql/model_version.py +109 -1
  25. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  26. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  27. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  28. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  29. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  30. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  31. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
  32. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
  33. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  34. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
  35. snowflake/ml/model/_packager/model_packager.py +0 -3
  36. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  37. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  38. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  39. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  40. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
  41. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
  42. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
  43. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
  44. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
  45. snowflake/ml/modeling/cluster/birch.py +53 -52
  46. snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
  47. snowflake/ml/modeling/cluster/dbscan.py +51 -52
  48. snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
  49. snowflake/ml/modeling/cluster/k_means.py +53 -52
  50. snowflake/ml/modeling/cluster/mean_shift.py +51 -52
  51. snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
  52. snowflake/ml/modeling/cluster/optics.py +51 -52
  53. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
  54. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
  55. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
  56. snowflake/ml/modeling/compose/column_transformer.py +53 -52
  57. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
  58. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
  59. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
  60. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
  61. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
  62. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
  63. snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
  64. snowflake/ml/modeling/covariance/oas.py +51 -52
  65. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
  66. snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
  67. snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
  68. snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
  69. snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
  70. snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
  71. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
  72. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
  73. snowflake/ml/modeling/decomposition/pca.py +53 -52
  74. snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
  75. snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
  76. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
  77. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
  78. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
  79. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
  80. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
  81. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
  82. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
  83. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
  84. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
  85. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
  86. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
  87. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
  88. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
  89. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
  90. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
  91. snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
  92. snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
  93. snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
  94. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
  95. snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
  96. snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
  97. snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
  98. snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
  99. snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
  100. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
  101. snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
  102. snowflake/ml/modeling/framework/base.py +63 -36
  103. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
  104. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
  105. snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
  106. snowflake/ml/modeling/impute/knn_imputer.py +53 -52
  107. snowflake/ml/modeling/impute/missing_indicator.py +53 -52
  108. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
  109. snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
  110. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
  111. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
  112. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
  113. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
  114. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
  115. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
  116. snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
  117. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
  118. snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
  119. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
  120. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
  121. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
  122. snowflake/ml/modeling/linear_model/lars.py +51 -52
  123. snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
  124. snowflake/ml/modeling/linear_model/lasso.py +51 -52
  125. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
  126. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
  127. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
  128. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
  129. snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
  130. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
  131. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
  132. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
  133. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
  134. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
  135. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
  136. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
  137. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
  138. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
  139. snowflake/ml/modeling/linear_model/perceptron.py +51 -52
  140. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
  141. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
  142. snowflake/ml/modeling/linear_model/ridge.py +51 -52
  143. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
  144. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
  145. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
  146. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
  147. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
  148. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
  149. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
  150. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
  151. snowflake/ml/modeling/manifold/isomap.py +53 -52
  152. snowflake/ml/modeling/manifold/mds.py +53 -52
  153. snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
  154. snowflake/ml/modeling/manifold/tsne.py +53 -52
  155. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
  156. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
  157. snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
  158. snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
  159. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
  160. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
  161. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
  162. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
  163. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
  164. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
  165. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
  166. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
  167. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
  168. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
  169. snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
  170. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
  171. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
  172. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
  173. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
  174. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
  175. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
  176. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
  177. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
  178. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
  179. snowflake/ml/modeling/pipeline/pipeline.py +514 -32
  180. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
  181. snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
  182. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
  183. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
  184. snowflake/ml/modeling/svm/linear_svc.py +51 -52
  185. snowflake/ml/modeling/svm/linear_svr.py +51 -52
  186. snowflake/ml/modeling/svm/nu_svc.py +51 -52
  187. snowflake/ml/modeling/svm/nu_svr.py +51 -52
  188. snowflake/ml/modeling/svm/svc.py +51 -52
  189. snowflake/ml/modeling/svm/svr.py +51 -52
  190. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
  191. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
  192. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
  193. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
  194. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
  195. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
  196. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
  197. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
  198. snowflake/ml/registry/model_registry.py +3 -149
  199. snowflake/ml/version.py +1 -1
  200. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +63 -2
  201. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/RECORD +204 -196
  202. snowflake/ml/registry/_artifact_manager.py +0 -156
  203. snowflake/ml/registry/artifact.py +0 -46
  204. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
  205. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
  206. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
snowflake/ml/feature_store/feature_store.py
@@ -8,13 +8,17 @@ import re
 import warnings
 from dataclasses import dataclass
 from enum import Enum
-from typing import Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
 
+import packaging.version as pkg_version
+import snowflake.ml.version as snowml_version
 from pytimeparse.timeparse import timeparse
 from typing_extensions import Concatenate, ParamSpec
 
+from snowflake.ml import dataset
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.exceptions import (
+    dataset_errors,
     error_codes,
     exceptions as snowml_exceptions,
 )
@@ -23,12 +27,8 @@ from snowflake.ml._internal.utils.sql_identifier import (
     SqlIdentifier,
     to_sql_identifiers,
 )
-from snowflake.ml.dataset.dataset import Dataset, FeatureStoreMetadata
-from snowflake.ml.feature_store.entity import (
-    _ENTITY_NAME_LENGTH_LIMIT,
-    _FEATURE_VIEW_ENTITY_TAG_DELIMITER,
-    Entity,
-)
+from snowflake.ml.dataset.dataset_metadata import FeatureStoreMetadata
+from snowflake.ml.feature_store.entity import _ENTITY_NAME_LENGTH_LIMIT, Entity
 from snowflake.ml.feature_store.feature_view import (
     _FEATURE_OBJ_TYPE,
     _FEATURE_VIEW_NAME_DELIMITER,
@@ -37,11 +37,17 @@ from snowflake.ml.feature_store.feature_view import (
     FeatureViewSlice,
     FeatureViewStatus,
     FeatureViewVersion,
+    _FeatureViewMetadata,
 )
 from snowflake.snowpark import DataFrame, Row, Session, functions as F
-from snowflake.snowpark._internal import type_utils, utils as snowpark_utils
 from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark.types import StructField
+from snowflake.snowpark.types import (
+    ArrayType,
+    StringType,
+    StructField,
+    StructType,
+    TimestampType,
+)
 
 _Args = ParamSpec("_Args")
 _RT = TypeVar("_RT")
@@ -49,38 +55,80 @@ _RT = TypeVar("_RT")
 logger = logging.getLogger(__name__)
 
 _ENTITY_TAG_PREFIX = "SNOWML_FEATURE_STORE_ENTITY_"
-_FEATURE_VIEW_ENTITY_TAG = "SNOWML_FEATURE_STORE_FV_ENTITIES"
-_FEATURE_VIEW_TS_COL_TAG = "SNOWML_FEATURE_STORE_FV_TS_COL"
 _FEATURE_STORE_OBJECT_TAG = "SNOWML_FEATURE_STORE_OBJECT"
+_FEATURE_VIEW_METADATA_TAG = "SNOWML_FEATURE_VIEW_METADATA"
+
+
+@dataclass(frozen=True)
+class _FeatureStoreObjInfo:
+    type: _FeatureStoreObjTypes
+    pkg_version: str
+
+    def to_json(self) -> str:
+        state_dict = self.__dict__.copy()
+        state_dict["type"] = state_dict["type"].value
+        return json.dumps(state_dict)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> _FeatureStoreObjInfo:
+        json_dict = json.loads(json_str)
+        # since we may introduce new fields in the json blob in the future,
+        # in order to guarantee compatibility, we need to select ones that can be
+        # decoded in the current version
+        state_dict = {}
+        state_dict["type"] = _FeatureStoreObjTypes.parse(json_dict["type"])
+        state_dict["pkg_version"] = json_dict["pkg_version"]
+        return cls(**state_dict)  # type: ignore[arg-type]
 
 
 # TODO: remove "" after dataset is updated
 class _FeatureStoreObjTypes(Enum):
-    FEATURE_VIEW = "FEATURE_VIEW"
+    UNKNOWN = "UNKNOWN"  # for forward compatibility
+    MANAGED_FEATURE_VIEW = "MANAGED_FEATURE_VIEW"
+    EXTERNAL_FEATURE_VIEW = "EXTERNAL_FEATURE_VIEW"
     FEATURE_VIEW_REFRESH_TASK = "FEATURE_VIEW_REFRESH_TASK"
     TRAINING_DATA = ""
 
+    @classmethod
+    def parse(cls, val: str) -> _FeatureStoreObjTypes:
+        try:
+            return cls(val)
+        except ValueError:
+            return cls.UNKNOWN
+
 
 _PROJECT = "FeatureStore"
 _DT_OR_VIEW_QUERY_PATTERN = re.compile(
     r"""CREATE\ (OR\ REPLACE\ )?(?P<obj_type>(DYNAMIC\ TABLE|VIEW))\ .*
     COMMENT\ =\ '(?P<comment>.*)'\s*
-    TAG.*?{entity_tag}\ =\ '(?P<entities>.*?)',\n
-    .*?{ts_col_tag}\ =\ '(?P<ts_col>.*?)',?.*?
+    TAG.*?{fv_metadata_tag}\ =\ '(?P<fv_metadata>.*?)',?.*?
    AS\ (?P<query>.*)
     """.format(
-        entity_tag=_FEATURE_VIEW_ENTITY_TAG, ts_col_tag=_FEATURE_VIEW_TS_COL_TAG
+        fv_metadata_tag=_FEATURE_VIEW_METADATA_TAG,
     ),
     flags=re.DOTALL | re.IGNORECASE | re.X,
 )
 
+_LIST_FEATURE_VIEW_SCHEMA = StructType(
+    [
+        StructField("name", StringType()),
+        StructField("version", StringType()),
+        StructField("database_name", StringType()),
+        StructField("schema_name", StringType()),
+        StructField("created_on", TimestampType()),
+        StructField("owner", StringType()),
+        StructField("desc", StringType()),
+        StructField("entities", ArrayType(StringType())),
+    ]
+)
+
 
 class CreationMode(Enum):
     FAIL_IF_NOT_EXIST = 1
     CREATE_IF_NOT_EXIST = 2
 
 
-@dataclass
+@dataclass(frozen=True)
 class _FeatureStoreConfig:
     database: SqlIdentifier
     schema: SqlIdentifier
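Both feature store tags now carry JSON payloads instead of bare strings. A minimal, self-contained sketch of the round-trip this enables, using illustrative stand-ins for the `_FeatureStoreObjInfo`/`_FeatureStoreObjTypes` pair added above (only the names are simplified; the pattern mirrors the diff):

```python
from __future__ import annotations

import json
from dataclasses import dataclass
from enum import Enum


class ObjTypes(Enum):
    UNKNOWN = "UNKNOWN"  # returned for any value written by a newer package version
    MANAGED_FEATURE_VIEW = "MANAGED_FEATURE_VIEW"

    @classmethod
    def parse(cls, val: str) -> ObjTypes:
        try:
            return cls(val)
        except ValueError:
            return cls.UNKNOWN  # forward compatibility: never raise on unknown types


@dataclass(frozen=True)
class ObjInfo:
    type: ObjTypes
    pkg_version: str

    def to_json(self) -> str:
        state = self.__dict__.copy()  # frozen dataclasses still expose __dict__
        state["type"] = state["type"].value
        return json.dumps(state)

    @classmethod
    def from_json(cls, s: str) -> ObjInfo:
        d = json.loads(s)
        # pick only the fields this version knows; ignore extras from newer writers
        return cls(type=ObjTypes.parse(d["type"]), pkg_version=d["pkg_version"])


blob = ObjInfo(ObjTypes.MANAGED_FEATURE_VIEW, "1.5.0").to_json()
assert ObjInfo.from_json(blob).type is ObjTypes.MANAGED_FEATURE_VIEW
# a payload written by a future version degrades gracefully instead of raising:
assert ObjInfo.from_json('{"type": "SOME_NEW_TYPE", "pkg_version": "9.9.9"}').type is ObjTypes.UNKNOWN
```

Tagging each object with the writing package's version (plus the `_check_feature_store_object_versions` call added to `__init__` below) lets an older client detect objects created by a newer one.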
@@ -111,14 +159,14 @@ def switch_warehouse(
         return wrapper
 
 
-def dispatch_decorator(
-    prpr_version: str,
-) -> Callable[[Callable[Concatenate[FeatureStore, _Args], _RT]], Callable[Concatenate[FeatureStore, _Args], _RT],]:
+def dispatch_decorator() -> Callable[
+    [Callable[Concatenate[FeatureStore, _Args], _RT]],
+    Callable[Concatenate[FeatureStore, _Args], _RT],
+]:
     def decorator(
         f: Callable[Concatenate[FeatureStore, _Args], _RT]
     ) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
         @telemetry.send_api_usage_telemetry(project=_PROJECT)
-        @snowpark_utils.private_preview(version=prpr_version)
         @switch_warehouse
         @functools.wraps(f)
         def wrap(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
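Every public API method below switches from `@dispatch_decorator(prpr_version=...)` to the zero-argument `@dispatch_decorator()`: the `snowpark_utils.private_preview` wrapper and its version argument are dropped, leaving only telemetry, warehouse switching, and `functools.wraps`. A hedged sketch of the factory shape (placeholder class; the telemetry/warehouse decorators are elided as comments):

```python
import functools
from typing import Callable, TypeVar

from typing_extensions import Concatenate, ParamSpec

_Args = ParamSpec("_Args")
_RT = TypeVar("_RT")


class FeatureStore: ...  # stand-in for the real class


def dispatch_decorator() -> Callable[
    [Callable[Concatenate[FeatureStore, _Args], _RT]],
    Callable[Concatenate[FeatureStore, _Args], _RT],
]:
    def decorator(
        f: Callable[Concatenate[FeatureStore, _Args], _RT]
    ) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
        # the shipped version also applies @telemetry.send_api_usage_telemetry
        # and @switch_warehouse here; private_preview is gone
        @functools.wraps(f)
        def wrap(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
            return f(self, *args, **kargs)

        return wrap

    return decorator
```

ParamSpec plus Concatenate preserves the wrapped method's full signature for type checkers, which is why the factory keeps this shape even with no arguments left.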
@@ -135,7 +183,6 @@ class FeatureStore:
     """
 
     @telemetry.send_api_usage_telemetry(project=_PROJECT)
-    @snowpark_utils.private_preview(version="1.0.8")
     def __init__(
         self,
         session: Session,
@@ -178,7 +225,7 @@ class FeatureStore:
         # search space used in query "SHOW <object_TYPE> LIKE <object_name> IN <search_space>"
         # object domain used in query "TAG_REFERENCE(<object_name>, <object_domain>)"
         self._obj_search_spaces = {
-            "TABLES": (self._config.full_schema_path, "TABLE"),
+            "DATASETS": (self._config.full_schema_path, "DATASET"),
             "DYNAMIC TABLES": (self._config.full_schema_path, "TABLE"),
             "VIEWS": (self._config.full_schema_path, "TABLE"),
             "SCHEMAS": (f"DATABASE {self._config.database}", "SCHEMA"),
@@ -200,8 +247,7 @@ class FeatureStore:
                 )
                 for tag in to_sql_identifiers(
                     [
-                        _FEATURE_VIEW_ENTITY_TAG,
-                        _FEATURE_VIEW_TS_COL_TAG,
+                        _FEATURE_VIEW_METADATA_TAG,
                     ]
                 ):
                    self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
@@ -209,8 +255,7 @@ class FeatureStore:
                     )
 
                 self._session.sql(
-                    f"""CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
-                    ALLOWED_VALUES {','.join([f"'{v.value}'" for v in _FeatureStoreObjTypes])}"""
+                    f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}"
                 ).collect(statement_params=self._telemetry_stmp)
             except Exception as e:
                 self.clear()
@@ -219,10 +264,12 @@ class FeatureStore:
                     original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
                 )
 
+        # TODO: remove this after tag_ref_internal rollout
+        self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
+        self._check_feature_store_object_versions()
         logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
 
     @telemetry.send_api_usage_telemetry(project=_PROJECT)
-    @snowpark_utils.private_preview(version="1.0.12")
     def update_default_warehouse(self, warehouse_name: str) -> None:
         """Update default warehouse for feature store.
 
@@ -242,7 +289,7 @@ class FeatureStore:
 
         self._default_warehouse = warehouse
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def register_entity(self, entity: Entity) -> Entity:
         """
         Register Entity in the FeatureStore.
@@ -268,13 +315,13 @@ class FeatureStore:
             return entity
 
         # allowed_values will add double-quotes around each value, thus use resolved str here.
-        join_keys = [f"'{key.resolved()}'" for key in entity.join_keys]
+        join_keys = [f"{key.resolved()}" for key in entity.join_keys]
         join_keys_str = ",".join(join_keys)
         full_tag_name = self._get_fully_qualified_name(tag_name)
         try:
             self._session.sql(
                 f"""CREATE TAG IF NOT EXISTS {full_tag_name}
-                    ALLOWED_VALUES {join_keys_str}
+                    ALLOWED_VALUES '{join_keys_str}'
                     COMMENT = '{entity.desc}'
                 """
             ).collect(statement_params=self._telemetry_stmp)
@@ -289,7 +336,7 @@ class FeatureStore:
         return self.get_entity(entity.name)
 
     # TODO: add support to update column desc once SNOW-894249 is fixed
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def register_feature_view(
         self,
         feature_view: FeatureView,
@@ -342,7 +389,6 @@ class FeatureStore:
                 ),
             )
 
-        # TODO: ideally we should move this to FeatureView creation time
         for e in feature_view.entities:
             if not self._validate_entity_exists(e.name):
                 raise snowml_exceptions.SnowflakeMLException(
@@ -358,12 +404,23 @@ class FeatureStore:
             pass
 
         fully_qualified_name = self._get_fully_qualified_name(feature_view_name)
-        entities = _FEATURE_VIEW_ENTITY_TAG_DELIMITER.join([e.name for e in feature_view.entities])
-        timestamp_col = (
-            feature_view.timestamp_col
-            if feature_view.timestamp_col is not None
-            else SqlIdentifier(_TIMESTAMP_COL_PLACEHOLDER)
-        )
+        refresh_freq = feature_view.refresh_freq
+
+        if refresh_freq is not None:
+            obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.MANAGED_FEATURE_VIEW, snowml_version.VERSION)
+        else:
+            obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW, snowml_version.VERSION)
+
+        tagging_clause = [
+            f"{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '{obj_info.to_json()}'",
+            f"{self._get_fully_qualified_name(_FEATURE_VIEW_METADATA_TAG)} = '{feature_view._metadata().to_json()}'",
+        ]
+        for e in feature_view.entities:
+            join_keys = [f"{key.resolved()}" for key in e.join_keys]
+            tagging_clause.append(
+                f"{self._get_fully_qualified_name(self._get_entity_name(e.name))} = '{','.join(join_keys)}'"
+            )
+        tagging_clause_str = ",\n".join(tagging_clause)
 
         def create_col_desc(col: StructField) -> str:
             desc = feature_view.feature_descs.get(SqlIdentifier(col.name), None)
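For orientation, the rendered TAG clause for one registered feature view now looks roughly like the following. This is a hypothetical sketch: the database/schema prefix, entity name, join keys, and the exact `_FeatureViewMetadata` JSON key names are illustrative assumptions, not the shipped output.

```python
# Hypothetical rendering of tagging_clause_str for a feature view with a
# single CUSTOMER entity; every concrete name below is made up.
obj_info_json = '{"type": "MANAGED_FEATURE_VIEW", "pkg_version": "1.5.0"}'
fv_metadata_json = '{"entities": ["CUSTOMER"], "timestamp_col": "EVENT_TS"}'  # assumed shape

tagging_clause = [
    f"FS_DB.FS_SCHEMA.SNOWML_FEATURE_STORE_OBJECT = '{obj_info_json}'",
    f"FS_DB.FS_SCHEMA.SNOWML_FEATURE_VIEW_METADATA = '{fv_metadata_json}'",
    # one tag per entity, carrying that entity's join keys:
    "FS_DB.FS_SCHEMA.SNOWML_FEATURE_STORE_ENTITY_CUSTOMER = 'CUSTOMER_ID'",
]
print(",\n".join(tagging_clause))
```

The two legacy tags (entities and timestamp column) collapse into the single `SNOWML_FEATURE_VIEW_METADATA` JSON blob, while per-entity tags now carry the join keys directly on the object.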
@@ -371,7 +428,6 @@ class FeatureStore:
             return f"{col.name} {desc}"
 
         column_descs = ", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
-        refresh_freq = feature_view.refresh_freq
 
         if refresh_freq is not None:
             schedule_task = refresh_freq != "DOWNSTREAM" and timeparse(refresh_freq) is None
@@ -380,10 +436,9 @@ class FeatureStore:
                 feature_view,
                 fully_qualified_name,
                 column_descs,
-                entities,
+                tagging_clause_str,
                 schedule_task,
                 self._default_warehouse,
-                timestamp_col,
                 block,
                 overwrite,
             )
@@ -393,9 +448,7 @@ class FeatureStore:
             query = f"""CREATE{overwrite_clause} VIEW {fully_qualified_name} ({column_descs})
                 COMMENT = '{feature_view.desc}'
                 TAG (
-                    {_FEATURE_VIEW_ENTITY_TAG} = '{entities}',
-                    {_FEATURE_VIEW_TS_COL_TAG} = '{timestamp_col}',
-                    {_FEATURE_STORE_OBJECT_TAG} = '{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
+                    {tagging_clause_str}
                 )
                 AS {feature_view.query}
             """
@@ -406,10 +459,10 @@ class FeatureStore:
                 original_exception=RuntimeError(f"Create view {fully_qualified_name} [\n{query}\n] failed: {e}"),
             ) from e
 
-        logger.info(f"Registered FeatureView {feature_view.name}/{version}.")
+        logger.info(f"Registered FeatureView {feature_view.name}/{version} successfully.")
         return self.get_feature_view(feature_view.name, str(version))
 
-    @dispatch_decorator(prpr_version="1.1.0")
+    @dispatch_decorator()
     def update_feature_view(
         self, name: str, version: str, refresh_freq: Optional[str] = None, warehouse: Optional[str] = None
     ) -> FeatureView:
@@ -456,7 +509,7 @@ class FeatureStore:
             ) from e
         return self.get_feature_view(name=name, version=version)
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
         """
         Read FeatureView data.
@@ -478,13 +531,12 @@ class FeatureStore:
 
         return self._session.sql(f"SELECT * FROM {feature_view.fully_qualified_name()}")
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def list_feature_views(
         self,
         entity_name: Optional[str] = None,
         feature_view_name: Optional[str] = None,
-        as_dataframe: bool = True,
-    ) -> Union[Optional[DataFrame], List[FeatureView]]:
+    ) -> DataFrame:
         """
         List FeatureViews in the FeatureStore.
         If entity_name is specified, FeatureViews associated with that Entity will be listed.
@@ -493,34 +545,26 @@ class FeatureStore:
         Args:
             entity_name: Entity name.
             feature_view_name: FeatureView name.
-            as_dataframe: whether the return type should be a DataFrame.
 
         Returns:
-            List of FeatureViews or in a DataFrame representation.
+            FeatureViews information as a Snowpark DataFrame.
         """
-        if entity_name is not None:
-            entity_name = SqlIdentifier(entity_name)
         if feature_view_name is not None:
             feature_view_name = SqlIdentifier(feature_view_name)
 
         if entity_name is not None:
-            fvs = self._find_feature_views(entity_name, feature_view_name)
+            entity_name = SqlIdentifier(entity_name)
+            if self._use_optimized_tag_ref:
+                return self._optimized_find_feature_views(entity_name, feature_view_name)
+            else:
+                return self._find_feature_views(entity_name, feature_view_name)
         else:
-            fvs = []
-            entities = self.list_entities().collect()
+            output_values: List[List[Any]] = []
             for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
-                fvs.append(self._compose_feature_view(row, entities))
-
-        if as_dataframe:
-            result = None
-            for fv in fvs:
-                fv_df = fv.to_df(self._session)
-                result = fv_df if result is None else result.union(fv_df)  # type: ignore[attr-defined]
-            return result
-        else:
-            return fvs
+                self._extract_feature_view_info(row, output_values)
+            return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def get_feature_view(self, name: str, version: str) -> FeatureView:
         """
         Retrieve previously registered FeatureView.
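This is a breaking change for 1.4.1 callers that relied on `as_dataframe=False`: `list_feature_views` now always returns a Snowpark DataFrame with the `_LIST_FEATURE_VIEW_SCHEMA` columns defined above. A usage sketch, assuming `fs` is an already-connected `FeatureStore`:

```python
# the result is metadata-only; no FeatureView objects are materialized
fvs_df = fs.list_feature_views(entity_name="CUSTOMER")
fvs_df.show()  # name, version, database_name, schema_name, created_on, owner, desc, entities

# where old code consumed List[FeatureView], collect rows and fetch each
# full object explicitly (assumed migration path, mirroring delete_entity below):
rows = fvs_df.collect()
feature_views = [fs.get_feature_view(r["NAME"], r["VERSION"]) for r in rows]
```

Returning rows instead of fully composed `FeatureView` objects avoids parsing every view's DDL just to list them.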
@@ -549,7 +593,7 @@ class FeatureStore:
 
         return self._compose_feature_view(results[0], self.list_entities().collect())
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
         """
         Resume a previously suspended FeatureView.
@@ -562,7 +606,7 @@ class FeatureStore:
         """
         return self._update_feature_view_status(feature_view, "RESUME")
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
         """
         Suspend an active FeatureView.
@@ -575,7 +619,7 @@ class FeatureStore:
         """
         return self._update_feature_view_status(feature_view, "SUSPEND")
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def delete_feature_view(self, feature_view: FeatureView) -> None:
         """
         Delete a FeatureView.
@@ -586,6 +630,8 @@ class FeatureStore:
         Raises:
             SnowflakeMLException: [ValueError] FeatureView is not registered.
         """
+        # TODO: we should leverage lineage graph to check downstream deps, and block the deletion
+        # if there're other FVs depending on this
         if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.NOT_FOUND,
@@ -608,7 +654,7 @@ class FeatureStore:
 
         logger.info(f"Deleted FeatureView {feature_view.name}/{feature_view.version}.")
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def list_entities(self) -> DataFrame:
         """
         List all Entities in the FeatureStore.
@@ -629,7 +675,7 @@ class FeatureStore:
             ),
         )
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def get_entity(self, name: str) -> Entity:
         """
         Retrieve previously registered Entity object.
@@ -659,8 +705,7 @@ class FeatureStore:
                 original_exception=ValueError(f"Cannot find Entity with name: {name}."),
             )
 
-        raw_join_keys = result[0]["JOIN_KEYS"]
-        join_keys = raw_join_keys.strip("[]").split(",")
+        join_keys = self._recompose_join_keys(result[0]["JOIN_KEYS"])
 
         return Entity._construct_entity(
             name=SqlIdentifier(result[0]["NAME"], case_sensitive=True).identifier(),
@@ -669,7 +714,7 @@ class FeatureStore:
             owner=result[0]["OWNER"],
         )
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def delete_entity(self, name: str) -> None:
         """
         Delete a previously registered Entity.
@@ -690,13 +735,13 @@ class FeatureStore:
                 original_exception=ValueError(f"Entity {name} does not exist."),
             )
 
-        active_feature_views = cast(List[FeatureView], self.list_feature_views(entity_name=name, as_dataframe=False))
+        active_feature_views = self.list_feature_views(entity_name=name).collect(statement_params=self._telemetry_stmp)
+
         if len(active_feature_views) > 0:
+            active_fvs = [r["NAME"] for r in active_feature_views]
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.SNOWML_DELETE_FAILED,
-                original_exception=ValueError(
-                    f"Cannot delete Entity {name} due to active FeatureViews: {[f.name for f in active_feature_views]}."
-                ),
+                original_exception=ValueError(f"Cannot delete Entity {name} due to active FeatureViews: {active_fvs}."),
             )
 
         tag_name = self._get_fully_qualified_name(self._get_entity_name(name))
@@ -709,7 +754,7 @@ class FeatureStore:
             ) from e
         logger.info(f"Deleted Entity {name}.")
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def retrieve_feature_values(
         self,
         spine_df: DataFrame,
@@ -757,39 +802,35 @@ class FeatureStore:
 
         return df
 
-    @dispatch_decorator(prpr_version="1.0.8")
+    @dispatch_decorator()
     def generate_dataset(
         self,
+        name: str,
         spine_df: DataFrame,
         features: List[Union[FeatureView, FeatureViewSlice]],
-        materialized_table: Optional[str] = None,
+        version: Optional[str] = None,
         spine_timestamp_col: Optional[str] = None,
         spine_label_cols: Optional[List[str]] = None,
         exclude_columns: Optional[List[str]] = None,
-        save_mode: str = "errorifexists",
         include_feature_view_timestamp_col: bool = False,
         desc: str = "",
-    ) -> Dataset:
+    ) -> dataset.Dataset:
         """
         Generate dataset by given source table and feature views.
 
         Args:
+            name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
+                by their name and version.
             spine_df: The fact table contains the raw dataset.
             features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
-            materialized_table: The destination table where produced result will be stored. If it's none, then result
-                won't be registered. If materialized_table is provided, then produced result will be written into
-                the provided table. Note result dataset will be a snowflake clone of registered table.
-                New data can append on same registered table and previously generated dataset won't be affected.
-                Default result table name will be a concatenation of materialized_table name and current timestamp.
+            version: The version of the Dataset to be generated. If none specified, the current timestamp
+                will be used instead.
             spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
                 time-series features. If spine_timestamp_col is not none, the input features also must have
                 timestamp_col.
             spine_label_cols: Name of column(s) in spine_df that contains labels.
             exclude_columns: Column names to exclude from the result dataframe.
                 The underlying storage will still contain the columns.
-            save_mode: How new data is saved. currently support:
-                errorifexists: Raise error if registered table already exists.
-                merge: Merge new data if registered table already exists.
             include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
                 (if feature view has timestamp column) if set true. Default to false.
             desc: A description about this dataset.
@@ -798,10 +839,8 @@ class FeatureStore:
             A Dataset object.
 
         Raises:
-            SnowflakeMLException: [ValueError] save_mode is invalid.
             SnowflakeMLException: [ValueError] spine_df contains more than one query.
-            SnowflakeMLException: [ValueError] Materialized_table contains invalid char `.`.
-            SnowflakeMLException: [ValueError] Materialized_table already exists with save_mode `errorifexists`.
+            SnowflakeMLException: [ValueError] Dataset name/version already exists
             SnowflakeMLException: [ValueError] Snapshot creation failed.
             SnowflakeMLException: [RuntimeError] Failed to create clone from table.
             SnowflakeMLException: [RuntimeError] Failed to find resources.
@@ -811,15 +850,6 @@ class FeatureStore:
         if spine_label_cols is not None:
             spine_label_cols = to_sql_identifiers(spine_label_cols)  # type: ignore[assignment]
 
-        allowed_save_mode = {"errorifexists", "merge"}
-        if save_mode.lower() not in allowed_save_mode:
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INVALID_ARGUMENT,
-                original_exception=ValueError(
-                    f"'{save_mode}' is not supported. Current supported save modes: {','.join(allowed_save_mode)}"
-                ),
-            )
-
         if len(spine_df.queries["queries"]) != 1:
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.INVALID_ARGUMENT,
@@ -832,70 +862,55 @@ class FeatureStore:
             spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
         )
 
-        snapshot_table = None
-        if materialized_table is not None:
-            if "." in materialized_table:
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_ARGUMENT,
-                    original_exception=ValueError(f"materialized_table {materialized_table} contains invalid char `.`"),
-                )
-
-            # TODO (wezhou) change materialized_table to SqlIdentifier
-            found_rows = self._find_object("TABLES", SqlIdentifier(materialized_table))
-            if save_mode.lower() == "errorifexists" and len(found_rows) > 0:
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.OBJECT_ALREADY_EXISTS,
-                    original_exception=ValueError(f"Dataset table {materialized_table} already exists."),
-                )
-
-            self._dump_dataset(result_df, materialized_table, join_keys, spine_timestamp_col)
-
-            snapshot_table = f"{materialized_table}_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
-            snapshot_table = self._get_fully_qualified_name(snapshot_table)
-            materialized_table = self._get_fully_qualified_name(materialized_table)
-
-            try:
-                self._session.sql(f"CREATE TABLE {snapshot_table} CLONE {materialized_table}").collect(
-                    statement_params=self._telemetry_stmp
-                )
-            except Exception as e:
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
-                    original_exception=RuntimeError(
-                        f"Failed to create clone {materialized_table} from table {snapshot_table}: {e}."
-                    ),
-                ) from e
-
-            result_df = self._session.sql(f"SELECT * FROM {snapshot_table}")
+        # Convert name to fully qualified name if not already fully qualified
+        db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
+        name = "{}.{}.{}".format(
+            db_name or self._config.database,
+            schema_name or self._config.schema,
+            object_name,
+        )
+        version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
 
         if exclude_columns is not None:
             result_df = self._exclude_columns(result_df, exclude_columns)
 
         fs_meta = FeatureStoreMetadata(
             spine_query=spine_df.queries["queries"][0],
-            connection_params=vars(self._config),
-            features=[fv.to_json() for fv in features],
+            serialized_feature_views=[fv.to_json() for fv in features],
+            spine_timestamp_col=spine_timestamp_col,
         )
 
-        dataset = Dataset(
-            self._session,
-            df=result_df,
-            materialized_table=materialized_table,
-            snapshot_table=snapshot_table,
-            timestamp_col=spine_timestamp_col,
-            label_cols=spine_label_cols,
-            feature_store_metadata=fs_meta,
-            desc=desc,
-        )
-        return dataset
+        try:
+            ds: dataset.Dataset = dataset.create_from_dataframe(
+                self._session,
+                name,
+                version,
+                input_dataframe=result_df,
+                exclude_cols=[spine_timestamp_col],
+                label_cols=spine_label_cols,
+                properties=fs_meta,
+                comment=desc,
+            )
+            return ds
 
-    @dispatch_decorator(prpr_version="1.0.8")
-    def load_feature_views_from_dataset(self, dataset: Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
+        except dataset_errors.DatasetExistError as e:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.OBJECT_ALREADY_EXISTS,
+                original_exception=ValueError(str(e)),
+            ) from e
+        except SnowparkSQLException as e:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                original_exception=RuntimeError(f"An error occurred during Dataset generation: {e}."),
+            ) from e
+
+    @dispatch_decorator()
+    def load_feature_views_from_dataset(self, ds: dataset.Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
         """
         Retrieve FeatureViews used during Dataset construction.
 
         Args:
-            dataset: Dataset object created from feature store.
+            ds: Dataset object created from feature store.
 
         Returns:
             List of FeatureViews used during Dataset construction.
903
918
  Raises:
904
919
  ValueError: if dataset object is not generated from feature store.
905
920
  """
906
- serialized_objs = dataset.load_features()
907
- if serialized_objs is None:
908
- raise ValueError(f"Dataset {dataset} does not contain valid feature view information.")
921
+ assert ds.selected_version is not None
922
+ source_meta = ds.selected_version._get_metadata()
923
+ if (
924
+ source_meta is None
925
+ or not isinstance(source_meta.properties, FeatureStoreMetadata)
926
+ or source_meta.properties.serialized_feature_views is None
927
+ ):
928
+ raise ValueError(f"Dataset {ds} does not contain valid feature view information.")
909
929
 
910
- return self._load_serialized_feature_objects(serialized_objs)
930
+ return self._load_serialized_feature_objects(source_meta.properties.serialized_feature_views)
911
931
 
912
- @dispatch_decorator(prpr_version="1.0.8")
932
+ @dispatch_decorator()
913
933
  def clear(self) -> None:
914
934
  """
915
935
  Clear all feature store internal objects including feature views, entities etc. Note feature store
@@ -929,7 +949,11 @@ class FeatureStore:
929
949
  if len(result) == 0:
930
950
  return
931
951
 
932
- object_types = ["DYNAMIC TABLES", "TABLES", "VIEWS", "TASKS"]
952
+ fs_obj_tag = self._find_object("TAGS", SqlIdentifier(_FEATURE_STORE_OBJECT_TAG))
953
+ if len(fs_obj_tag) == 0:
954
+ return
955
+
956
+ object_types = ["DYNAMIC TABLES", "DATASETS", "VIEWS", "TASKS"]
933
957
  for obj_type in object_types:
934
958
  all_object_rows = self._find_object(obj_type, None)
935
959
  for row in all_object_rows:
@@ -939,9 +963,8 @@ class FeatureStore:
939
963
 
940
964
  entity_tags = self._find_object("TAGS", SqlIdentifier(_ENTITY_TAG_PREFIX), prefix_match=True)
941
965
  all_tags = [
942
- _FEATURE_VIEW_ENTITY_TAG,
943
- _FEATURE_VIEW_TS_COL_TAG,
944
966
  _FEATURE_STORE_OBJECT_TAG,
967
+ _FEATURE_VIEW_METADATA_TAG,
945
968
  ] + [SqlIdentifier(row["name"], case_sensitive=True) for row in entity_tags]
946
969
  for tag_name in all_tags:
947
970
  obj_name = self._get_fully_qualified_name(tag_name)
@@ -965,37 +988,47 @@ class FeatureStore:
965
988
  )
966
989
  return existing_fv
967
990
 
991
+ def _recompose_join_keys(self, join_key: str) -> List[str]:
992
+ # ALLOWED_VALUES in TAG will follow format ["key_1,key2,..."]
993
+ # since keys are already resolved following the SQL identifier rule on the write path,
994
+ # we simply parse the keys back and wrap them with quotes to preserve cases
995
+ # Example join_key repr from TAG value: "[key1,key2,key3]"
996
+ join_keys = join_key[2:-2].split(",")
997
+ res = []
998
+ for k in join_keys:
999
+ res.append(f'"{k}"')
1000
+ return res
1001
+
968
1002
  def _create_dynamic_table(
969
1003
  self,
970
1004
  feature_view_name: SqlIdentifier,
971
1005
  feature_view: FeatureView,
972
1006
  fully_qualified_name: str,
973
1007
  column_descs: str,
974
- entities: str,
1008
+ tagging_clause: str,
975
1009
  schedule_task: bool,
976
1010
  warehouse: SqlIdentifier,
977
- timestamp_col: SqlIdentifier,
978
1011
  block: bool,
979
1012
  override: bool,
980
1013
  ) -> None:
981
1014
  # TODO: cluster by join keys once DT supports that
982
- override_clause = " OR REPLACE" if override else ""
983
- query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
984
- TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
985
- COMMENT = '{feature_view.desc}'
986
- TAG (
987
- {self._get_fully_qualified_name(_FEATURE_VIEW_ENTITY_TAG)} = '{entities}',
988
- {self._get_fully_qualified_name(_FEATURE_VIEW_TS_COL_TAG)} = '{timestamp_col}',
989
- {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} =
990
- '{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
991
- )
992
- WAREHOUSE = {warehouse}
993
- AS {feature_view.query}
994
- """
995
1015
  try:
1016
+ override_clause = " OR REPLACE" if override else ""
1017
+ query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
1018
+ TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
1019
+ COMMENT = '{feature_view.desc}'
1020
+ TAG (
1021
+ {tagging_clause}
1022
+ )
1023
+ WAREHOUSE = {warehouse}
1024
+ AS {feature_view.query}
1025
+ """
996
1026
  self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
997
1027
 
998
1028
  if schedule_task:
1029
+ task_obj_info = _FeatureStoreObjInfo(
1030
+ _FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK, snowml_version.VERSION
1031
+ )
999
1032
  try:
1000
1033
  self._session.sql(
1001
1034
  f"""CREATE{override_clause} TASK {fully_qualified_name}
@@ -1007,8 +1040,7 @@ class FeatureStore:
1007
1040
  self._session.sql(
1008
1041
  f"""
1009
1042
  ALTER TASK {fully_qualified_name}
1010
- SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
1011
- ='{_FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK.value}'
1043
+ SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}='{task_obj_info.to_json()}'
1012
1044
  """
1013
1045
  ).collect(statement_params=self._telemetry_stmp)
1014
1046
  self._session.sql(f"ALTER TASK {fully_qualified_name} RESUME").collect(
@@ -1049,57 +1081,6 @@ class FeatureStore:
1049
1081
  category=UserWarning,
1050
1082
  )
1051
1083
 
1052
- def _dump_dataset(
1053
- self,
1054
- df: DataFrame,
1055
- table_name: str,
1056
- join_keys: List[SqlIdentifier],
1057
- spine_timestamp_col: Optional[SqlIdentifier] = None,
1058
- ) -> None:
1059
- if len(df.queries["queries"]) != 1:
1060
- raise snowml_exceptions.SnowflakeMLException(
1061
- error_code=error_codes.INVALID_ARGUMENT,
1062
- original_exception=ValueError(f"Dataset df must contain only one query. Got: {df.queries['queries']}"),
1063
- )
1064
- schema = ", ".join([f"{c.name} {type_utils.convert_sp_to_sf_type(c.datatype)}" for c in df.schema.fields])
1065
- fully_qualified_name = self._get_fully_qualified_name(table_name)
1066
-
1067
- try:
1068
- self._session.sql(
1069
- f"""CREATE TABLE IF NOT EXISTS {fully_qualified_name} ({schema})
1070
- CLUSTER BY ({', '.join(join_keys)})
1071
- TAG ({self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '')
1072
- """
1073
- ).collect(block=True, statement_params=self._telemetry_stmp)
1074
- except Exception as e:
1075
- raise snowml_exceptions.SnowflakeMLException(
1076
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1077
- original_exception=RuntimeError(f"Failed to create table {fully_qualified_name}: {e}."),
1078
- ) from e
1079
-
1080
- source_query = df.queries["queries"][0]
1081
-
1082
- if spine_timestamp_col is not None:
1083
- join_keys.append(spine_timestamp_col)
1084
-
1085
- _, _, dest_alias, _ = identifier.parse_schema_level_object_identifier(fully_qualified_name)
1086
- source_alias = f"{dest_alias}_source"
1087
- join_cond = " AND ".join([f"{dest_alias}.{k} = {source_alias}.{k}" for k in join_keys])
1088
- update_clause = ", ".join([f"{dest_alias}.{c} = {source_alias}.{c}" for c in df.columns])
1089
- insert_clause = ", ".join([f"{source_alias}.{c}" for c in df.columns])
1090
- query = f"""
1091
- MERGE INTO {fully_qualified_name} USING ({source_query}) {source_alias} ON {join_cond}
1092
- WHEN MATCHED THEN UPDATE SET {update_clause}
1093
- WHEN NOT MATCHED THEN INSERT ({', '.join(df.columns)}) VALUES ({insert_clause})
1094
- """
1095
- try:
1096
- self._session.sql(query).collect(block=True, statement_params=self._telemetry_stmp)
1097
- except Exception as e:
1098
- raise snowml_exceptions.SnowflakeMLException(
1099
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1100
- original_exception=RuntimeError(f"Failed to create dataset {fully_qualified_name} with merge: {e}."),
1101
- ) from e
1102
-
1103
1084
  def _validate_entity_exists(self, name: SqlIdentifier) -> bool:
1104
1085
  full_entity_tag_name = self._get_entity_name(name)
1105
1086
  found_rows = self._find_object("TAGS", full_entity_tag_name)
@@ -1150,7 +1131,7 @@ class FeatureStore:
1150
1131
  else:
1151
1132
  cols = f.feature_names
1152
1133
 
1153
- join_keys = [k for e in f.entities for k in e.join_keys]
1134
+ join_keys = list({k for e in f.entities for k in e.join_keys})
1154
1135
  join_keys_str = ", ".join(join_keys)
1155
1136
  assert f.version is not None
1156
1137
  join_table_name = f.fully_qualified_name()
@@ -1227,8 +1208,7 @@ class FeatureStore:
1227
1208
  for tag_name in to_sql_identifiers(
1228
1209
  [
1229
1210
  _FEATURE_STORE_OBJECT_TAG,
1230
- _FEATURE_VIEW_ENTITY_TAG,
1231
- _FEATURE_VIEW_TS_COL_TAG,
1211
+ _FEATURE_VIEW_METADATA_TAG,
1232
1212
  ]
1233
1213
  ):
1234
1214
  tag_result = self._find_object("TAGS", tag_name)
@@ -1340,7 +1320,8 @@ class FeatureStore:
1340
1320
 
1341
1321
  # Part 4: join original spine table with window table
1342
1322
  prefix_f_only_cols = to_sql_identifiers(
1343
- [f"{temp_prefix}{name.resolved()}" for name in f_only_cols], case_sensitive=True
1323
+ [f"{temp_prefix}{name.resolved()}" for name in f_only_cols],
1324
+ case_sensitive=True,
1344
1325
  )
1345
1326
  last_select = f"""
1346
1327
  SELECT
@@ -1373,7 +1354,10 @@ class FeatureStore:
1373
1354
  return dynamic_table_results + view_results
1374
1355
 
1375
1356
  def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
1376
- assert operation in ["RESUME", "SUSPEND"], f"Operation: {operation} not supported"
1357
+ assert operation in [
1358
+ "RESUME",
1359
+ "SUSPEND",
1360
+ ], f"Operation: {operation} not supported"
1377
1361
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
1378
1362
  raise snowml_exceptions.SnowflakeMLException(
1379
1363
  error_code=error_codes.NOT_FOUND,
@@ -1397,17 +1381,76 @@ class FeatureStore:
1397
1381
  logger.info(f"Successfully {operation} FeatureView {feature_view.name}/{feature_view.version}.")
1398
1382
  return self.get_feature_view(feature_view.name, feature_view.version)
1399
1383
 
1400
- def _find_feature_views(
1384
+ def _optimized_find_feature_views(
1401
1385
  self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]
1402
- ) -> List[FeatureView]:
1386
+ ) -> DataFrame:
1403
1387
  if not self._validate_entity_exists(entity_name):
1404
- return []
1388
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1405
1389
 
1390
+ # TODO: this can be optimized further by directly getting all possible FVs and filter by tag
1391
+ # it's easier to rewrite the code once we can remove the tag_reference path
1406
1392
  all_fvs = self._get_fv_backend_representations(object_name=None)
1407
1393
  fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1408
1394
 
1409
1395
  if len(fv_maps.keys()) == 0:
1410
- return []
1396
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1397
+
1398
+ filter_clause = f"WHERE OBJECT_NAME LIKE '{feature_view_name.resolved()}%'" if feature_view_name else ""
1399
+ try:
1400
+ res = self._session.sql(
1401
+ f"""
1402
+ SELECT
1403
+ OBJECT_NAME
1404
+ FROM TABLE(
1405
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1406
+ TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
1407
+ )
1408
+ ) {filter_clause}"""
1409
+ ).collect(statement_params=self._telemetry_stmp)
1410
+ except Exception as e:
1411
+ raise snowml_exceptions.SnowflakeMLException(
1412
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1413
+ original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
1414
+ ) from e
1415
+
1416
+ output_values: List[List[Any]] = []
1417
+ for r in res:
1418
+ row = fv_maps[SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)]
1419
+ self._extract_feature_view_info(row, output_values)
1420
+
1421
+ return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1422
+
1423
+ def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
1424
+ name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
1425
+ m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
1426
+ if m is None:
1427
+ raise snowml_exceptions.SnowflakeMLException(
1428
+ error_code=error_codes.INTERNAL_SNOWML_ERROR,
1429
+ original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1430
+ )
1431
+
1432
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1433
+
1434
+ values: List[Any] = []
1435
+ values.append(name)
1436
+ values.append(version)
1437
+ values.append(row["database_name"])
1438
+ values.append(row["schema_name"])
1439
+ values.append(row["created_on"])
1440
+ values.append(row["owner"])
1441
+ values.append(row["comment"])
1442
+ values.append(fv_metadata.entities)
1443
+ output_values.append(values)
1444
+
1445
+ def _find_feature_views(self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]) -> DataFrame:
1446
+ if not self._validate_entity_exists(entity_name):
1447
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1448
+
1449
+ all_fvs = self._get_fv_backend_representations(object_name=None)
1450
+ fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1451
+
1452
+ if len(fv_maps.keys()) == 0:
1453
+ return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1411
1454
 
1412
1455
  # NOTE: querying INFORMATION_SCHEMA for Entity lineage can be expensive depending on how many active
1413
1456
  # FeatureViews there are. If this ever become an issue, consider exploring improvements.
@@ -1424,7 +1467,7 @@ class FeatureStore:
                  )
              )
              WHERE LEVEL = 'TABLE'
-             AND TAG_NAME = '{_FEATURE_VIEW_ENTITY_TAG}'
+             AND TAG_NAME = '{_FEATURE_VIEW_METADATA_TAG}'
              """
              for fv_name in fv_maps.keys()
          ]
@@ -1436,21 +1479,22 @@ class FeatureStore:
                  original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
              ) from e
 
-         entities = self.list_entities().collect()
-         outputs = []
+         output_values: List[List[Any]] = []
          for r in results:
-             if entity_name == SqlIdentifier(r["TAG_VALUE"], case_sensitive=True):
-                 fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
-                 fv_name = SqlIdentifier(fv_name, case_sensitive=True)
-                 obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
-                 if feature_view_name is not None:
-                     if fv_name == feature_view_name:
-                         outputs.append(self._compose_feature_view(fv_maps[obj_name], entities))
+             fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
+             for retrieved_entity in fv_metadata.entities:
+                 if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
+                     fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
+                     fv_name = SqlIdentifier(fv_name, case_sensitive=True)
+                     obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
+                     if feature_view_name is not None:
+                         if fv_name == feature_view_name:
+                             self._extract_feature_view_info(fv_maps[obj_name], output_values)
+                         else:
+                             continue
                      else:
-                         continue
-                 else:
-                     outputs.append(self._compose_feature_view(fv_maps[obj_name], entities))
-         return outputs
+                         self._extract_feature_view_info(fv_maps[obj_name], output_values)
+         return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
 
      def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
          def find_and_compose_entity(name: str) -> Entity:
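This rewrite assumes each feature view's tag value is now a single JSON document (_FeatureViewMetadata) listing all of its entities, rather than one tag value per entity. The class itself is defined elsewhere in this diff; a rough, hypothetical sketch of the round-trip it implies:

# Hypothetical stand-in for _FeatureViewMetadata; the real class lives
# elsewhere in this package and may differ in fields and encoding.
import json
from dataclasses import asdict, dataclass, field
from typing import List

@dataclass
class FeatureViewMetadataSketch:
    entities: List[str] = field(default_factory=list)
    timestamp_col: str = "TS_PLACEHOLDER"  # stand-in for _TIMESTAMP_COL_PLACEHOLDER

    def to_json(self) -> str:
        return json.dumps(asdict(self))

    @classmethod
    def from_json(cls, raw: str) -> "FeatureViewMetadataSketch":
        return cls(**json.loads(raw))

# One tag value now carries every entity for the feature view:
tag_value = FeatureViewMetadataSketch(entities=["CUSTOMER", "ORDER"]).to_json()
assert FeatureViewMetadataSketch.from_json(tag_value).entities == ["CUSTOMER", "ORDER"]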
@@ -1459,7 +1503,7 @@ class FeatureStore:
              if e["NAME"] == name:
                  return Entity(
                      name=SqlIdentifier(e["NAME"], case_sensitive=True).identifier(),
-                     join_keys=e["JOIN_KEYS"].strip("[]").split(","),
+                     join_keys=self._recompose_join_keys(e["JOIN_KEYS"]),
                      desc=e["DESC"],
                  )
          raise RuntimeError(f"Cannot find entity {name} from retrieved entity list: {entity_list}")
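Replacing the bare strip("[]").split(",") with _recompose_join_keys suggests the stored JOIN_KEYS string needs more careful parsing (quoting, embedded spaces) than a naive split provides. The helper itself is not shown in this hunk; a hypothetical sketch of the kind of normalization involved:

# Hypothetical sketch only; the real _recompose_join_keys is defined
# elsewhere in feature_store.py and may parse a different representation.
from typing import List

def recompose_join_keys_sketch(raw: str) -> List[str]:
    # Accepts renderings like '["CUST_ID", "REGION"]' or '[CUST_ID, REGION]'
    # and normalizes them to a clean list of identifiers.
    inner = raw.strip().strip("[]")
    return [k.strip().strip('"') for k in inner.split(",") if k.strip()]

assert recompose_join_keys_sketch('["CUST_ID", "REGION"]') == ["CUST_ID", "REGION"]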
@@ -1477,9 +1521,9 @@ class FeatureStore:
              query = m.group("query")
              df = self._session.sql(query)
              desc = m.group("comment")
-             entity_names = m.group("entities")
-             entities = [find_and_compose_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
-             ts_col = m.group("ts_col")
+             fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
+             entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
+             ts_col = fv_metadata.timestamp_col
              timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
 
              fv = FeatureView._construct_feature_view(
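In both the dynamic-table and view branches, the regex now exposes a single named group, fv_metadata, whose JSON payload replaces the old entities/ts_col groups. A toy illustration of that named-group extraction (the real _DT_OR_VIEW_QUERY_PATTERN matches the full stored DDL text and is considerably more involved):

import re

# Toy pattern only: the real _DT_OR_VIEW_QUERY_PATTERN also captures the
# query, comment, and other groups from the stored CREATE statement.
pattern = re.compile(r"COMMENT = '(?P<fv_metadata>\{.*?\})'", re.DOTALL)

ddl = "CREATE VIEW V COMMENT = '{\"entities\": [\"CUSTOMER\"], \"timestamp_col\": \"TS\"}' AS SELECT 1"
m = pattern.search(ddl)
if m is not None:
    print(m.group("fv_metadata"))  # the JSON blob handed to _FeatureViewMetadata.from_json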
@@ -1506,9 +1550,9 @@ class FeatureStore:
              query = m.group("query")
              df = self._session.sql(query)
              desc = m.group("comment")
-             entity_names = m.group("entities")
-             entities = [find_and_compose_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
-             ts_col = m.group("ts_col")
+             fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
+             entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
+             ts_col = fv_metadata.timestamp_col
              timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
 
              fv = FeatureView._construct_feature_view(
@@ -1542,7 +1586,10 @@ class FeatureStore:
          return descs
 
      def _find_object(
-         self, object_type: str, object_name: Optional[SqlIdentifier], prefix_match: bool = False
+         self,
+         object_type: str,
+         object_name: Optional[SqlIdentifier],
+         prefix_match: bool = False,
      ) -> List[Row]:
          """Try to find an object by given type and name pattern.
 
@@ -1569,7 +1616,7 @@ class FeatureStore:
          search_space, obj_domain = self._obj_search_spaces[object_type]
          all_rows = []
          fs_tag_objects = []
-         tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES"]
+         tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES", "DATASETS"]
          try:
              search_scope = f"IN {search_space}" if search_space is not None else ""
              all_rows = self._session.sql(f"SHOW {object_type} LIKE '{match_name}' {search_scope}").collect(
@@ -1577,25 +1624,41 @@ class FeatureStore:
              )
              # There could be non-FS objects under the FS schema, so filter on objects carrying the FS special tag.
              if object_type not in tag_free_object_types and len(all_rows) > 0:
-                 # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
-                 # use double quotes to make it case-sensitive.
-                 queries = [
-                     f"""
-                     SELECT OBJECT_NAME
-                     FROM TABLE(
-                         {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
-                             '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
-                             '{obj_domain}'
+                 if self._use_optimized_tag_ref:
+                     fs_obj_rows = self._session.sql(
+                         f"""
+                         SELECT
+                             OBJECT_NAME
+                         FROM TABLE(
+                             {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
+                                 TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
+                             )
                          )
-                     )
-                     WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
-                     AND TAG_SCHEMA = '{self._config.schema.resolved()}'
-                     """
-                     for row in all_rows
-                 ]
-                 fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
-                     statement_params=self._telemetry_stmp
-                 )
+                         WHERE DOMAIN='{obj_domain}'
+                         """
+                     ).collect(statement_params=self._telemetry_stmp)
+                 else:
+                     # TODO: remove this after the tag_ref_internal rollout.
+                     # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive;
+                     # use double quotes to make it case-sensitive.
+                     queries = [
+                         f"""
+                         SELECT OBJECT_NAME
+                         FROM TABLE(
+                             {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
+                                 '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
+                                 '{obj_domain}'
+                             )
+                         )
+                         WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
+                         AND TAG_SCHEMA = '{self._config.schema.resolved()}'
+                         """
+                         for row in all_rows
+                     ]
+                     fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
+                         statement_params=self._telemetry_stmp
+                     )
+
              fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
          except Exception as e:
              raise snowml_exceptions.SnowflakeMLException(
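The optimized branch collapses what used to be one TAG_REFERENCES probe per candidate object, stitched together with UNION, into a single TAG_REFERENCES_INTERNAL scan filtered by tag name and domain: O(N) information-schema queries become one. The two query shapes side by side, with placeholder identifiers (TAG_REFERENCES_INTERNAL is the internal variant used above and may not be available on every account):

# Placeholder identifiers; the shapes mirror the queries in this hunk.
db = "MY_DB"
tag_fqn = "MY_DB.FS_SCHEMA.MY_FS_OBJECT_TAG"  # stand-in for _FEATURE_STORE_OBJECT_TAG

# Optimized: one scan of everything referencing the tag, filtered by domain.
optimized = f"""
    SELECT OBJECT_NAME
    FROM TABLE({db}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(TAG_NAME => '{tag_fqn}'))
    WHERE DOMAIN='TABLE'
"""

# Legacy: one TAG_REFERENCES probe per candidate object, joined with UNION.
legacy = "\nUNION\n".join(
    f"""
    SELECT OBJECT_NAME
    FROM TABLE({db}.INFORMATION_SCHEMA.TAG_REFERENCES('{db}.FS_SCHEMA."{name}"', 'TABLE'))
    WHERE TAG_NAME = 'MY_FS_OBJECT_TAG'
    """
    for name in ["OBJ_A", "OBJ_B"]
)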
@@ -1641,3 +1704,66 @@ class FeatureStore:
                  ),
              )
          return cast(DataFrame, df.drop(exclude_columns))
+
+     def _tag_ref_internal_enabled(self) -> bool:
+         try:
+             self._session.sql(
+                 f"""
+                 SELECT * FROM TABLE(
+                     INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
+                         TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
+                     )
+                 ) LIMIT 1;
+                 """
+             ).collect()
+             return True
+         except Exception:
+             return False
+
+     def _check_feature_store_object_versions(self) -> None:
+         versions = self._collapse_object_versions()
+         if len(versions) > 0 and pkg_version.parse(snowml_version.VERSION) < versions[0]:
+             warnings.warn(
+                 "The current snowflake-ml-python version is out of date; a package upgrade is recommended "
+                 + f"(current={snowml_version.VERSION}, recommended>={str(versions[0])})",
+                 stacklevel=2,
+                 category=UserWarning,
+             )
+
+     def _collapse_object_versions(self) -> List[pkg_version.Version]:
+         if not self._use_optimized_tag_ref:
+             return []
+
+         query = f"""
+             SELECT
+                 TAG_VALUE
+             FROM TABLE(
+                 {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
+                     TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
+                 )
+             )
+         """
+         try:
+             res = self._session.sql(query).collect(statement_params=self._telemetry_stmp)
+         except Exception:
+             # This is a best-effort warning nudging users to upgrade the package,
+             # so treat failures as benign.
+             return []
+         versions = set()
+         compatibility_breakage_detected = False
+         for r in res:
+             info = _FeatureStoreObjInfo.from_json(r["TAG_VALUE"])
+             if info.type == _FeatureStoreObjTypes.UNKNOWN:
+                 compatibility_breakage_detected = True
+             versions.add(pkg_version.parse(info.pkg_version))
+
+         sorted_versions = sorted(versions, reverse=True)
+         if compatibility_breakage_detected:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.SNOWML_PACKAGE_OUTDATED,
+                 original_exception=RuntimeError(
+                     f"The current snowflake-ml-python version {snowml_version.VERSION} is out of date; "
+                     + f"please upgrade to at least {sorted_versions[0]}."
+                 ),
+             )
+         return sorted_versions
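_check_feature_store_object_versions leans on packaging's Version ordering: parsed versions compare semantically rather than lexicographically, which is what makes taking versions[0] of the reverse-sorted list a sound "newest version seen" check. A quick self-contained illustration:

from packaging import version as pkg_version

current = pkg_version.parse("1.4.1")
seen = {pkg_version.parse(v) for v in ("1.5.0", "1.4.0", "1.10.0")}
newest = sorted(seen, reverse=True)[0]

# Semantic ordering: 1.10.0 > 1.5.0, even though "1.10.0" < "1.5.0" as text.
assert newest == pkg_version.parse("1.10.0")
if current < newest:
    print(f"upgrade recommended (current={current}, recommended>={newest})")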