snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. snowflake/ml/_internal/env_utils.py +16 -13
  2. snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
  3. snowflake/ml/_internal/telemetry.py +19 -0
  4. snowflake/ml/feature_store/__init__.py +9 -0
  5. snowflake/ml/feature_store/entity.py +73 -0
  6. snowflake/ml/feature_store/feature_store.py +1657 -0
  7. snowflake/ml/feature_store/feature_view.py +459 -0
  8. snowflake/ml/model/_client/ops/model_ops.py +16 -38
  9. snowflake/ml/model/_client/sql/model.py +1 -7
  10. snowflake/ml/model/_client/sql/model_version.py +20 -15
  11. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
  12. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
  14. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
  15. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
  16. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
  17. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
  18. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
  19. snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
  20. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
  21. snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
  22. snowflake/ml/model/model_signature.py +72 -16
  23. snowflake/ml/model/type_hints.py +12 -0
  24. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
  25. snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
  26. snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
  27. snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
  28. snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
  29. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
  30. snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
  31. snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
  32. snowflake/ml/modeling/cluster/birch.py +19 -3
  33. snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
  34. snowflake/ml/modeling/cluster/dbscan.py +19 -3
  35. snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
  36. snowflake/ml/modeling/cluster/k_means.py +19 -3
  37. snowflake/ml/modeling/cluster/mean_shift.py +19 -3
  38. snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
  39. snowflake/ml/modeling/cluster/optics.py +19 -3
  40. snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
  41. snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
  42. snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
  43. snowflake/ml/modeling/compose/column_transformer.py +19 -3
  44. snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
  45. snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
  46. snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
  47. snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
  48. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
  49. snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
  50. snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
  51. snowflake/ml/modeling/covariance/oas.py +19 -3
  52. snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
  53. snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
  54. snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
  55. snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
  56. snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
  57. snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
  58. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
  59. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
  60. snowflake/ml/modeling/decomposition/pca.py +19 -3
  61. snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
  62. snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
  63. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
  64. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
  65. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
  66. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
  67. snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
  68. snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
  69. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
  70. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
  71. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
  72. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
  73. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
  75. snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
  76. snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
  77. snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
  78. snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
  79. snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
  80. snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
  81. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
  82. snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
  83. snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
  84. snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
  85. snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
  86. snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
  87. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
  88. snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
  89. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
  91. snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
  92. snowflake/ml/modeling/impute/knn_imputer.py +19 -3
  93. snowflake/ml/modeling/impute/missing_indicator.py +19 -3
  94. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
  95. snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
  96. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
  97. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
  98. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
  99. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
  100. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
  101. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
  102. snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
  103. snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
  104. snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
  105. snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
  106. snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
  107. snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
  108. snowflake/ml/modeling/linear_model/lars.py +19 -3
  109. snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
  110. snowflake/ml/modeling/linear_model/lasso.py +19 -3
  111. snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
  112. snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
  113. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
  114. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
  115. snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
  116. snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
  117. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
  118. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
  120. snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
  121. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
  122. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
  123. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
  124. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
  125. snowflake/ml/modeling/linear_model/perceptron.py +19 -3
  126. snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
  127. snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
  128. snowflake/ml/modeling/linear_model/ridge.py +19 -3
  129. snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
  130. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
  131. snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
  132. snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
  133. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
  134. snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
  135. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
  136. snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
  137. snowflake/ml/modeling/manifold/isomap.py +19 -3
  138. snowflake/ml/modeling/manifold/mds.py +19 -3
  139. snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
  140. snowflake/ml/modeling/manifold/tsne.py +19 -3
  141. snowflake/ml/modeling/metrics/classification.py +5 -6
  142. snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
  143. snowflake/ml/modeling/metrics/ranking.py +7 -3
  144. snowflake/ml/modeling/metrics/regression.py +6 -3
  145. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
  146. snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
  147. snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
  148. snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
  149. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
  150. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
  151. snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
  152. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
  153. snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
  154. snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
  155. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
  156. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
  157. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
  158. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
  159. snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
  160. snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
  161. snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
  162. snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
  163. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
  164. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
  165. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
  166. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
  167. snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
  168. snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
  169. snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
  170. snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
  171. snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
  172. snowflake/ml/modeling/svm/linear_svc.py +19 -3
  173. snowflake/ml/modeling/svm/linear_svr.py +19 -3
  174. snowflake/ml/modeling/svm/nu_svc.py +19 -3
  175. snowflake/ml/modeling/svm/nu_svr.py +19 -3
  176. snowflake/ml/modeling/svm/svc.py +19 -3
  177. snowflake/ml/modeling/svm/svr.py +19 -3
  178. snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
  179. snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
  180. snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
  181. snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
  182. snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
  183. snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
  184. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
  185. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
  186. snowflake/ml/registry/registry.py +2 -0
  187. snowflake/ml/version.py +1 -1
  188. snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
  189. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
  190. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
  191. {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
  192. snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
  193. /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
  194. /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
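The headline addition in 1.2.2 is the new `snowflake.ml.feature_store` package (entries 4-7 above); the diff of `snowflake/ml/feature_store/feature_store.py` below shows its public API. As a rough orientation only, a minimal usage sketch might look like the following. The database, schema, and warehouse names and the `features_df`/`spine_df` DataFrames are hypothetical placeholders, `refresh_freq` is assumed to be a FeatureView constructor argument (the property is read by `register_feature_view` below), and the API is marked private preview in the source, so signatures may change:

    from snowflake.ml.feature_store import CreationMode, Entity, FeatureStore, FeatureView

    # Create (or connect to) a feature store backed by the schema MY_DB.MY_FS.
    fs = FeatureStore(
        session=session,  # an existing snowflake.snowpark.Session
        database="MY_DB",
        name="MY_FS",
        default_warehouse="MY_WH",
        creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
    )

    # Register an entity, i.e. the join keys that features attach to.
    customer = Entity(name="CUSTOMER", join_keys=["CUSTOMER_ID"])
    fs.register_entity(customer)

    # Define a feature view over a Snowpark DataFrame and materialize it.
    fv = FeatureView(
        name="CUSTOMER_FEATURES",
        entities=[customer],
        feature_df=features_df,   # a Snowpark DataFrame computing the features
        refresh_freq="1 day",     # materialized as a periodically refreshed dynamic table
    )
    fv = fs.register_feature_view(feature_view=fv, version="V1", block=True)

    # Join point-in-time feature values onto a spine DataFrame.
    training_df = fs.retrieve_feature_values(spine_df=spine_df, features=[fv])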
@@ -0,0 +1,1657 @@
+ from __future__ import annotations
+
+ import datetime
+ import functools
+ import json
+ import logging
+ import re
+ import warnings
+ from dataclasses import dataclass
+ from enum import Enum
+ from typing import Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
+
+ from pytimeparse.timeparse import timeparse
+ from typing_extensions import Concatenate, ParamSpec
+
+ from snowflake.ml._internal import telemetry
+ from snowflake.ml._internal.exceptions import (
+     error_codes,
+     exceptions as snowml_exceptions,
+ )
+ from snowflake.ml._internal.utils import identifier
+ from snowflake.ml._internal.utils.sql_identifier import (
+     SqlIdentifier,
+     to_sql_identifiers,
+ )
+ from snowflake.ml.dataset.dataset import Dataset, FeatureStoreMetadata
+ from snowflake.ml.feature_store.entity import (
+     _ENTITY_NAME_LENGTH_LIMIT,
+     _FEATURE_VIEW_ENTITY_TAG_DELIMITER,
+     Entity,
+ )
+ from snowflake.ml.feature_store.feature_view import (
+     _FEATURE_OBJ_TYPE,
+     _FEATURE_VIEW_NAME_DELIMITER,
+     _TIMESTAMP_COL_PLACEHOLDER,
+     FeatureView,
+     FeatureViewSlice,
+     FeatureViewStatus,
+     FeatureViewVersion,
+ )
+ from snowflake.snowpark import DataFrame, Row, Session, functions as F
+ from snowflake.snowpark._internal import type_utils, utils as snowpark_utils
+ from snowflake.snowpark.exceptions import SnowparkSQLException
+ from snowflake.snowpark.types import StructField
+
+ _Args = ParamSpec("_Args")
+ _RT = TypeVar("_RT")
+
+ logger = logging.getLogger(__name__)
+
+ _ENTITY_TAG_PREFIX = "SNOWML_FEATURE_STORE_ENTITY_"
+ _FEATURE_VIEW_ENTITY_TAG = "SNOWML_FEATURE_STORE_FV_ENTITIES"
+ _FEATURE_VIEW_TS_COL_TAG = "SNOWML_FEATURE_STORE_FV_TS_COL"
+ _FEATURE_STORE_OBJECT_TAG = "SNOWML_FEATURE_STORE_OBJECT"
+ _PROJECT = "FeatureStore"
+ _DT_OR_VIEW_QUERY_PATTERN = re.compile(
+     r"""CREATE\ (OR\ REPLACE\ )?(?P<obj_type>(DYNAMIC\ TABLE|VIEW))\ .*
+         COMMENT\ =\ '(?P<comment>.*)'\s*
+         TAG.*?{entity_tag}\ =\ '(?P<entities>.*?)',\n
+         .*?{ts_col_tag}\ =\ '(?P<ts_col>.*?)',?.*?
+         AS\ (?P<query>.*)
+         """.format(
+         entity_tag=_FEATURE_VIEW_ENTITY_TAG, ts_col_tag=_FEATURE_VIEW_TS_COL_TAG
+     ),
+     flags=re.DOTALL | re.IGNORECASE | re.X,
+ )
+
+
+ class CreationMode(Enum):
+     FAIL_IF_NOT_EXIST = 1
+     CREATE_IF_NOT_EXIST = 2
+
+
+ @dataclass
+ class _FeatureStoreConfig:
+     database: SqlIdentifier
+     schema: SqlIdentifier
+
+     @property
+     def full_schema_path(self) -> str:
+         return f"{self.database}.{self.schema}"
+
+
+ def switch_warehouse(
+     f: Callable[Concatenate[FeatureStore, _Args], _RT]
+ ) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
+     @functools.wraps(f)
+     def wrapper(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
+         original_warehouse = self._session.get_current_warehouse()
+         try:
+             if self._default_warehouse is not None:
+                 self._session.use_warehouse(self._default_warehouse)
+             return f(self, *args, **kargs)
+         finally:
+             self._session.use_warehouse(original_warehouse)  # type: ignore[arg-type]
+
+     return wrapper
+
+
+ def dispatch_decorator(
+     prpr_version: str,
+ ) -> Callable[[Callable[Concatenate[FeatureStore, _Args], _RT]], Callable[Concatenate[FeatureStore, _Args], _RT],]:
+     def decorator(
+         f: Callable[Concatenate[FeatureStore, _Args], _RT]
+     ) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
+         @telemetry.send_api_usage_telemetry(project=_PROJECT)
+         @snowpark_utils.private_preview(version=prpr_version)
+         @switch_warehouse
+         @functools.wraps(f)
+         def wrap(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
+             return f(self, *args, **kargs)
+
+         return wrap
+
+     return decorator
+
+
+ class FeatureStore:
+     """
+     FeatureStore provides APIs to create, materialize, retrieve and manage feature pipelines.
+     """
+
+     @telemetry.send_api_usage_telemetry(project=_PROJECT)
+     @snowpark_utils.private_preview(version="1.0.8")
+     def __init__(
+         self,
+         session: Session,
+         database: str,
+         name: str,
+         default_warehouse: str,
+         creation_mode: CreationMode = CreationMode.FAIL_IF_NOT_EXIST,
+     ) -> None:
+         """
+         Creates a FeatureStore instance.
+
+         Args:
+             session: Snowpark Session to interact with Snowflake backend.
+             database: Database to create the FeatureStore instance.
+             name: Target FeatureStore name, maps to a schema in the database.
+             default_warehouse: Default warehouse for feature store compute.
+             creation_mode: Create new backend or fail if not exist upon feature store creation.
+
+         Raises:
+             SnowflakeMLException: [ValueError] default_warehouse does not exist.
+             SnowflakeMLException: [ValueError] FAIL_IF_NOT_EXIST is set and the feature store does not exist.
+             SnowflakeMLException: [RuntimeError] Failed to find resources.
+             SnowflakeMLException: [RuntimeError] Failed to create feature store.
+         """
+         database = SqlIdentifier(database)
+         name = SqlIdentifier(name)
+
+         self._telemetry_stmp = telemetry.get_function_usage_statement_params(_PROJECT)
+         self._session: Session = session
+         self._config = _FeatureStoreConfig(
+             database=database,
+             schema=name,
+         )
+         self._asof_join_enabled = None
+
+         # A dict from object name to tuple of search space and object domain.
+         # search space used in query "SHOW <object_TYPE> LIKE <object_name> IN <search_space>"
+         # object domain used in query "TAG_REFERENCE(<object_name>, <object_domain>)"
+         self._obj_search_spaces = {
+             "TABLES": (self._config.full_schema_path, "TABLE"),
+             "DYNAMIC TABLES": (self._config.full_schema_path, "TABLE"),
+             "VIEWS": (self._config.full_schema_path, "TABLE"),
+             "SCHEMAS": (f"DATABASE {self._config.database}", "SCHEMA"),
+             "TAGS": (self._config.full_schema_path, None),
+             "TASKS": (self._config.full_schema_path, "TASK"),
+             "WAREHOUSES": (None, None),
+         }
+
+         self.update_default_warehouse(default_warehouse)
+
+         if creation_mode == CreationMode.FAIL_IF_NOT_EXIST:
+             schema_result = self._find_object("SCHEMAS", self._config.schema)
+             if len(schema_result) == 0:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.NOT_FOUND,
+                     original_exception=ValueError(f"Feature store {name} does not exist."),
+                 )
+         else:
+             try:
+                 self._session.sql(f"CREATE DATABASE IF NOT EXISTS {self._config.database}").collect(
+                     statement_params=self._telemetry_stmp
+                 )
+                 self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
+                     statement_params=self._telemetry_stmp
+                 )
+                 for tag in to_sql_identifiers(
+                     [
+                         _FEATURE_VIEW_ENTITY_TAG,
+                         _FEATURE_VIEW_TS_COL_TAG,
+                         _FEATURE_STORE_OBJECT_TAG,
+                     ]
+                 ):
+                     self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
+                         statement_params=self._telemetry_stmp
+                     )
+             except Exception as e:
+                 self.clear()
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                     original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
+                 )
+
+         logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
+
+     @telemetry.send_api_usage_telemetry(project=_PROJECT)
+     @snowpark_utils.private_preview(version="1.0.12")
+     def update_default_warehouse(self, warehouse_name: str) -> None:
+         """Update default warehouse for feature store.
+
+         Args:
+             warehouse_name: Name of warehouse.
+
+         Raises:
+             SnowflakeMLException: If warehouse does not exist.
+         """
+         warehouse = SqlIdentifier(warehouse_name)
+         warehouse_result = self._find_object("WAREHOUSES", warehouse)
+         if len(warehouse_result) == 0:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"Cannot find warehouse {warehouse}"),
+             )
+
+         self._default_warehouse = warehouse
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def register_entity(self, entity: Entity) -> None:
+         """
+         Register Entity in the FeatureStore.
+
+         Args:
+             entity: Entity object to register.
+
+         Raises:
+             SnowflakeMLException: [ValueError] Entity with same name is already registered.
+             SnowflakeMLException: [RuntimeError] Failed to find resources.
+         """
+         tag_name = self._get_entity_name(entity.name)
+         found_rows = self._find_object("TAGS", tag_name)
+         if len(found_rows) > 0:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.OBJECT_ALREADY_EXISTS,
+                 original_exception=ValueError(f"Entity {entity.name} already exists."),
+                 suppress_source_trace=True,
+             )
+
+         # allowed_values will add double-quotes around each value, thus use resolved str here.
+         join_keys = [f"'{key.resolved()}'" for key in entity.join_keys]
+         join_keys_str = ",".join(join_keys)
+         full_tag_name = self._get_fully_qualified_name(tag_name)
+         try:
+             self._session.sql(
+                 f"""CREATE TAG IF NOT EXISTS {full_tag_name}
+                     ALLOWED_VALUES {join_keys_str}
+                     COMMENT = '{entity.desc}'
+                 """
+             ).collect(statement_params=self._telemetry_stmp)
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to register entity `{entity.name}`: {e}."),
+             ) from e
+         logger.info(f"Registered Entity {entity}.")
+
+     # TODO: add support to update column desc once SNOW-894249 is fixed
+     @dispatch_decorator(prpr_version="1.0.8")
+     def register_feature_view(
+         self,
+         feature_view: FeatureView,
+         version: str,
+         block: bool = False,
+         override: bool = False,
+     ) -> FeatureView:
+         """
+         Materialize a FeatureView to Snowflake backend.
+         Incremental maintenance for updates on the source data will be automated if refresh_freq is set.
+
+         NOTE: Each new materialization will trigger a full FeatureView history refresh for the data included in the
+               FeatureView.
+
+         Args:
+             feature_view: FeatureView instance to materialize.
+             version: version of the registered FeatureView.
+                 NOTE: Version only accepts letters, numbers and underscore. Also version will be capitalized.
+             block: Specify whether the FeatureView backend materialization should be blocking or not. If blocking then
+                 the API will wait until the initial FeatureView data is generated.
+             override: Override the existing FeatureView with same version. This is the same as dropping the FeatureView
+                 first, then recreating it. NOTE: there will be backfill cost associated if the FeatureView is being
+                 continuously maintained.
+
+         Returns:
+             A materialized FeatureView object.
+
+         Raises:
+             SnowflakeMLException: [ValueError] FeatureView is already registered, or duplicate name and version
+                 are detected.
+             SnowflakeMLException: [ValueError] FeatureView entity has not been registered.
+             SnowflakeMLException: [ValueError] Warehouse or default warehouse is not specified.
+             SnowflakeMLException: [RuntimeError] Failed to create dynamic table, task, or view.
+             SnowflakeMLException: [RuntimeError] Failed to find resources.
+         """
+         version = FeatureViewVersion(version)
+
+         if feature_view.status != FeatureViewStatus.DRAFT:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.OBJECT_ALREADY_EXISTS,
+                 original_exception=ValueError(
+                     f"FeatureView {feature_view.name}/{feature_view.version} has already been registered."
+                 ),
+             )
+
+         # TODO: ideally we should move this to FeatureView creation time
+         for e in feature_view.entities:
+             if not self._validate_entity_exists(e.name):
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.NOT_FOUND,
+                     original_exception=ValueError(f"Entity {e.name} has not been registered."),
+                 )
+
+         feature_view_name = FeatureView._get_physical_name(feature_view.name, version)
+         if not override:
+             dynamic_table_results = self._find_object("DYNAMIC TABLES", feature_view_name)
+             view_results = self._find_object("VIEWS", feature_view_name)
+             if len(dynamic_table_results) > 0 or len(view_results) > 0:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.OBJECT_ALREADY_EXISTS,
+                     original_exception=ValueError(f"FeatureView {feature_view.name}/{version} already exists."),
+                     suppress_source_trace=True,
+                 )
+
+         fully_qualified_name = self._get_fully_qualified_name(feature_view_name)
+         entities = _FEATURE_VIEW_ENTITY_TAG_DELIMITER.join([e.name for e in feature_view.entities])
+         timestamp_col = (
+             feature_view.timestamp_col
+             if feature_view.timestamp_col is not None
+             else SqlIdentifier(_TIMESTAMP_COL_PLACEHOLDER)
+         )
+
+         def create_col_desc(col: StructField) -> str:
+             desc = feature_view.feature_descs.get(SqlIdentifier(col.name), None)
+             desc = "" if desc is None else f"COMMENT '{desc}'"
+             return f"{col.name} {desc}"
+
+         column_descs = ", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
+         refresh_freq = feature_view.refresh_freq
+
+         if refresh_freq is not None:
+             schedule_task = refresh_freq != "DOWNSTREAM" and timeparse(refresh_freq) is None
+             self._create_dynamic_table(
+                 feature_view_name,
+                 feature_view,
+                 fully_qualified_name,
+                 column_descs,
+                 entities,
+                 schedule_task,
+                 self._default_warehouse,
+                 timestamp_col,
+                 block,
+                 override,
+             )
+         else:
+             try:
+                 override_clause = " OR REPLACE" if override else ""
+                 query = f"""CREATE{override_clause} VIEW {fully_qualified_name} ({column_descs})
+                     COMMENT = '{feature_view.desc}'
+                     TAG (
+                         {_FEATURE_VIEW_ENTITY_TAG} = '{entities}',
+                         {_FEATURE_VIEW_TS_COL_TAG} = '{timestamp_col}',
+                         {_FEATURE_STORE_OBJECT_TAG} = ''
+                     )
+                     AS {feature_view.query}
+                 """
+                 self._session.sql(query).collect(statement_params=self._telemetry_stmp)
+             except Exception as e:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                     original_exception=RuntimeError(f"Create view {fully_qualified_name} [\n{query}\n] failed: {e}"),
+                 ) from e
+
+         logger.info(f"Registered FeatureView {feature_view.name}/{version}.")
+         return self.get_feature_view(feature_view.name, str(version))
+
+     @dispatch_decorator(prpr_version="1.1.0")
+     def update_feature_view(self, feature_view: FeatureView) -> None:
+         """Update a registered feature view.
+             Check feature_view.py for which fields are allowed to be updated after registration.
+
+         Args:
+             feature_view: The feature view to be updated.
+
+         Raises:
+             SnowflakeMLException: [RuntimeError] Feature view must be registered before updating.
+             SnowflakeMLException: [RuntimeError] Failed to update feature view.
+         """
+         if feature_view.status == FeatureViewStatus.DRAFT or feature_view.status == FeatureViewStatus.STATIC:
+             full_name = f"{feature_view.name}/{feature_view.version}"
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INVALID_ARGUMENT,
+                 original_exception=RuntimeError(
+                     f"Feature view {full_name} must be registered and non-static so that it can be updated."
+                 ),
+             )
+
+         if feature_view.refresh_freq is not None:
+             try:
+                 self._session.sql(
+                     f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
+                         TARGET_LAG = '{feature_view.refresh_freq}'
+                         WAREHOUSE = {feature_view.warehouse}
+                     """
+                 ).collect()
+             except Exception as e:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                     original_exception=RuntimeError(
+                         f"Update feature view {feature_view.name}/{feature_view.version} failed: {e}"
+                     ),
+                 ) from e
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
+         """
+         Read FeatureView data.
+
+         Args:
+             feature_view: FeatureView to retrieve data from.
+
+         Returns:
+             Snowpark DataFrame (lazy mode) containing the FeatureView data.
+
+         Raises:
+             SnowflakeMLException: [ValueError] FeatureView is not registered.
+         """
+         if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"FeatureView {feature_view.name} has not been registered."),
+             )
+
+         return self._session.sql(f"SELECT * FROM {feature_view.fully_qualified_name()}")
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def list_feature_views(
+         self,
+         entity_name: Optional[str] = None,
+         feature_view_name: Optional[str] = None,
+         as_dataframe: bool = True,
+     ) -> Union[Optional[DataFrame], List[FeatureView]]:
+         """
+         List FeatureViews in the FeatureStore.
+         If entity_name is specified, FeatureViews associated with that Entity will be listed.
+         If feature_view_name is also specified, the results are further filtered to matching names.
+
+         Args:
+             entity_name: Entity name.
+             feature_view_name: FeatureView name.
+             as_dataframe: whether the return type should be a DataFrame.
+
+         Returns:
+             List of FeatureViews, or a DataFrame representation.
+         """
+         if entity_name is not None:
+             entity_name = SqlIdentifier(entity_name)
+         if feature_view_name is not None:
+             feature_view_name = SqlIdentifier(feature_view_name)
+
+         if entity_name is not None:
+             fvs = self._find_feature_views(entity_name, feature_view_name)
+         else:
+             fvs = []
+             for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
+                 fvs.append(self._compose_feature_view(row))
+
+         if as_dataframe:
+             result = None
+             for fv in fvs:
+                 fv_df = fv.to_df(self._session)
+                 result = fv_df if result is None else result.union(fv_df)  # type: ignore[attr-defined]
+             return result
+         else:
+             return fvs
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def get_feature_view(self, name: str, version: str) -> FeatureView:
+         """
+         Retrieve previously registered FeatureView.
+
+         Args:
+             name: FeatureView name.
+             version: FeatureView version.
+
+         Returns:
+             FeatureView object.
+
+         Raises:
+             SnowflakeMLException: [ValueError] FeatureView with name and version is not found,
+                 or incurred exception when reconstructing the FeatureView object.
+         """
+         name = SqlIdentifier(name)
+         version = FeatureViewVersion(version)
+
+         fv_name = FeatureView._get_physical_name(name, version)
+         results = self._get_fv_backend_representations(fv_name)
+         if len(results) != 1:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"Failed to find FeatureView {name}/{version}: {results}"),
+             )
+
+         return self._compose_feature_view(results[0])
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def merge_features(
+         self,
+         features: List[Union[FeatureView, FeatureViewSlice]],
+         name: str,
+         desc: str = "",
+     ) -> FeatureView:
+         """
+         Merge multiple registered FeatureViews or FeatureViewSlices to form a new FeatureView.
+         This is typically used to add new features to existing FeatureViews, since a registered FeatureView is
+         immutable. The FeatureViews or FeatureViewSlices to merge should have the same Entity and timestamp column
+         setup.
+
+         Args:
+             features: List of FeatureViews or FeatureViewSlices to merge.
+             name: name of the newly constructed FeatureView.
+             desc: description of the newly constructed FeatureView.
+
+         Returns:
+             A new FeatureView with features merged.
+
+         Raises:
+             SnowflakeMLException: [ValueError] Features length is not valid, or Entities and timestamp_col are
+                 inconsistent.
+             SnowflakeMLException: [ValueError] FeatureView has not been registered.
+             SnowflakeMLException: [ValueError] FeatureView merge failed.
+         """
+         name = SqlIdentifier(name)
+
+         if len(features) < 2:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INVALID_ARGUMENT,
+                 original_exception=ValueError("features should have at least two entries"),
+             )
+
+         left = features[0]
+         left_columns = None
+         if isinstance(left, FeatureViewSlice):
+             left_columns = ", ".join(left.names)
+             left = left.feature_view_ref
+
+         if left.status == FeatureViewStatus.DRAFT:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"FeatureView {left.name} has not been registered."),
+             )
+
+         join_keys = [k for e in left.entities for k in e.join_keys]
+
+         ts_col_expr = "" if left.timestamp_col is None else f" , {left.timestamp_col}"
+         left_columns = "*" if left_columns is None else f"{', '.join(join_keys)}, {left_columns}{ts_col_expr}"
+         left_df = self._session.sql(f"SELECT {left_columns} FROM {left.fully_qualified_name()}")
+
+         for right in features[1:]:
+             right_columns = None
+             if isinstance(right, FeatureViewSlice):
+                 right_columns = ", ".join(right.names)
+                 right = right.feature_view_ref
+
+             if left.entities != right.entities:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INVALID_ARGUMENT,
+                     original_exception=ValueError(
+                         f"Cannot merge FeatureView {left.name} and {right.name} with different Entities: "
+                         f"{left.entities} vs {right.entities}"  # noqa: E501
+                     ),
+                 )
+             if left.timestamp_col != right.timestamp_col:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INVALID_ARGUMENT,
+                     original_exception=ValueError(
+                         f"Cannot merge FeatureView {left.name} and {right.name} with different timestamp_col: "
+                         f"{left.timestamp_col} vs {right.timestamp_col}"  # noqa: E501
+                     ),
+                 )
+             if right.status == FeatureViewStatus.DRAFT:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.NOT_FOUND,
+                     original_exception=ValueError(f"FeatureView {right.name} has not been registered."),
+                 )
+
+             right_columns = "*" if right_columns is None else f"{', '.join(join_keys)}, {right_columns}"
+             exclude_ts_expr = (
+                 "" if right.timestamp_col is None or right_columns != "*" else f"EXCLUDE {right.timestamp_col}"
+             )
+             right_df = self._session.sql(
+                 f"SELECT {right_columns} {exclude_ts_expr} FROM {right.fully_qualified_name()}"
+             )
+
+             left_df = left_df.join(right=right_df, on=join_keys)
+
+         return FeatureView(
+             name=name,
+             entities=left.entities,
+             feature_df=left_df,
+             timestamp_col=left.timestamp_col,
+             desc=desc,
+         )
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
+         """
+         Resume a previously suspended FeatureView.
+
+         Args:
+             feature_view: FeatureView to resume.
+
+         Returns:
+             FeatureView with updated status.
+
+         Raises:
+             SnowflakeMLException: [ValueError] FeatureView is not in suspended status.
+             SnowflakeMLException: [RuntimeError] Failed to update feature view status.
+         """
+         if feature_view.status != FeatureViewStatus.SUSPENDED:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.SNOWML_UPDATE_FAILED,
+                 original_exception=ValueError(
+                     f"FeatureView {feature_view.name}/{feature_view.version} is not in suspended status. "
+                     f"Actual status: {feature_view.status}"
+                 ),
+             )
+
+         return self._update_feature_view_status(feature_view, "RESUME")
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def suspend_feature_view(self, feature_view: FeatureView) -> FeatureView:
+         """
+         Suspend a running FeatureView.
+
+         Args:
+             feature_view: FeatureView to suspend.
+
+         Returns:
+             FeatureView with updated status.
+
+         Raises:
+             SnowflakeMLException: [ValueError] FeatureView is not in running status.
+             SnowflakeMLException: [RuntimeError] Failed to update feature view status.
+         """
+         if feature_view.status != FeatureViewStatus.RUNNING:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.SNOWML_UPDATE_FAILED,
+                 original_exception=ValueError(
+                     f"FeatureView {feature_view.name}/{feature_view.version} is not in running status. "
+                     f"Actual status: {feature_view.status}"
+                 ),
+             )
+         return self._update_feature_view_status(feature_view, "SUSPEND")
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def delete_feature_view(self, feature_view: FeatureView) -> None:
+         """
+         Delete a FeatureView.
+
+         Args:
+             feature_view: FeatureView to delete.
+
+         Raises:
+             SnowflakeMLException: [ValueError] FeatureView is not registered.
+         """
+         if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"FeatureView {feature_view.name} has not been registered."),
+             )
+
+         fully_qualified_name = feature_view.fully_qualified_name()
+         if feature_view.status == FeatureViewStatus.STATIC:
+             self._session.sql(f"DROP VIEW IF EXISTS {fully_qualified_name}").collect(
+                 statement_params=self._telemetry_stmp
+             )
+         else:
+             self._session.sql(f"DROP DYNAMIC TABLE IF EXISTS {fully_qualified_name}").collect(
+                 statement_params=self._telemetry_stmp
+             )
+             if feature_view.refresh_freq == "DOWNSTREAM":
+                 self._session.sql(f"DROP TASK IF EXISTS {fully_qualified_name}").collect(
+                     statement_params=self._telemetry_stmp
+                 )
+
+         logger.info(f"Deleted FeatureView {feature_view.name}/{feature_view.version}.")
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def list_entities(self) -> DataFrame:
+         """
+         List all Entities in the FeatureStore.
+
+         Returns:
+             Snowpark DataFrame containing the results.
+         """
+         prefix_len = len(_ENTITY_TAG_PREFIX) + 1
+         return cast(
+             DataFrame,
+             self._session.sql(
+                 f"SHOW TAGS LIKE '{_ENTITY_TAG_PREFIX}%' IN SCHEMA {self._config.full_schema_path}"
+             ).select(
+                 F.col('"name"').substr(prefix_len, _ENTITY_NAME_LENGTH_LIMIT).alias("NAME"),
+                 F.col('"allowed_values"').alias("JOIN_KEYS"),
+                 F.col('"comment"').alias("DESC"),
+             ),
+         )
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def get_entity(self, name: str) -> Entity:
+         """
+         Retrieve previously registered Entity object.
+
+         Args:
+             name: Entity name.
+
+         Returns:
+             Entity object.
+
+         Raises:
+             SnowflakeMLException: [ValueError] Entity is not found.
+             SnowflakeMLException: [RuntimeError] Failed to retrieve tag reference information.
+             SnowflakeMLException: [RuntimeError] Failed to find resources.
+         """
+         name = SqlIdentifier(name)
+         try:
+             result = self.list_entities().filter(F.col("NAME") == name.resolved()).collect()
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to list entities: {e}"),
+             ) from e
+         if len(result) == 0:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"Cannot find Entity with name: {name}."),
+             )
+
+         raw_join_keys = result[0]["JOIN_KEYS"]
+         join_keys = raw_join_keys.strip("[]").split(",")
+         return Entity(
+             name=result[0]["NAME"],
+             join_keys=join_keys,
+             desc=result[0]["DESC"],
+         )
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def delete_entity(self, name: str) -> None:
+         """
+         Delete a previously registered Entity.
+
+         Args:
+             name: Entity name.
+
+         Raises:
+             SnowflakeMLException: [ValueError] Entity with the given name does not exist.
+             SnowflakeMLException: [RuntimeError] Failed to alter schema or drop tag.
+             SnowflakeMLException: [RuntimeError] Failed to find resources.
+         """
+         name = SqlIdentifier(name)
+
+         if not self._validate_entity_exists(name):
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"Entity {name} does not exist."),
+             )
+
+         active_feature_views = cast(List[FeatureView], self.list_feature_views(entity_name=name, as_dataframe=False))
+         if len(active_feature_views) > 0:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.SNOWML_DELETE_FAILED,
+                 original_exception=ValueError(
+                     f"Cannot delete Entity {name} due to active FeatureViews: {[f.name for f in active_feature_views]}."
+                 ),
+             )
+
+         tag_name = self._get_fully_qualified_name(self._get_entity_name(name))
+         try:
+             self._session.sql(f"DROP TAG IF EXISTS {tag_name}").collect(statement_params=self._telemetry_stmp)
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to alter schema or drop tag: {e}."),
+             ) from e
+         logger.info(f"Deleted Entity {name}.")
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def retrieve_feature_values(
+         self,
+         spine_df: DataFrame,
+         features: Union[List[Union[FeatureView, FeatureViewSlice]], List[str]],
+         spine_timestamp_col: Optional[str] = None,
+         exclude_columns: Optional[List[str]] = None,
+         include_feature_view_timestamp_col: bool = False,
+     ) -> DataFrame:
+         """
+         Enrich spine dataframe with feature values. Mainly used to generate inference data input.
+         If spine_timestamp_col is specified, point-in-time feature values will be fetched.
+
+         Args:
+             spine_df: Snowpark DataFrame to join features into.
+             features: List of features to join into the spine_df. Can be a list of FeatureView or FeatureViewSlice,
+                 or a list of serialized feature objects from Dataset.
+             spine_timestamp_col: Timestamp column in spine_df for point-in-time feature value lookup.
+             exclude_columns: Column names to exclude from the result dataframe.
+             include_feature_view_timestamp_col: Generated dataset will include the timestamp column of the feature
+                 view (if the feature view has a timestamp column) if set to true. Defaults to false.
+
+         Returns:
+             Snowpark DataFrame containing the joined results.
+
+         Raises:
+             ValueError: if features is empty.
+         """
+         if spine_timestamp_col is not None:
+             spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
+
+         if len(features) == 0:
+             raise ValueError("features cannot be empty")
+         if isinstance(features[0], str):
+             features = self._load_serialized_feature_objects(cast(List[str], features))
+
+         df, _ = self._join_features(
+             spine_df,
+             cast(List[Union[FeatureView, FeatureViewSlice]], features),
+             spine_timestamp_col,
+             include_feature_view_timestamp_col,
+         )
+
+         if exclude_columns is not None:
+             df = self._exclude_columns(df, exclude_columns)
+
+         return df
+
+     @dispatch_decorator(prpr_version="1.0.8")
+     def generate_dataset(
+         self,
+         spine_df: DataFrame,
+         features: List[Union[FeatureView, FeatureViewSlice]],
+         materialized_table: Optional[str] = None,
+         spine_timestamp_col: Optional[str] = None,
+         spine_label_cols: Optional[List[str]] = None,
+         exclude_columns: Optional[List[str]] = None,
+         save_mode: str = "errorifexists",
+         include_feature_view_timestamp_col: bool = False,
+         desc: str = "",
+     ) -> Dataset:
+         """
+         Generate dataset by given source table and feature views.
+
+         Args:
+             spine_df: The fact table containing the raw dataset.
+             features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
+             materialized_table: The destination table where the produced result will be stored. If none, the result
+                 won't be registered. If materialized_table is provided, the produced result will be written into
+                 the provided table. Note the result dataset will be a Snowflake clone of the registered table.
+                 New data can be appended to the same registered table without affecting previously generated
+                 datasets. The default result table name is a concatenation of the materialized_table name and the
+                 current timestamp.
+             spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
+                 time-series features. If spine_timestamp_col is not none, the input features also must have
+                 timestamp_col.
+             spine_label_cols: Name of column(s) in spine_df that contains labels.
+             exclude_columns: Column names to exclude from the result dataframe.
+                 The underlying storage will still contain the columns.
+             save_mode: How new data is saved. Currently supported:
+                 errorifexists: Raise error if registered table already exists.
+                 merge: Merge new data if registered table already exists.
+             include_feature_view_timestamp_col: Generated dataset will include the timestamp column of the feature
+                 view (if the feature view has a timestamp column) if set to true. Defaults to false.
+             desc: A description about this dataset.
+
+         Returns:
+             A Dataset object.
+
+         Raises:
+             SnowflakeMLException: [ValueError] save_mode is invalid.
+             SnowflakeMLException: [ValueError] spine_df contains more than one query.
+             SnowflakeMLException: [ValueError] materialized_table contains invalid char `.`.
+             SnowflakeMLException: [ValueError] materialized_table already exists with save_mode `errorifexists`.
+             SnowflakeMLException: [ValueError] Snapshot creation failed.
+             SnowflakeMLException: [RuntimeError] Failed to create clone from table.
+             SnowflakeMLException: [RuntimeError] Failed to find resources.
+         """
+         if spine_timestamp_col is not None:
+             spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
+         if spine_label_cols is not None:
+             spine_label_cols = to_sql_identifiers(spine_label_cols)  # type: ignore[assignment]
+
+         allowed_save_mode = {"errorifexists", "merge"}
+         if save_mode.lower() not in allowed_save_mode:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INVALID_ARGUMENT,
+                 original_exception=ValueError(
+                     f"'{save_mode}' is not supported. Current supported save modes: {','.join(allowed_save_mode)}"
+                 ),
+             )
+
+         if len(spine_df.queries["queries"]) != 1:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INVALID_ARGUMENT,
+                 original_exception=ValueError(
+                     f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
+                 ),
+             )
+
+         result_df, join_keys = self._join_features(
+             spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
+         )
+
+         snapshot_table = None
+         if materialized_table is not None:
+             if "." in materialized_table:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INVALID_ARGUMENT,
+                     original_exception=ValueError(f"materialized_table {materialized_table} contains invalid char `.`"),
+                 )
+
+             # TODO (wezhou) change materialized_table to SqlIdentifier
+             found_rows = self._find_object("TABLES", SqlIdentifier(materialized_table))
+             if save_mode.lower() == "errorifexists" and len(found_rows) > 0:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.OBJECT_ALREADY_EXISTS,
+                     original_exception=ValueError(f"Dataset table {materialized_table} already exists."),
+                 )
+
+             self._dump_dataset(result_df, materialized_table, join_keys, spine_timestamp_col)
+
+             snapshot_table = f"{materialized_table}_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
+             snapshot_table = self._get_fully_qualified_name(snapshot_table)
+             materialized_table = self._get_fully_qualified_name(materialized_table)
+
+             try:
+                 self._session.sql(f"CREATE TABLE {snapshot_table} CLONE {materialized_table}").collect(
+                     statement_params=self._telemetry_stmp
+                 )
+             except Exception as e:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                     original_exception=RuntimeError(
+                         f"Failed to create clone {snapshot_table} from table {materialized_table}: {e}."
+                     ),
+                 ) from e
+
+             result_df = self._session.sql(f"SELECT * FROM {snapshot_table}")
+
+         if exclude_columns is not None:
+             result_df = self._exclude_columns(result_df, exclude_columns)
+
+         fs_meta = FeatureStoreMetadata(
+             spine_query=spine_df.queries["queries"][0],
+             connection_params=vars(self._config),
+             features=[fv.to_json() for fv in features],
+         )
+
+         dataset = Dataset(
+             self._session,
+             df=result_df,
+             materialized_table=materialized_table,
+             snapshot_table=snapshot_table,
+             timestamp_col=spine_timestamp_col,
+             label_cols=spine_label_cols,
+             feature_store_metadata=fs_meta,
+             desc=desc,
+         )
+         return dataset
+
+ @dispatch_decorator(prpr_version="1.0.8")
977
+ def load_feature_views_from_dataset(self, dataset: Dataset) -> List[Union[FeatureView, FeatureViewSlice]]:
978
+ """
979
+ Retrieve FeatureViews used during Dataset construction.
980
+
981
+ Args:
982
+ dataset: Dataset object created from feature store.
983
+
984
+ Returns:
985
+ List of FeatureViews used during Dataset construction.
986
+
987
+ Raises:
988
+ ValueError: if dataset object is not generated from feature store.
989
+ """
990
+ serialized_objs = dataset.load_features()
991
+ if serialized_objs is None:
992
+ raise ValueError(f"Dataset {dataset} does not contain valid feature view information.")
993
+
994
+ return self._load_serialized_feature_objects(serialized_objs)
995
+
996
+ @dispatch_decorator(prpr_version="1.0.8")
997
+ def clear(self) -> None:
998
+ """
999
+ Clear all feature store internal objects including feature views, entities etc. Note feature store
1000
+ instance (snowflake schema) won't be deleted. Use snowflake to delete feature store instance.
1001
+
1002
+ Raises:
1003
+ SnowflakeMLException: [RuntimeError] Failed to clear feature store.
1004
+ """
1005
+ try:
1006
+ result = self._session.sql(
1007
+ f"""
1008
+ SELECT *
1009
+ FROM {self._config.database}.INFORMATION_SCHEMA.SCHEMATA
1010
+ WHERE SCHEMA_NAME = '{self._config.schema.resolved()}'
1011
+ """
1012
+ ).collect()
1013
+ if len(result) == 0:
1014
+ return
1015
+
1016
+ object_types = ["DYNAMIC TABLES", "TABLES", "VIEWS", "TASKS"]
1017
+ for obj_type in object_types:
1018
+ all_object_rows = self._find_object(obj_type, None)
1019
+ for row in all_object_rows:
1020
+ obj_name = self._get_fully_qualified_name(SqlIdentifier(row["name"], case_sensitive=True))
1021
+ self._session.sql(f"DROP {obj_type[:-1]} {obj_name}").collect()
1022
+ logger.info(f"Deleted {obj_type[:-1]}: {obj_name}.")
1023
+
1024
+ entity_tags = self._find_object("TAGS", SqlIdentifier(_ENTITY_TAG_PREFIX), prefix_match=True)
1025
+ all_tags = [
1026
+ _FEATURE_VIEW_ENTITY_TAG,
1027
+ _FEATURE_VIEW_TS_COL_TAG,
1028
+ _FEATURE_STORE_OBJECT_TAG,
1029
+ ] + [SqlIdentifier(row["name"], case_sensitive=True) for row in entity_tags]
1030
+ for tag_name in all_tags:
1031
+ obj_name = self._get_fully_qualified_name(tag_name)
1032
+ self._session.sql(f"DROP TAG IF EXISTS {obj_name}").collect()
1033
+ logger.info(f"Deleted TAG: {obj_name}.")
1034
+
1035
+ except Exception as e:
1036
+ raise snowml_exceptions.SnowflakeMLException(
1037
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1038
+ original_exception=RuntimeError(f"Failed to clear feature store {self._config.full_schema_path}: {e}."),
1039
+ ) from e
1040
+ logger.info(f"Feature store {self._config.full_schema_path} has been cleared.")
1041
+
1042
+     def _create_dynamic_table(
+         self,
+         feature_view_name: SqlIdentifier,
+         feature_view: FeatureView,
+         fully_qualified_name: str,
+         column_descs: str,
+         entities: str,
+         schedule_task: bool,
+         warehouse: SqlIdentifier,
+         timestamp_col: SqlIdentifier,
+         block: bool,
+         override: bool,
+     ) -> None:
+         # TODO: cluster by join keys once DT supports that
+         try:
+             override_clause = " OR REPLACE" if override else ""
+             query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
+                 TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
+                 COMMENT = '{feature_view.desc}'
+                 TAG (
+                     {self._get_fully_qualified_name(_FEATURE_VIEW_ENTITY_TAG)} = '{entities}',
+                     {self._get_fully_qualified_name(_FEATURE_VIEW_TS_COL_TAG)} = '{timestamp_col}',
+                     {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = ''
+                 )
+                 WAREHOUSE = {warehouse}
+                 AS {feature_view.query}
+             """
+             self._session.sql(query).collect(statement_params=self._telemetry_stmp)
+             self._session.sql(f"ALTER DYNAMIC TABLE {fully_qualified_name} REFRESH").collect(
+                 block=block, statement_params=self._telemetry_stmp
+             )
+
+             if schedule_task:
+                 try:
+                     self._session.sql(
+                         f"""CREATE{override_clause} TASK {fully_qualified_name}
+                             WAREHOUSE = {warehouse}
+                             SCHEDULE = 'USING CRON {feature_view.refresh_freq}'
+                             AS ALTER DYNAMIC TABLE {fully_qualified_name} REFRESH
+                         """
+                     ).collect(statement_params=self._telemetry_stmp)
+                     self._session.sql(
+                         f"""
+                         ALTER TASK {fully_qualified_name}
+                         SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = ''
+                         """
+                     ).collect(statement_params=self._telemetry_stmp)
+                     self._session.sql(f"ALTER TASK {fully_qualified_name} RESUME").collect(
+                         statement_params=self._telemetry_stmp
+                     )
+                 except Exception:
+                     self._session.sql(f"DROP DYNAMIC TABLE IF EXISTS {fully_qualified_name}").collect(
+                         statement_params=self._telemetry_stmp
+                     )
+                     raise
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(
+                     f"Create dynamic table [\n{query}\n] or task {fully_qualified_name} failed: {e}."
+                 ),
+             ) from e
+
+         found_dts = self._find_object("DYNAMIC TABLES", feature_view_name)
+         if len(found_dts) != 1:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"Cannot find dynamic table: `{feature_view_name}`."),
+             )
+         if found_dts[0]["refresh_mode"] != "INCREMENTAL":
+             warnings.warn(
+                 "Your pipeline won't be incrementally refreshed due to: "
+                 + f"\"{found_dts[0]['refresh_mode_reason']}\". "
+                 + "It will likely incur higher cost.",
+                 stacklevel=2,
+                 category=UserWarning,
+             )
+
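For reference, a standalone sketch (hypothetical database, schema, tag, warehouse, and query names) of the DDL shape this method emits, including the TARGET_LAG switch between task-driven and self-refreshing views:

# Sketch of the DDL emitted above with hypothetical values substituted.
# TARGET_LAG is 'DOWNSTREAM' when a cron task drives refreshes; otherwise the
# feature view's own refresh_freq (e.g. '1 hour') is used directly.
schedule_task = False
refresh_freq = "1 hour"
target_lag = "DOWNSTREAM" if schedule_task else refresh_freq
ddl = f"""CREATE OR REPLACE DYNAMIC TABLE MY_DB.MY_FS."FV$V1" (ID COMMENT 'key')
    TARGET_LAG = '{target_lag}'
    COMMENT = 'example feature view'
    WAREHOUSE = MY_WH
    AS SELECT ID, COUNT(*) AS CNT FROM MY_DB.RAW.EVENTS GROUP BY ID
"""
print(ddl)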
+     def _dump_dataset(
+         self,
+         df: DataFrame,
+         table_name: str,
+         join_keys: List[SqlIdentifier],
+         spine_timestamp_col: Optional[SqlIdentifier] = None,
+     ) -> None:
+         if len(df.queries["queries"]) != 1:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INVALID_ARGUMENT,
+                 original_exception=ValueError(f"Dataset df must contain only one query. Got: {df.queries['queries']}"),
+             )
+         schema = ", ".join([f"{c.name} {type_utils.convert_sp_to_sf_type(c.datatype)}" for c in df.schema.fields])
+         fully_qualified_name = self._get_fully_qualified_name(table_name)
+
+         try:
+             self._session.sql(
+                 f"""CREATE TABLE IF NOT EXISTS {fully_qualified_name} ({schema})
+                     CLUSTER BY ({', '.join(join_keys)})
+                     TAG ({self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = '')
+                 """
+             ).collect(block=True, statement_params=self._telemetry_stmp)
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to create table {fully_qualified_name}: {e}."),
+             ) from e
+
+         source_query = df.queries["queries"][0]
+
+         if spine_timestamp_col is not None:
+             join_keys.append(spine_timestamp_col)
+
+         _, _, dest_alias, _ = identifier.parse_schema_level_object_identifier(fully_qualified_name)
+         source_alias = f"{dest_alias}_source"
+         join_cond = " AND ".join([f"{dest_alias}.{k} = {source_alias}.{k}" for k in join_keys])
+         update_clause = ", ".join([f"{dest_alias}.{c} = {source_alias}.{c}" for c in df.columns])
+         insert_clause = ", ".join([f"{source_alias}.{c}" for c in df.columns])
+         query = f"""
+             MERGE INTO {fully_qualified_name} USING ({source_query}) {source_alias} ON {join_cond}
+             WHEN MATCHED THEN UPDATE SET {update_clause}
+             WHEN NOT MATCHED THEN INSERT ({', '.join(df.columns)}) VALUES ({insert_clause})
+         """
+         try:
+             self._session.sql(query).collect(block=True, statement_params=self._telemetry_stmp)
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to create dataset {fully_qualified_name} with merge: {e}."),
+             ) from e
+
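The upsert above is a standard Snowflake MERGE keyed on the join keys, plus the spine timestamp column when present. A self-contained sketch of how the clauses are assembled, with hypothetical table and column names:

# Sketch of the MERGE assembly above (hypothetical identifiers).
dest, src = "MY_DATASET", "MY_DATASET_source"
join_keys = ["ID", "TS"]
columns = ["ID", "TS", "F1", "F2"]

join_cond = " AND ".join(f"{dest}.{k} = {src}.{k}" for k in join_keys)
update_clause = ", ".join(f"{dest}.{c} = {src}.{c}" for c in columns)
insert_clause = ", ".join(f"{src}.{c}" for c in columns)
print(
    f"MERGE INTO {dest} USING (<source query>) {src} ON {join_cond}\n"
    f"WHEN MATCHED THEN UPDATE SET {update_clause}\n"
    f"WHEN NOT MATCHED THEN INSERT ({', '.join(columns)}) VALUES ({insert_clause})"
)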
+     def _validate_entity_exists(self, name: SqlIdentifier) -> bool:
+         full_entity_tag_name = self._get_entity_name(name)
+         found_rows = self._find_object("TAGS", full_entity_tag_name)
+         return len(found_rows) > 0
+
+     def _join_features(
+         self,
+         spine_df: DataFrame,
+         features: List[Union[FeatureView, FeatureViewSlice]],
+         spine_timestamp_col: Optional[SqlIdentifier],
+         include_feature_view_timestamp_col: bool,
+     ) -> Tuple[DataFrame, List[SqlIdentifier]]:
+         if len(spine_df.queries["queries"]) != 1:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INVALID_ARGUMENT,
+                 original_exception=ValueError(
+                     f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
+                 ),
+             )
+
+         for f in features:
+             f = f.feature_view_ref if isinstance(f, FeatureViewSlice) else f
+             if f.status == FeatureViewStatus.DRAFT:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.NOT_FOUND,
+                     original_exception=ValueError(f"FeatureView {f.name} has not been registered."),
+                 )
+             for e in f.entities:
+                 for k in e.join_keys:
+                     if k not in to_sql_identifiers(spine_df.columns):
+                         raise snowml_exceptions.SnowflakeMLException(
+                             error_code=error_codes.INVALID_ARGUMENT,
+                             original_exception=ValueError(
+                                 f"join_key {k} from Entity {e.name} in FeatureView {f.name} is not found in spine_df."
+                             ),
+                         )
+
+         if self._asof_join_enabled is None:
+             self._asof_join_enabled = self._is_asof_join_enabled()
+
+         # TODO: leverage Snowpark dataframe for more concise syntax once it supports AsOfJoin
+         query = spine_df.queries["queries"][0]
+         layer = 0
+         for f in features:
+             if isinstance(f, FeatureViewSlice):
+                 cols = f.names
+                 f = f.feature_view_ref
+             else:
+                 cols = f.feature_names
+
+             join_keys = [k for e in f.entities for k in e.join_keys]
+             join_keys_str = ", ".join(join_keys)
+             assert f.version is not None
+             join_table_name = f.fully_qualified_name()
+
+             if spine_timestamp_col is not None and f.timestamp_col is not None:
+                 if self._asof_join_enabled:
+                     if include_feature_view_timestamp_col:
+                         f_ts_col_alias = identifier.concat_names([f.name, "_", f.version, "_", f.timestamp_col])
+                         f_ts_col_str = f"r_{layer}.{f.timestamp_col} AS {f_ts_col_alias},"
+                     else:
+                         f_ts_col_str = ""
+                     query = f"""
+                         SELECT
+                             l_{layer}.*,
+                             {f_ts_col_str}
+                             r_{layer}.* EXCLUDE ({join_keys_str}, {f.timestamp_col})
+                         FROM ({query}) l_{layer}
+                         ASOF JOIN (
+                             SELECT {join_keys_str}, {f.timestamp_col}, {', '.join(cols)}
+                             FROM {join_table_name}
+                         ) r_{layer}
+                         MATCH_CONDITION (l_{layer}.{spine_timestamp_col} >= r_{layer}.{f.timestamp_col})
+                         ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
+                     """
+                 else:
+                     query = self._composed_union_window_join_query(
+                         layer=layer,
+                         s_query=query,
+                         s_ts_col=spine_timestamp_col,
+                         f_df=f.feature_df,
+                         f_table_name=join_table_name,
+                         f_ts_col=f.timestamp_col,
+                         join_keys=join_keys,
+                     )
+             else:
+                 query = f"""
+                     SELECT
+                         l_{layer}.*,
+                         r_{layer}.* EXCLUDE {join_keys_str}
+                     FROM ({query}) l_{layer}
+                     LEFT JOIN (
+                         SELECT {join_keys_str}, {', '.join(cols)}
+                         FROM {join_table_name}
+                     ) r_{layer}
+                     ON {' AND '.join([f'l_{layer}.{k} = r_{layer}.{k}' for k in join_keys])}
+                 """
+             layer += 1
+
+         return self._session.sql(query), join_keys
+
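When ASOF JOIN is available, each feature view adds one join layer: every spine row is paired with the most recent feature row whose timestamp is at or before the spine timestamp. A sketch of a single layer, with hypothetical identifiers throughout:

# Sketch of one join layer produced above (hypothetical names).
layer, spine_query = 0, "SELECT ID, TS FROM MY_DB.RAW.SPINE"
print(f"""
SELECT l_{layer}.*, r_{layer}.* EXCLUDE (ID, TS)
FROM ({spine_query}) l_{layer}
ASOF JOIN (SELECT ID, TS, F1 FROM MY_DB.MY_FS."FV$V1") r_{layer}
MATCH_CONDITION (l_{layer}.TS >= r_{layer}.TS)
ON l_{layer}.ID = r_{layer}.ID
""")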
+     def _is_asof_join_enabled(self) -> bool:
+         result = None
+         try:
+             result = self._session.sql(
+                 """
+                 WITH
+                 spine AS (
+                     SELECT "ID", "TS" FROM ( SELECT $1 AS "ID", $2 AS "TS" FROM VALUES (1 :: INT, 100 :: INT))
+                 ),
+                 feature AS (
+                     SELECT "ID", "TS" FROM ( SELECT $1 AS "ID", $2 AS "TS" FROM VALUES (1 :: INT, 100 :: INT))
+                 )
+                 SELECT * FROM spine
+                 ASOF JOIN feature
+                 MATCH_CONDITION ( spine.ts >= feature.ts )
+                 ON spine.id = feature.id;
+                 """
+             ).collect()
+         except SnowparkSQLException:
+             return False
+         return result is not None and len(result) == 1
+
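This method is a capability probe: it runs a trivial ASOF JOIN and interprets a SQL error as lack of support. A generic sketch of the pattern, with run_sql as a hypothetical stand-in for session.sql(...).collect():

# Sketch of the capability-probe pattern used above.
def supports_syntax(run_sql, probe_query: str) -> bool:
    try:
        result = run_sql(probe_query)
    except Exception:  # the real code narrows this to SnowparkSQLException
        return False
    return result is not None and len(result) == 1

def rejecting_run_sql(query: str):
    raise RuntimeError("SQL compilation error: ASOF JOIN not supported")

print(supports_syntax(rejecting_run_sql, "SELECT ... ASOF JOIN ..."))   # False
print(supports_syntax(lambda q: [("row",)], "SELECT ... ASOF JOIN ..."))  # True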
+     # Visualize how the query works:
+     # https://docs.google.com/presentation/d/15fT2F34OFp5RPv2-hZirHw6wliPRVRlPHvoCMIB00oY/edit#slide=id.g25ab53e6c8d_0_32
+     def _composed_union_window_join_query(
+         self,
+         layer: int,
+         s_query: str,
+         s_ts_col: SqlIdentifier,
+         f_df: DataFrame,
+         f_table_name: str,
+         f_ts_col: SqlIdentifier,
+         join_keys: List[SqlIdentifier],
+     ) -> str:
+         s_df = self._session.sql(s_query)
+         s_only_cols = [col for col in to_sql_identifiers(s_df.columns) if col not in [*join_keys, s_ts_col]]
+         f_only_cols = [col for col in to_sql_identifiers(f_df.columns) if col not in [*join_keys, f_ts_col]]
+         join_keys_str = ", ".join(join_keys)
+         temp_prefix = "_FS_TEMP_"
+
+         def join_cols(cols: List[SqlIdentifier], end_comma: bool, rename: bool, prefix: str = "") -> str:
+             if not cols:
+                 return ""
+             cols = [f"{prefix}{col}" for col in cols]  # type: ignore[misc]
+             if rename:
+                 cols = [f"{col} AS {col.replace(temp_prefix, '')}" for col in cols]  # type: ignore[misc]
+             line_end = "," if end_comma else ""
+             return ", ".join(cols) + line_end
+
+         # Part 1: CTE of spine query
+         spine_cte = f"""
+             WITH spine_{layer} AS (
+                 {s_query}
+             ),"""
+
+         # Part 2: create union of spine table and feature tables
+         s_select = f"""
+             SELECT
+                 'SPINE' {temp_prefix}src,
+                 {s_ts_col},
+                 {join_keys_str},
+                 {join_cols(s_only_cols, end_comma=True, rename=False)}
+                 {join_cols(f_only_cols, end_comma=False, rename=False, prefix='null AS ')}
+             FROM ({s_query})"""
+         f_select = f"""
+             SELECT
+                 'FEATURE' {temp_prefix}src,
+                 {f_ts_col} {s_ts_col},
+                 {join_keys_str},
+                 {join_cols(s_only_cols, end_comma=True, rename=False, prefix='null AS ')}
+                 {join_cols(f_only_cols, end_comma=False, rename=False)}
+             FROM {f_table_name}"""
+         union_cte = f"""
+             unioned_{layer} AS (
+                 {s_select}
+                 UNION ALL
+                 {f_select}
+             ),"""
+
+         # Part 3: create window cte and add window column
+         window_select = f"SELECT {temp_prefix}src, {s_ts_col}, {join_keys_str}"
+         for f_col in f_only_cols:
+             window_select = (
+                 window_select
+                 + f"""
+                 ,last_value({f_col}) IGNORE NULLS OVER (
+                     PARTITION BY {join_keys_str}
+                     ORDER BY {s_ts_col} ASC, {temp_prefix}src ASC
+                     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+                 ) AS {temp_prefix}{f_col}"""
+             )
+         window_select = window_select + f" FROM unioned_{layer}"
+         window_cte = f"""
+             windowed_{layer} AS (
+                 {window_select}
+             )"""
+
+         # Part 4: join original spine table with window table
+         prefix_f_only_cols = to_sql_identifiers(
+             [f"{temp_prefix}{name.resolved()}" for name in f_only_cols], case_sensitive=True
+         )
+         last_select = f"""
+             SELECT
+                 {join_keys_str},
+                 {s_ts_col},
+                 {join_cols(s_only_cols, end_comma=True, rename=False)}
+                 {join_cols(prefix_f_only_cols, end_comma=False, rename=True)}
+             FROM spine_{layer}
+             JOIN windowed_{layer}
+             USING ({join_keys_str}, {s_ts_col})
+             WHERE windowed_{layer}.{temp_prefix}src = 'SPINE'"""
+
+         # Part 5: complete query
+         complete_query = spine_cte + union_cte + window_cte + last_select
+
+         return complete_query
+
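Without ASOF JOIN, the method above unions spine and feature rows into one stream and back-fills features with last_value(...) IGNORE NULLS over a timestamp-ordered window; only spine rows survive the final filter. A pure-Python sketch of those semantics on hypothetical data:

# Sketch of the union + window backfill semantics (hypothetical rows).
# Rows are (src, ts, key, feature); feature is None on spine rows, and the
# window carries the latest non-null feature value forward onto them.
rows = [
    ("FEATURE", 100, "A", 1.0),
    ("SPINE", 100, "A", None),   # sees 1.0 ('FEATURE' sorts before 'SPINE' at equal ts)
    ("SPINE", 150, "A", None),   # still 1.0
    ("FEATURE", 200, "A", 2.0),
    ("SPINE", 250, "A", None),   # sees 2.0
]
rows.sort(key=lambda r: (r[2], r[1], r[0]))  # PARTITION BY key ORDER BY ts, src
last = {}
for src, ts, key, feat in rows:
    if feat is not None:
        last[key] = feat
    if src == "SPINE":
        print(ts, key, last.get(key))  # the WHERE ..._src = 'SPINE' filter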
+     def _get_entity_name(self, raw_name: SqlIdentifier) -> SqlIdentifier:
+         return SqlIdentifier(identifier.concat_names([_ENTITY_TAG_PREFIX, raw_name]))
+
+     def _get_fully_qualified_name(self, name: Union[SqlIdentifier, str]) -> str:
+         return f"{self._config.full_schema_path}.{name}"
+
+     # TODO: SHOW DYNAMIC TABLES is very slow while other SHOW commands are fast; investigate with DT in SNOW-902804.
+     def _get_fv_backend_representations(
+         self, object_name: Optional[SqlIdentifier], prefix_match: bool = False
+     ) -> List[Row]:
+         dynamic_table_results = self._find_object("DYNAMIC TABLES", object_name, prefix_match)
+         view_results = self._find_object("VIEWS", object_name, prefix_match)
+         return dynamic_table_results + view_results
+
+     def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
+         assert operation in ["RESUME", "SUSPEND"], f"Operation: {operation} not supported"
+         if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.NOT_FOUND,
+                 original_exception=ValueError(f"FeatureView {feature_view.name} has not been registered."),
+             )
+
+         fully_qualified_name = feature_view.fully_qualified_name()
+         try:
+             self._session.sql(f"ALTER DYNAMIC TABLE {fully_qualified_name} {operation}").collect(
+                 statement_params=self._telemetry_stmp
+             )
+             self._session.sql(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}").collect(
+                 statement_params=self._telemetry_stmp
+             )
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to update feature view {fully_qualified_name}'s status: {e}"),
+             ) from e
+
+         feature_view._status = self.get_feature_view(feature_view.name, feature_view.version).status
+         logger.info(f"Successfully {operation} FeatureView {feature_view.name}/{feature_view.version}.")
+         return feature_view
+
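Suspend/resume always issues the statement pair below; IF EXISTS covers feature views that have no cron task (only scheduled views get one). A sketch with a hypothetical name:

# Sketch of the statement pair issued above (hypothetical identifier).
fully_qualified_name = 'MY_DB.MY_FS."FV$V1"'
for operation in ("SUSPEND", "RESUME"):
    print(f"ALTER DYNAMIC TABLE {fully_qualified_name} {operation}")
    print(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}")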
+     def _find_feature_views(
+         self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]
+     ) -> List[FeatureView]:
+         if not self._validate_entity_exists(entity_name):
+             return []
+
+         all_fvs = self._get_fv_backend_representations(object_name=None)
+         fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
+
+         if len(fv_maps.keys()) == 0:
+             return []
+
+         # NOTE: querying INFORMATION_SCHEMA for Entity lineage can be expensive depending on how many active
+         # FeatureViews there are. If this ever becomes an issue, consider exploring improvements.
+         try:
+             queries = [
+                 f"""
+                 SELECT
+                     TAG_VALUE,
+                     OBJECT_NAME
+                 FROM TABLE(
+                     {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
+                         '{self._get_fully_qualified_name(fv_name)}',
+                         'table'
+                     )
+                 )
+                 WHERE LEVEL = 'TABLE'
+                 AND TAG_NAME = '{_FEATURE_VIEW_ENTITY_TAG}'
+                 """
+                 for fv_name in fv_maps.keys()
+             ]
+
+             results = self._session.sql("\nUNION\n".join(queries)).collect(statement_params=self._telemetry_stmp)
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
+             ) from e
+         outputs = []
+         for r in results:
+             if entity_name == SqlIdentifier(r["TAG_VALUE"], case_sensitive=True):
+                 fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
+                 fv_name = SqlIdentifier(fv_name, case_sensitive=True)
+                 obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
+                 if feature_view_name is not None:
+                     if fv_name == feature_view_name:
+                         outputs.append(self._compose_feature_view(fv_maps[obj_name]))
+                     else:
+                         continue
+                 else:
+                     outputs.append(self._compose_feature_view(fv_maps[obj_name]))
+         return outputs
+
+     def _compose_feature_view(self, row: Row) -> FeatureView:
+         name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
+         name = SqlIdentifier(name, case_sensitive=True)
+         m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
+         if m is None:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWML_ERROR,
+                 original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
+             )
+
+         if m.group("obj_type") == "DYNAMIC TABLE":
+             query = m.group("query")
+             df = self._session.sql(query)
+             desc = m.group("comment")
+             entity_names = m.group("entities")
+             entities = [self.get_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
+             ts_col = m.group("ts_col")
+             timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
+
+             fv = FeatureView._construct_feature_view(
+                 name=name,
+                 entities=entities,
+                 feature_df=df,
+                 timestamp_col=timestamp_col,
+                 desc=desc,
+                 version=version,
+                 status=FeatureViewStatus(row["scheduling_state"]),
+                 feature_descs=self._fetch_column_descs(
+                     "DYNAMIC TABLE", SqlIdentifier(row["name"], case_sensitive=True)
+                 ),
+                 refresh_freq=row["target_lag"],
+                 database=self._config.database.identifier(),
+                 schema=self._config.schema.identifier(),
+                 warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier(),
+                 refresh_mode=row["refresh_mode"],
+                 refresh_mode_reason=row["refresh_mode_reason"],
+             )
+             return fv
+         else:
+             query = m.group("query")
+             df = self._session.sql(query)
+             desc = m.group("comment")
+             entity_names = m.group("entities")
+             entities = [self.get_entity(n) for n in entity_names.split(_FEATURE_VIEW_ENTITY_TAG_DELIMITER)]
+             ts_col = m.group("ts_col")
+             timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
+
+             fv = FeatureView._construct_feature_view(
+                 name=name,
+                 entities=entities,
+                 feature_df=df,
+                 timestamp_col=timestamp_col,
+                 desc=desc,
+                 version=version,
+                 status=FeatureViewStatus.STATIC,
+                 feature_descs=self._fetch_column_descs("VIEW", SqlIdentifier(row["name"], case_sensitive=True)),
+                 refresh_freq=None,
+                 database=self._config.database.identifier(),
+                 schema=self._config.schema.identifier(),
+                 warehouse=None,
+                 refresh_mode=None,
+                 refresh_mode_reason=None,
+             )
+             return fv
+
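Feature views are reconstructed by regex-parsing the DDL text returned by SHOW. A simplified stand-in pattern (not the real _DT_OR_VIEW_QUERY_PATTERN, which lives earlier in this module) showing the named-group extraction:

import re

# Simplified sketch of the named-group parsing used above.
pattern = re.compile(
    r"CREATE (?P<obj_type>DYNAMIC TABLE|VIEW) .* COMMENT='(?P<comment>.*)' AS (?P<query>.*)",
    re.DOTALL,
)
text = "CREATE DYNAMIC TABLE FV$V1 COMMENT='demo' AS SELECT ID, F1 FROM SRC"
m = pattern.match(text)
assert m is not None
print(m.group("obj_type"), "|", m.group("comment"), "|", m.group("query"))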
+     def _fetch_column_descs(self, obj_type: str, obj_name: SqlIdentifier) -> Dict[str, str]:
+         res = self._session.sql(f"DESC {obj_type} {self._get_fully_qualified_name(obj_name)}").collect(
+             statement_params=self._telemetry_stmp
+         )
+
+         descs = {}
+         for r in res:
+             if r["comment"] is not None:
+                 descs[SqlIdentifier(r["name"], case_sensitive=True).identifier()] = r["comment"]
+         return descs
+
+     def _find_object(
+         self, object_type: str, object_name: Optional[SqlIdentifier], prefix_match: bool = False
+     ) -> List[Row]:
+         """Try to find an object by given type and name pattern.
+
+         Args:
+             object_type: Type of the object. Could be TABLES, TAGS etc.
+             object_name: Name of the object. Matches everything of object_type if object_name is None.
+             prefix_match: If True, search all objects with object_name as a prefix; otherwise
+                 do an exact match on object_name. Defaults to False. If object_name is empty and
+                 prefix_match is True, it will match everything of object_type.
+
+         Raises:
+             SnowflakeMLException: [RuntimeError] Failed to find resource.
+
+         Returns:
+             A list of rows found.
+         """
+         if object_name is None:
+             match_name = "%"
+         elif prefix_match:
+             match_name = object_name.resolved() + "%"
+         else:
+             match_name = object_name.resolved()
+
+         search_space, obj_domain = self._obj_search_spaces[object_type]
+         all_rows = []
+         fs_tag_objects = []
+         tag_free_object_types = ["TAGS", "SCHEMAS", "WAREHOUSES"]
+         try:
+             search_scope = f"IN {search_space}" if search_space is not None else ""
+             all_rows = self._session.sql(f"SHOW {object_type} LIKE '{match_name}' {search_scope}").collect(
+                 statement_params=self._telemetry_stmp
+             )
+             # There could be non-FS objects under the FS schema, thus filter on objects with the FS special tag.
+             if object_type not in tag_free_object_types and len(all_rows) > 0:
+                 # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive;
+                 # use double quotes to make it case-sensitive.
+                 queries = [
+                     f"""
+                     SELECT OBJECT_NAME
+                     FROM TABLE(
+                         {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
+                             '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
+                             '{obj_domain}'
+                         )
+                     )
+                     WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
+                     AND TAG_SCHEMA = '{self._config.schema.resolved()}'
+                     """
+                     for row in all_rows
+                 ]
+                 fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
+                     statement_params=self._telemetry_stmp
+                 )
+                 fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
+         except Exception as e:
+             raise snowml_exceptions.SnowflakeMLException(
+                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
+                 original_exception=RuntimeError(f"Failed to find object: {e}"),
+             ) from e
+
+         result = []
+         for row in all_rows:
+             found_name = row["name"]
+             prefix = object_name.resolved() if object_name is not None else ""
+             if found_name.startswith(prefix) and (object_type in tag_free_object_types or found_name in fs_tag_objects):
+                 result.append(row)
+         return result
+
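The LIKE pattern is built in three cases: None matches everything, a prefix match appends %, and otherwise the resolved name is matched exactly. A sketch using plain strings in place of SqlIdentifier:

# Sketch of the LIKE-pattern logic above (plain str stands in for SqlIdentifier).
def make_match_name(object_name, prefix_match: bool) -> str:
    if object_name is None:
        return "%"
    return object_name + "%" if prefix_match else object_name

for name, prefix in ((None, False), ("FV", True), ("FV", False)):
    print(f"SHOW TAGS LIKE '{make_match_name(name, prefix)}' IN SCHEMA MY_DB.MY_FS")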
+     def _load_serialized_feature_objects(
+         self, serialized_feature_objs: List[str]
+     ) -> List[Union[FeatureView, FeatureViewSlice]]:
+         results: List[Union[FeatureView, FeatureViewSlice]] = []
+         for obj in serialized_feature_objs:
+             try:
+                 obj_type = json.loads(obj)[_FEATURE_OBJ_TYPE]
+             except Exception as e:
+                 raise ValueError(f"Malformed serialized feature object: {obj}") from e
+
+             if obj_type == FeatureView.__name__:
+                 results.append(FeatureView.from_json(obj, self._session))
+             elif obj_type == FeatureViewSlice.__name__:
+                 results.append(FeatureViewSlice.from_json(obj, self._session))
+             else:
+                 raise ValueError(f"Unsupported feature object type: {obj_type}")
+         return results
+
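Deserialization dispatches on a type tag embedded in the JSON. A minimal sketch of the pattern, with "_obj_type" as a hypothetical stand-in for the _FEATURE_OBJ_TYPE key:

import json

# Sketch of the type-tag dispatch above (hypothetical key and deserializers).
DESERIALIZERS = {
    "FeatureView": lambda d: f"FeatureView({d['name']})",
    "FeatureViewSlice": lambda d: f"FeatureViewSlice({d['name']})",
}

def load_one(serialized: str) -> str:
    data = json.loads(serialized)
    obj_type = data["_obj_type"]
    if obj_type not in DESERIALIZERS:
        raise ValueError(f"Unsupported feature object type: {obj_type}")
    return DESERIALIZERS[obj_type](data)

print(load_one('{"_obj_type": "FeatureView", "name": "FV"}'))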
+     def _exclude_columns(self, df: DataFrame, exclude_columns: List[str]) -> DataFrame:
+         exclude_columns = to_sql_identifiers(exclude_columns)  # type: ignore[assignment]
+         df_cols = to_sql_identifiers(df.columns)
+         for col in exclude_columns:
+             if col not in df_cols:
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INVALID_ARGUMENT,
+                     original_exception=ValueError(
+                         f"{col} in exclude_columns does not exist in dataframe columns: {df_cols}"
+                     ),
+                 )
+         return cast(DataFrame, df.drop(exclude_columns))