snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. snowflake/cortex/__init__.py +2 -1
  2. snowflake/cortex/_complete.py +240 -16
  3. snowflake/cortex/_extract_answer.py +0 -1
  4. snowflake/cortex/_sentiment.py +0 -1
  5. snowflake/cortex/_sse_client.py +81 -0
  6. snowflake/cortex/_summarize.py +0 -1
  7. snowflake/cortex/_translate.py +0 -1
  8. snowflake/cortex/_util.py +34 -10
  9. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  10. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  11. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  12. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  13. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  14. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  15. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  16. snowflake/ml/_internal/telemetry.py +26 -0
  17. snowflake/ml/_internal/utils/identifier.py +14 -0
  18. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  19. snowflake/ml/dataset/dataset.py +54 -32
  20. snowflake/ml/dataset/dataset_factory.py +3 -4
  21. snowflake/ml/feature_store/feature_store.py +440 -243
  22. snowflake/ml/feature_store/feature_view.py +61 -9
  23. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  24. snowflake/ml/fileset/fileset.py +2 -2
  25. snowflake/ml/fileset/snowfs.py +4 -15
  26. snowflake/ml/fileset/stage_fs.py +6 -8
  27. snowflake/ml/lineage/__init__.py +3 -0
  28. snowflake/ml/lineage/lineage_node.py +139 -0
  29. snowflake/ml/model/_client/model/model_impl.py +47 -14
  30. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  31. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  32. snowflake/ml/model/_client/sql/model.py +1 -0
  33. snowflake/ml/model/_client/sql/model_version.py +47 -4
  34. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
  35. snowflake/ml/model/_model_composer/model_composer.py +7 -6
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
  37. snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
  38. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
  39. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -3
  40. snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
  41. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  42. snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
  43. snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
  44. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  45. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  46. snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
  47. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  48. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  49. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  50. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  51. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  52. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  53. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  54. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  55. snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
  56. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  57. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  58. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  59. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  60. snowflake/ml/model/_packager/model_packager.py +9 -4
  61. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  62. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  63. snowflake/ml/model/_signatures/core.py +13 -1
  64. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  65. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  66. snowflake/ml/model/custom_model.py +22 -2
  67. snowflake/ml/model/model_signature.py +2 -0
  68. snowflake/ml/model/type_hints.py +74 -4
  69. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  70. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +158 -121
  71. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
  72. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +39 -18
  73. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +88 -134
  74. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +22 -17
  75. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
  76. snowflake/ml/modeling/cluster/affinity_propagation.py +5 -3
  77. snowflake/ml/modeling/cluster/agglomerative_clustering.py +5 -3
  78. snowflake/ml/modeling/cluster/birch.py +5 -3
  79. snowflake/ml/modeling/cluster/bisecting_k_means.py +5 -3
  80. snowflake/ml/modeling/cluster/dbscan.py +5 -3
  81. snowflake/ml/modeling/cluster/feature_agglomeration.py +5 -3
  82. snowflake/ml/modeling/cluster/k_means.py +5 -3
  83. snowflake/ml/modeling/cluster/mean_shift.py +5 -3
  84. snowflake/ml/modeling/cluster/mini_batch_k_means.py +5 -3
  85. snowflake/ml/modeling/cluster/optics.py +5 -3
  86. snowflake/ml/modeling/cluster/spectral_biclustering.py +5 -3
  87. snowflake/ml/modeling/cluster/spectral_clustering.py +5 -3
  88. snowflake/ml/modeling/cluster/spectral_coclustering.py +5 -3
  89. snowflake/ml/modeling/compose/column_transformer.py +5 -3
  90. snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
  91. snowflake/ml/modeling/covariance/elliptic_envelope.py +5 -3
  92. snowflake/ml/modeling/covariance/empirical_covariance.py +5 -3
  93. snowflake/ml/modeling/covariance/graphical_lasso.py +5 -3
  94. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +5 -3
  95. snowflake/ml/modeling/covariance/ledoit_wolf.py +5 -3
  96. snowflake/ml/modeling/covariance/min_cov_det.py +5 -3
  97. snowflake/ml/modeling/covariance/oas.py +5 -3
  98. snowflake/ml/modeling/covariance/shrunk_covariance.py +5 -3
  99. snowflake/ml/modeling/decomposition/dictionary_learning.py +5 -3
  100. snowflake/ml/modeling/decomposition/factor_analysis.py +5 -3
  101. snowflake/ml/modeling/decomposition/fast_ica.py +5 -3
  102. snowflake/ml/modeling/decomposition/incremental_pca.py +5 -3
  103. snowflake/ml/modeling/decomposition/kernel_pca.py +5 -3
  104. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -3
  105. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -3
  106. snowflake/ml/modeling/decomposition/pca.py +5 -3
  107. snowflake/ml/modeling/decomposition/sparse_pca.py +5 -3
  108. snowflake/ml/modeling/decomposition/truncated_svd.py +5 -3
  109. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
  110. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
  111. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
  112. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
  113. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
  114. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
  115. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
  116. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
  117. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
  118. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
  119. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
  120. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
  121. snowflake/ml/modeling/ensemble/isolation_forest.py +5 -3
  122. snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
  123. snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
  124. snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
  125. snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
  126. snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
  127. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
  128. snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
  129. snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
  130. snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
  131. snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
  132. snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
  133. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  134. snowflake/ml/modeling/feature_selection/variance_threshold.py +5 -3
  135. snowflake/ml/modeling/framework/base.py +3 -8
  136. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
  137. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
  138. snowflake/ml/modeling/impute/iterative_imputer.py +5 -3
  139. snowflake/ml/modeling/impute/knn_imputer.py +5 -3
  140. snowflake/ml/modeling/impute/missing_indicator.py +5 -3
  141. snowflake/ml/modeling/impute/simple_imputer.py +8 -4
  142. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +5 -3
  143. snowflake/ml/modeling/kernel_approximation/nystroem.py +5 -3
  144. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +5 -3
  145. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +5 -3
  146. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +5 -3
  147. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
  148. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
  149. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
  150. snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
  151. snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
  152. snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
  153. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  154. snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
  155. snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
  156. snowflake/ml/modeling/linear_model/lars.py +1 -1
  157. snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
  158. snowflake/ml/modeling/linear_model/lasso.py +1 -1
  159. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  160. snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
  161. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
  162. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
  163. snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
  164. snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
  165. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
  166. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
  167. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  168. snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
  169. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  170. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
  171. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
  172. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  173. snowflake/ml/modeling/linear_model/perceptron.py +1 -1
  174. snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
  175. snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
  176. snowflake/ml/modeling/linear_model/ridge.py +1 -1
  177. snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
  178. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
  179. snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
  180. snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
  181. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -3
  182. snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
  183. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
  184. snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
  185. snowflake/ml/modeling/manifold/isomap.py +5 -3
  186. snowflake/ml/modeling/manifold/mds.py +5 -3
  187. snowflake/ml/modeling/manifold/spectral_embedding.py +5 -3
  188. snowflake/ml/modeling/manifold/tsne.py +5 -3
  189. snowflake/ml/modeling/metrics/ranking.py +3 -0
  190. snowflake/ml/modeling/metrics/regression.py +3 -0
  191. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +5 -3
  192. snowflake/ml/modeling/mixture/gaussian_mixture.py +5 -3
  193. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  194. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  195. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
  196. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
  197. snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
  198. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
  199. snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
  200. snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
  201. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
  202. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
  203. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
  204. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
  205. snowflake/ml/modeling/neighbors/kernel_density.py +5 -3
  206. snowflake/ml/modeling/neighbors/local_outlier_factor.py +5 -3
  207. snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
  208. snowflake/ml/modeling/neighbors/nearest_neighbors.py +5 -3
  209. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
  210. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
  211. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  212. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +5 -3
  213. snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
  214. snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
  215. snowflake/ml/modeling/pipeline/pipeline.py +6 -0
  216. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  217. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  218. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  219. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  220. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  221. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  222. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +53 -11
  223. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +44 -13
  224. snowflake/ml/modeling/preprocessing/polynomial_features.py +5 -3
  225. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  226. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  227. snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
  228. snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
  229. snowflake/ml/modeling/svm/linear_svc.py +1 -1
  230. snowflake/ml/modeling/svm/linear_svr.py +1 -1
  231. snowflake/ml/modeling/svm/nu_svc.py +1 -1
  232. snowflake/ml/modeling/svm/nu_svr.py +1 -1
  233. snowflake/ml/modeling/svm/svc.py +1 -1
  234. snowflake/ml/modeling/svm/svr.py +1 -1
  235. snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
  236. snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
  237. snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
  238. snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
  239. snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
  240. snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
  241. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
  242. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
  243. snowflake/ml/registry/_manager/model_manager.py +16 -3
  244. snowflake/ml/version.py +1 -1
  245. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +51 -7
  246. snowflake_ml_python-1.5.4.dist-info/RECORD +389 -0
  247. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
  248. snowflake_ml_python-1.5.2.dist-info/RECORD +0 -384
  249. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
  250. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,7 @@ from snowflake.ml._internal.exceptions import (
33
33
  dataset_errors,
34
34
  error_codes,
35
35
  exceptions as snowml_exceptions,
36
+ sql_error_codes,
36
37
  )
37
38
  from snowflake.ml._internal.utils import identifier
38
39
  from snowflake.ml._internal.utils.sql_identifier import (
@@ -131,6 +132,9 @@ _LIST_FEATURE_VIEW_SCHEMA = StructType(
131
132
  StructField("owner", StringType()),
132
133
  StructField("desc", StringType()),
133
134
  StructField("entities", ArrayType(StringType())),
135
+ StructField("refresh_freq", StringType()),
136
+ StructField("refresh_mode", StringType()),
137
+ StructField("scheduling_state", StringType()),
134
138
  ]
135
139
  )
136
140
 
@@ -267,10 +271,7 @@ class FeatureStore:
267
271
  raise snowml_exceptions.SnowflakeMLException(
268
272
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
269
273
  original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
270
- )
271
-
272
- # TODO: remove this after tag_ref_internal rollout
273
- self._use_optimized_tag_ref = self._tag_ref_internal_enabled()
274
+ ) from e
274
275
  self._check_feature_store_object_versions()
275
276
  logger.info(f"Successfully connected to feature store: {self._config.full_schema_path}.")
276
277
 
@@ -469,7 +470,12 @@ class FeatureStore:
469
470
 
470
471
  @dispatch_decorator()
471
472
  def update_feature_view(
472
- self, name: str, version: str, refresh_freq: Optional[str] = None, warehouse: Optional[str] = None
473
+ self,
474
+ name: str,
475
+ version: str,
476
+ refresh_freq: Optional[str] = None,
477
+ warehouse: Optional[str] = None,
478
+ desc: Optional[str] = None,
473
479
  ) -> FeatureView:
474
480
  """Update a registered feature view.
475
481
  Check feature_view.py for which fields are allowed to be updated after registration.
@@ -479,32 +485,67 @@ class FeatureStore:
479
485
  version: version of the FeatureView to be updated.
480
486
  refresh_freq: updated refresh frequency.
481
487
  warehouse: updated warehouse.
488
+ desc: description of feature view.
482
489
 
483
490
  Returns:
484
491
  Updated FeatureView.
485
492
 
493
+ Example::
494
+
495
+ >>> fs = FeatureStore(
496
+ ... ...,
497
+ ... default_warehouse='ORIGINAL_WH',
498
+ ... )
499
+ >>> fv = FeatureView(
500
+ ... name='foo',
501
+ ... entities=[e1, e2],
502
+ ... feature_df=session.sql('...'),
503
+ ... timestamp_col='timestamp',
504
+ ... refresh_freq='1d',
505
+ ... desc='this is old description'
506
+ ... )
507
+ >>> fv = fs.register_feature_view(feature_view=fv, version='v1')
508
+ >>> # update_feature_view will apply new arguments to the registered feature view.
509
+ >>> new_fv = fs.update_feature_view(
510
+ ... name='foo',
511
+ ... version='v1',
512
+ ... refresh_freq='2d',
513
+ ... warehouse='MY_NEW_WH',
514
+ ... desc='that is new description',
515
+ ... )
516
+
486
517
  Raises:
487
518
  SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
488
519
  SnowflakeMLException: [RuntimeError] Failed to update feature view.
489
520
  """
490
521
  feature_view = self.get_feature_view(name=name, version=version)
491
- if refresh_freq is not None and feature_view.status == FeatureViewStatus.STATIC:
492
- full_name = f"{feature_view.name}/{feature_view.version}"
493
- raise snowml_exceptions.SnowflakeMLException(
494
- error_code=error_codes.INVALID_ARGUMENT,
495
- original_exception=RuntimeError(f"Feature view {full_name} must be non-static so that can be updated."),
496
- )
522
+ new_desc = desc if desc is not None else feature_view.desc
497
523
 
498
- warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
524
+ if feature_view.status == FeatureViewStatus.STATIC:
525
+ if refresh_freq is not None or warehouse is not None:
526
+ full_name = f"{feature_view.name}/{feature_view.version}"
527
+ raise snowml_exceptions.SnowflakeMLException(
528
+ error_code=error_codes.INVALID_ARGUMENT,
529
+ original_exception=RuntimeError(
530
+ f"Static feature view '{full_name}' does not support refresh_freq and warehouse."
531
+ ),
532
+ )
533
+ new_query = f"""
534
+ ALTER VIEW {feature_view.fully_qualified_name()} SET
535
+ COMMENT = '{new_desc}'
536
+ """
537
+ else:
538
+ warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
539
+ # TODO(@wezhou): we need to properly handle cron expr
540
+ new_query = f"""
541
+ ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
542
+ TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
543
+ WAREHOUSE = {warehouse}
544
+ COMMENT = '{new_desc}'
545
+ """
499
546
 
500
- # TODO(@wezhou): we need to properly handle cron expr
501
547
  try:
502
- self._session.sql(
503
- f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
504
- TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
505
- WAREHOUSE = {warehouse}
506
- """
507
- ).collect(statement_params=self._telemetry_stmp)
548
+ self._session.sql(new_query).collect(statement_params=self._telemetry_stmp)
508
549
  except Exception as e:
509
550
  raise snowml_exceptions.SnowflakeMLException(
510
551
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
@@ -559,13 +600,10 @@ class FeatureStore:
559
600
 
560
601
  if entity_name is not None:
561
602
  entity_name = SqlIdentifier(entity_name)
562
- if self._use_optimized_tag_ref:
563
- return self._optimized_find_feature_views(entity_name, feature_view_name)
564
- else:
565
- return self._find_feature_views(entity_name, feature_view_name)
603
+ return self._optimized_find_feature_views(entity_name, feature_view_name)
566
604
  else:
567
605
  output_values: List[List[Any]] = []
568
- for row in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
606
+ for row, _ in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
569
607
  self._extract_feature_view_info(row, output_values)
570
608
  return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
571
609
 
@@ -596,7 +634,90 @@ class FeatureStore:
596
634
  original_exception=ValueError(f"Failed to find FeatureView {name}/{version}: {results}"),
597
635
  )
598
636
 
599
- return self._compose_feature_view(results[0], self.list_entities().collect())
637
+ return self._compose_feature_view(results[0][0], results[0][1], self.list_entities().collect())
638
+
639
+ @dispatch_decorator()
640
+ def refresh_feature_view(self, feature_view: FeatureView) -> None:
641
+ """Manually refresh a feature view.
642
+
643
+ Args:
644
+ feature_view: A registered feature view.
645
+
646
+ Example::
647
+
648
+ >>> fs = FeatureStore(...)
649
+ >>> fv = fs.get_feature_view(name='MY_FV', version='v1')
650
+ >>> fs.refresh_feature_view(fv)
651
+ >>> fs.get_refresh_history(fv).show()
652
+ ---------------------------------------------------------------------------------------------------------------
653
+ |"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
654
+ ---------------------------------------------------------------------------------------------------------------
655
+ |MY_FV$v1 |SUCCEEDED |2024-07-02 13:45:01.11300-07:00 |2024-07-02 13:45:01.82700-07:00 |INCREMENTAL |
656
+ ---------------------------------------------------------------------------------------------------------------
657
+ """
658
+ if feature_view.status == FeatureViewStatus.STATIC:
659
+ warnings.warn(
660
+ "Static feature view can't be refreshed. You must set refresh_freq when register_feature_view().",
661
+ stacklevel=2,
662
+ category=UserWarning,
663
+ )
664
+ return
665
+ self._update_feature_view_status(feature_view, "REFRESH")
666
+
667
+ def get_refresh_history(self, feature_view: FeatureView, verbose: bool = False) -> DataFrame:
668
+ """Get refresh history statistics about a feature view.
669
+
670
+ Args:
671
+ feature_view: A registered feature view.
672
+ verbose: Return more detailed history when set true.
673
+
674
+ Returns:
675
+ A dataframe containing the refresh history information.
676
+
677
+ Example::
678
+
679
+ >>> fs = FeatureStore(...)
680
+ >>> fv = fs.get_feature_view(name='MY_FV', version='v1')
681
+ >>> fs.refresh_feature_view(fv)
682
+ >>> fs.get_refresh_history(fv).show()
683
+ ---------------------------------------------------------------------------------------------------------------
684
+ |"NAME" |"STATE" |"REFRESH_START_TIME" |"REFRESH_END_TIME" |"REFRESH_ACTION" |
685
+ ---------------------------------------------------------------------------------------------------------------
686
+ |MY_FV$v1 |SUCCEEDED |2024-07-02 13:45:01.11300-07:00 |2024-07-02 13:45:01.82700-07:00 |INCREMENTAL |
687
+ ---------------------------------------------------------------------------------------------------------------
688
+ """
689
+ if feature_view.status == FeatureViewStatus.STATIC:
690
+ warnings.warn(
691
+ "Static feature view never refreshes.",
692
+ stacklevel=2,
693
+ category=UserWarning,
694
+ )
695
+ return self._session.create_dataframe([Row()])
696
+
697
+ if feature_view.status == FeatureViewStatus.DRAFT:
698
+ warnings.warn(
699
+ "This feature view has not been registered thus has no refresh history.",
700
+ stacklevel=2,
701
+ category=UserWarning,
702
+ )
703
+ return self._session.create_dataframe([Row()])
704
+
705
+ fv_resolved_name = FeatureView._get_physical_name(
706
+ feature_view.name,
707
+ feature_view.version, # type: ignore[arg-type]
708
+ ).resolved()
709
+ select_cols = "*" if verbose else "name, state, refresh_start_time, refresh_end_time, refresh_action"
710
+ return self._session.sql(
711
+ f"""
712
+ SELECT
713
+ {select_cols}
714
+ FROM TABLE (
715
+ {self._config.database}.INFORMATION_SCHEMA.DYNAMIC_TABLE_REFRESH_HISTORY ()
716
+ )
717
+ WHERE NAME = '{fv_resolved_name}'
718
+ AND SCHEMA_NAME = '{self._config.schema}'
719
+ """
720
+ )
600
721
 
601
722
  @dispatch_decorator()
602
723
  def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
@@ -807,6 +928,86 @@ class FeatureStore:
807
928
 
808
929
  return df
809
930
 
931
+ @dispatch_decorator()
932
+ def generate_training_set(
933
+ self,
934
+ spine_df: DataFrame,
935
+ features: List[Union[FeatureView, FeatureViewSlice]],
936
+ save_as: Optional[str] = None,
937
+ spine_timestamp_col: Optional[str] = None,
938
+ spine_label_cols: Optional[List[str]] = None,
939
+ exclude_columns: Optional[List[str]] = None,
940
+ include_feature_view_timestamp_col: bool = False,
941
+ ) -> DataFrame:
942
+ """
943
+ Generate a training set from the specified Spine DataFrame and Feature Views. Result is
944
+ materialized to a Snowflake Table if `save_as` is specified.
945
+
946
+ Args:
947
+ spine_df: Snowpark DataFrame to join features into.
948
+ features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
949
+ save_as: If specified, a new table containing the produced result will be created. Name can be a fully
950
+ qualified name or an unqualified name. If unqualified, defaults to the Feature Store database and schema
951
+ spine_timestamp_col: Name of timestamp column in spine_df that will be used to join
952
+ time-series features. If spine_timestamp_col is not none, the input features also must have
953
+ timestamp_col.
954
+ spine_label_cols: Name of column(s) in spine_df that contains labels.
955
+ exclude_columns: Name of column(s) to exclude from the resulting training set.
956
+ include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
957
+ (if feature view has timestamp column) if set true. Default to false.
958
+
959
+ Returns:
960
+ Returns a Snowpark DataFrame representing the training set.
961
+
962
+ Raises:
963
+ SnowflakeMLException: [RuntimeError] Materialized table name already exists
964
+ SnowflakeMLException: [RuntimeError] Failed to create materialized table.
965
+
966
+ Example::
967
+
968
+ >>> fs = FeatureStore(session, ...)
969
+ >>> fv = fs.get_feature_view("MY_FV", "1")
970
+ >>> spine_df = session.create_dataframe(["id_1", "id_2"], schema=["id"])
971
+ >>> training_set = fs.generate_training_set(
972
+ ... spine_df,
973
+ ... [fv],
974
+ ... save_as="my_training_set",
975
+ ... )
976
+ >>> print(type(training_set))
977
+ <class 'snowflake.snowpark.table.Table'>
978
+ >>> print(training_set.queries)
979
+ {'queries': ['SELECT * FROM (my_training_set)'], 'post_actions': []}
980
+
981
+ """
982
+ if spine_timestamp_col is not None:
983
+ spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
984
+ if spine_label_cols is not None:
985
+ spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
986
+
987
+ result_df, join_keys = self._join_features(
988
+ spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
989
+ )
990
+
991
+ if exclude_columns is not None:
992
+ result_df = self._exclude_columns(result_df, exclude_columns)
993
+
994
+ if save_as is not None:
995
+ try:
996
+ save_as = self._get_fully_qualified_name(save_as)
997
+ result_df.write.mode("errorifexists").save_as_table(save_as)
998
+ return self._session.table(save_as)
999
+ except SnowparkSQLException as e:
1000
+ if e.sql_error_code == sql_error_codes.OBJECT_ALREADY_EXISTS:
1001
+ raise snowml_exceptions.SnowflakeMLException(
1002
+ error_code=error_codes.OBJECT_ALREADY_EXISTS,
1003
+ original_exception=RuntimeError(str(e)),
1004
+ ) from e
1005
+ raise snowml_exceptions.SnowflakeMLException(
1006
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1007
+ original_exception=RuntimeError(f"An error occurred during training set materialization: {e}."),
1008
+ ) from e
1009
+ return result_df
1010
+
810
1011
  @overload
811
1012
  def generate_dataset(
812
1013
  self,
@@ -859,7 +1060,7 @@ class FeatureStore:
859
1060
  Args:
860
1061
  name: The name of the Dataset to be generated. Datasets are uniquely identified within a schema
861
1062
  by their name and version.
862
- spine_df: The fact table contains the raw dataset.
1063
+ spine_df: Snowpark DataFrame to join features into.
863
1064
  features: A list of FeatureView or FeatureViewSlice which contains features to be joined.
864
1065
  version: The version of the Dataset to be generated. If none specified, the current timestamp
865
1066
  will be used instead.
@@ -867,22 +1068,19 @@ class FeatureStore:
867
1068
  time-series features. If spine_timestamp_col is not none, the input features also must have
868
1069
  timestamp_col.
869
1070
  spine_label_cols: Name of column(s) in spine_df that contains labels.
870
- exclude_columns: Column names to exclude from the result dataframe.
871
- The underlying storage will still contain the columns.
1071
+ exclude_columns: Name of column(s) to exclude from the resulting training set.
872
1072
  include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
873
1073
  (if feature view has timestamp column) if set true. Default to false.
874
1074
  desc: A description about this dataset.
875
- output_type: The type of Snowflake storage to use for the generated training data.
1075
+ output_type: (Deprecated) The type of Snowflake storage to use for the generated training data.
876
1076
 
877
1077
  Returns:
878
1078
  If output_type is "dataset" (default), returns a Dataset object.
879
1079
  If output_type is "table", returns a Snowpark DataFrame representing the table.
880
1080
 
881
1081
  Raises:
882
- SnowflakeMLException: [ValueError] Dataset name/version already exists
883
- SnowflakeMLException: [ValueError] Snapshot creation failed.
884
1082
  SnowflakeMLException: [ValueError] Invalid output_type specified.
885
- SnowflakeMLException: [RuntimeError] Failed to create clone from table.
1083
+ SnowflakeMLException: [RuntimeError] Dataset name/version already exists.
886
1084
  SnowflakeMLException: [RuntimeError] Failed to find resources.
887
1085
  """
888
1086
  if output_type not in {"table", "dataset"}:
@@ -890,61 +1088,59 @@ class FeatureStore:
890
1088
  error_code=error_codes.INVALID_ARGUMENT,
891
1089
  original_exception=ValueError(f"Invalid output_type: {output_type}."),
892
1090
  )
893
- if spine_timestamp_col is not None:
894
- spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
895
- if spine_label_cols is not None:
896
- spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
897
-
898
- result_df, join_keys = self._join_features(
899
- spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
900
- )
901
1091
 
902
1092
  # Convert name to fully qualified name if not already fully qualified
903
- db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
904
- name = "{}.{}.{}".format(
905
- db_name or self._config.database,
906
- schema_name or self._config.schema,
907
- object_name,
908
- )
1093
+ name = self._get_fully_qualified_name(name)
909
1094
  version = version or datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
910
1095
 
911
- if exclude_columns is not None:
912
- result_df = self._exclude_columns(result_df, exclude_columns)
913
-
914
1096
  fs_meta = FeatureStoreMetadata(
915
1097
  spine_query=spine_df.queries["queries"][-1],
916
1098
  serialized_feature_views=[fv.to_json() for fv in features],
917
1099
  spine_timestamp_col=spine_timestamp_col,
918
1100
  )
919
1101
 
1102
+ # Only set a save_as name if output_type is table
1103
+ table_name = f"{name}_{version}" if output_type == "table" else None
1104
+ result_df = self.generate_training_set(
1105
+ spine_df,
1106
+ features,
1107
+ spine_timestamp_col=spine_timestamp_col,
1108
+ spine_label_cols=spine_label_cols,
1109
+ exclude_columns=exclude_columns,
1110
+ include_feature_view_timestamp_col=include_feature_view_timestamp_col,
1111
+ save_as=table_name,
1112
+ )
1113
+ if output_type == "table":
1114
+ warnings.warn(
1115
+ "Generating a table from generate_dataset() is deprecated and will be removed in a future release,"
1116
+ " use generate_training_set() instead.",
1117
+ DeprecationWarning,
1118
+ stacklevel=2,
1119
+ )
1120
+ return result_df
1121
+
920
1122
  try:
921
- if output_type == "table":
922
- table_name = f"{name}_{version}"
923
- result_df.write.mode("errorifexists").save_as_table(table_name) # type: ignore[call-overload]
924
- ds_df = self._session.table(table_name)
925
- return ds_df
926
- else:
927
- assert output_type == "dataset"
928
- if not self._is_dataset_enabled():
929
- raise snowml_exceptions.SnowflakeMLException(
930
- error_code=error_codes.SNOWML_CREATE_FAILED,
931
- original_exception=RuntimeError(
932
- "Dataset is not enabled in your account. Ask your account admin to set"
933
- ' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
934
- " as a Snowflake Table instead."
935
- ),
936
- )
937
- ds: dataset.Dataset = dataset.create_from_dataframe(
938
- self._session,
939
- name,
940
- version,
941
- input_dataframe=result_df,
942
- exclude_cols=[spine_timestamp_col],
943
- label_cols=spine_label_cols,
944
- properties=fs_meta,
945
- comment=desc,
1123
+ assert output_type == "dataset"
1124
+ if not self._is_dataset_enabled():
1125
+ raise snowml_exceptions.SnowflakeMLException(
1126
+ error_code=error_codes.SNOWML_CREATE_FAILED,
1127
+ original_exception=RuntimeError(
1128
+ "Dataset is not enabled in your account. Ask your account admin to set"
1129
+ " FEATURE_DATASET=ENABLED or use generate_training_set() instead"
1130
+ " to generate the data as a Snowflake Table."
1131
+ ),
946
1132
  )
947
- return ds
1133
+ ds: dataset.Dataset = dataset.create_from_dataframe(
1134
+ self._session,
1135
+ name,
1136
+ version,
1137
+ input_dataframe=result_df,
1138
+ exclude_cols=[spine_timestamp_col] if spine_timestamp_col is not None else [],
1139
+ label_cols=spine_label_cols,
1140
+ properties=fs_meta,
1141
+ comment=desc,
1142
+ )
1143
+ return ds
948
1144
 
949
1145
  except dataset_errors.DatasetExistError as e:
950
1146
  raise snowml_exceptions.SnowflakeMLException(
@@ -1391,20 +1587,36 @@ class FeatureStore:
1391
1587
  return SqlIdentifier(identifier.concat_names([_ENTITY_TAG_PREFIX, raw_name]))
1392
1588
 
1393
1589
  def _get_fully_qualified_name(self, name: Union[SqlIdentifier, str]) -> str:
1394
- return f"{self._config.full_schema_path}.{name}"
1590
+ # Do a quick check to see if we can skip regex operations
1591
+ if "." not in name:
1592
+ return f"{self._config.full_schema_path}.{name}"
1593
+
1594
+ db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
1595
+ return "{}.{}.{}".format(
1596
+ db_name or self._config.database,
1597
+ schema_name or self._config.schema,
1598
+ object_name,
1599
+ )
1395
1600
 
1396
1601
  # TODO: SHOW DYNAMIC TABLES is very slow while other show objects are fast, investigate with DT in SNOW-902804.
1397
1602
  def _get_fv_backend_representations(
1398
1603
  self, object_name: Optional[SqlIdentifier], prefix_match: bool = False
1399
- ) -> List[Row]:
1400
- dynamic_table_results = self._find_object("DYNAMIC TABLES", object_name, prefix_match)
1401
- view_results = self._find_object("VIEWS", object_name, prefix_match)
1604
+ ) -> List[Tuple[Row, _FeatureStoreObjTypes]]:
1605
+ dynamic_table_results = [
1606
+ (d, _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW)
1607
+ for d in self._find_object("DYNAMIC TABLES", object_name, prefix_match)
1608
+ ]
1609
+ view_results = [
1610
+ (d, _FeatureStoreObjTypes.EXTERNAL_FEATURE_VIEW)
1611
+ for d in self._find_object("VIEWS", object_name, prefix_match)
1612
+ ]
1402
1613
  return dynamic_table_results + view_results
1403
1614
 
1404
1615
  def _update_feature_view_status(self, feature_view: FeatureView, operation: str) -> FeatureView:
1405
1616
  assert operation in [
1406
1617
  "RESUME",
1407
1618
  "SUSPEND",
1619
+ "REFRESH",
1408
1620
  ], f"Operation: {operation} not supported"
1409
1621
  if feature_view.status == FeatureViewStatus.DRAFT or feature_view.version is None:
1410
1622
  raise snowml_exceptions.SnowflakeMLException(
@@ -1417,9 +1629,10 @@ class FeatureStore:
1417
1629
  self._session.sql(f"ALTER DYNAMIC TABLE {fully_qualified_name} {operation}").collect(
1418
1630
  statement_params=self._telemetry_stmp
1419
1631
  )
1420
- self._session.sql(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}").collect(
1421
- statement_params=self._telemetry_stmp
1422
- )
1632
+ if operation != "REFRESH":
1633
+ self._session.sql(f"ALTER TASK IF EXISTS {fully_qualified_name} {operation}").collect(
1634
+ statement_params=self._telemetry_stmp
1635
+ )
1423
1636
  except Exception as e:
1424
1637
  raise snowml_exceptions.SnowflakeMLException(
1425
1638
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
@@ -1438,46 +1651,28 @@ class FeatureStore:
1438
1651
  # TODO: this can be optimized further by directly getting all possible FVs and filter by tag
1439
1652
  # it's easier to rewrite the code once we can remove the tag_reference path
1440
1653
  all_fvs = self._get_fv_backend_representations(object_name=None)
1441
- fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1654
+ fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r, _ in all_fvs}
1442
1655
 
1443
1656
  if len(fv_maps.keys()) == 0:
1444
1657
  return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1445
1658
 
1446
- filter_clause = f"WHERE OBJECT_NAME LIKE '{feature_view_name.resolved()}%'" if feature_view_name else ""
1447
- try:
1448
- res = self._session.sql(
1449
- f"""
1450
- SELECT
1451
- OBJECT_NAME
1452
- FROM TABLE(
1453
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1454
- TAG_NAME => '{self._get_fully_qualified_name(self._get_entity_name(entity_name))}'
1455
- )
1456
- ) {filter_clause}"""
1457
- ).collect(statement_params=self._telemetry_stmp)
1458
- except Exception as e:
1459
- raise snowml_exceptions.SnowflakeMLException(
1460
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1461
- original_exception=RuntimeError(f"Failed to find feature views' by entity {entity_name}: {e}"),
1462
- ) from e
1659
+ filters = (
1660
+ [lambda d: d["entityName"].startswith(feature_view_name.resolved())] # type: ignore[union-attr]
1661
+ if feature_view_name
1662
+ else None
1663
+ )
1664
+ res = self._lookup_tagged_objects(self._get_entity_name(entity_name), filters)
1463
1665
 
1464
1666
  output_values: List[List[Any]] = []
1465
1667
  for r in res:
1466
- row = fv_maps[SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)]
1668
+ row = fv_maps[SqlIdentifier(r["entityName"], case_sensitive=True)]
1467
1669
  self._extract_feature_view_info(row, output_values)
1468
1670
 
1469
1671
  return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1470
1672
 
1471
1673
  def _extract_feature_view_info(self, row: Row, output_values: List[List[Any]]) -> None:
1472
1674
  name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
1473
- m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
1474
- if m is None:
1475
- raise snowml_exceptions.SnowflakeMLException(
1476
- error_code=error_codes.INTERNAL_SNOWML_ERROR,
1477
- original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1478
- )
1479
-
1480
- fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1675
+ fv_metadata, _ = self._lookup_feature_view_metadata(row, FeatureView._get_physical_name(name, version))
1481
1676
 
1482
1677
  values: List[Any] = []
1483
1678
  values.append(name)
@@ -1488,63 +1683,41 @@ class FeatureStore:
1488
1683
  values.append(row["owner"])
1489
1684
  values.append(row["comment"])
1490
1685
  values.append(fv_metadata.entities)
1686
+ values.append(row["target_lag"] if "target_lag" in row else None)
1687
+ values.append(row["refresh_mode"] if "refresh_mode" in row else None)
1688
+ values.append(row["scheduling_state"] if "scheduling_state" in row else None)
1491
1689
  output_values.append(values)
1492
1690
 
1493
- def _find_feature_views(self, entity_name: SqlIdentifier, feature_view_name: Optional[SqlIdentifier]) -> DataFrame:
1494
- if not self._validate_entity_exists(entity_name):
1495
- return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1496
-
1497
- all_fvs = self._get_fv_backend_representations(object_name=None)
1498
- fv_maps = {SqlIdentifier(r["name"], case_sensitive=True): r for r in all_fvs}
1499
-
1500
- if len(fv_maps.keys()) == 0:
1501
- return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
1502
-
1503
- # NOTE: querying INFORMATION_SCHEMA for Entity lineage can be expensive depending on how many active
1504
- # FeatureViews there are. If this ever become an issue, consider exploring improvements.
1505
- try:
1506
- queries = [
1507
- f"""
1508
- SELECT
1509
- TAG_VALUE,
1510
- OBJECT_NAME
1511
- FROM TABLE(
1512
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1513
- '{self._get_fully_qualified_name(fv_name)}',
1514
- 'table'
1515
- )
1516
- )
1517
- WHERE LEVEL = 'TABLE'
1518
- AND TAG_NAME = '{_FEATURE_VIEW_METADATA_TAG}'
1519
- """
1520
- for fv_name in fv_maps.keys()
1521
- ]
1522
-
1523
- results = self._session.sql("\nUNION\n".join(queries)).collect(statement_params=self._telemetry_stmp)
1524
- except Exception as e:
1525
- raise snowml_exceptions.SnowflakeMLException(
1526
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1527
- original_exception=RuntimeError(f"Failed to retrieve feature views' information: {e}"),
1528
- ) from e
1529
-
1530
- output_values: List[List[Any]] = []
1531
- for r in results:
1532
- fv_metadata = _FeatureViewMetadata.from_json(r["TAG_VALUE"])
1533
- for retrieved_entity in fv_metadata.entities:
1534
- if entity_name == SqlIdentifier(retrieved_entity, case_sensitive=True):
1535
- fv_name, _ = r["OBJECT_NAME"].split(_FEATURE_VIEW_NAME_DELIMITER)
1536
- fv_name = SqlIdentifier(fv_name, case_sensitive=True)
1537
- obj_name = SqlIdentifier(r["OBJECT_NAME"], case_sensitive=True)
1538
- if feature_view_name is not None:
1539
- if fv_name == feature_view_name:
1540
- self._extract_feature_view_info(fv_maps[obj_name], output_values)
1541
- else:
1542
- continue
1543
- else:
1544
- self._extract_feature_view_info(fv_maps[obj_name], output_values)
1545
- return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
1691
+ def _lookup_feature_view_metadata(self, row: Row, fv_name: str) -> Tuple[_FeatureViewMetadata, str]:
1692
+ if len(row["text"]) == 0:
1693
+ # NOTE: if this is a shared feature view, then text column will be empty due to privacy constraints.
1694
+ # So instead of looking at original query text, we will obtain metadata by querying the tag value.
1695
+ # For query body, we will just use a simple select instead of original DDL query since shared feature views
1696
+ # are read-only.
1697
+ try:
1698
+ res = self._lookup_tags(
1699
+ domain="table", obj_name=fv_name, filter_fns=[lambda d: d["tagName"] == _FEATURE_VIEW_METADATA_TAG]
1700
+ )
1701
+ fv_metadata = _FeatureViewMetadata.from_json(res[0]["tagValue"])
1702
+ query = f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}"
1703
+ return (fv_metadata, query)
1704
+ except Exception as e:
1705
+ raise snowml_exceptions.SnowflakeMLException(
1706
+ error_code=error_codes.INTERNAL_SNOWML_ERROR,
1707
+ original_exception=RuntimeError(f"Failed to extract feature_view metadata for {fv_name}: {e}."),
1708
+ )
1709
+ else:
1710
+ m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
1711
+ if m is None:
1712
+ raise snowml_exceptions.SnowflakeMLException(
1713
+ error_code=error_codes.INTERNAL_SNOWML_ERROR,
1714
+ original_exception=RuntimeError(f"Failed to parse query text for FeatureView {fv_name}: {row}."),
1715
+ )
1716
+ fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1717
+ query = m.group("query")
1718
+ return (fv_metadata, query)
1546
1719
 
1547
- def _compose_feature_view(self, row: Row, entity_list: List[Row]) -> FeatureView:
1720
+ def _compose_feature_view(self, row: Row, obj_type: _FeatureStoreObjTypes, entity_list: List[Row]) -> FeatureView:
1548
1721
  def find_and_compose_entity(name: str) -> Entity:
1549
1722
  name = SqlIdentifier(name).resolved()
1550
1723
  for e in entity_list:
@@ -1558,21 +1731,14 @@ class FeatureStore:
1558
1731
 
1559
1732
  name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
1560
1733
  name = SqlIdentifier(name, case_sensitive=True)
1561
- m = re.match(_DT_OR_VIEW_QUERY_PATTERN, row["text"])
1562
- if m is None:
1563
- raise snowml_exceptions.SnowflakeMLException(
1564
- error_code=error_codes.INTERNAL_SNOWML_ERROR,
1565
- original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1566
- )
1567
-
1568
1734
  fv_name = FeatureView._get_physical_name(name, version)
1735
+ fv_metadata, query = self._lookup_feature_view_metadata(row, fv_name)
1736
+
1569
1737
  infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
1738
+ desc = row["comment"]
1570
1739
 
1571
- if m.group("obj_type") == "DYNAMIC TABLE":
1572
- query = m.group("query")
1740
+ if obj_type == _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW:
1573
1741
  df = self._session.sql(query)
1574
- desc = m.group("comment")
1575
- fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1576
1742
  entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1577
1743
  ts_col = fv_metadata.timestamp_col
1578
1744
  timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
@@ -1584,23 +1750,25 @@ class FeatureStore:
1584
1750
  timestamp_col=timestamp_col,
1585
1751
  desc=desc,
1586
1752
  version=version,
1587
- status=FeatureViewStatus(row["scheduling_state"]),
1753
+ status=FeatureViewStatus(row["scheduling_state"])
1754
+ if len(row["scheduling_state"]) > 0
1755
+ else FeatureViewStatus.MASKED,
1588
1756
  feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
1589
1757
  refresh_freq=row["target_lag"],
1590
1758
  database=self._config.database.identifier(),
1591
1759
  schema=self._config.schema.identifier(),
1592
- warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier(),
1760
+ warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
1761
+ if len(row["warehouse"]) > 0
1762
+ else None,
1593
1763
  refresh_mode=row["refresh_mode"],
1594
1764
  refresh_mode_reason=row["refresh_mode_reason"],
1595
1765
  owner=row["owner"],
1596
1766
  infer_schema_df=infer_schema_df,
1767
+ session=self._session,
1597
1768
  )
1598
1769
  return fv
1599
1770
  else:
1600
- query = m.group("query")
1601
1771
  df = self._session.sql(query)
1602
- desc = m.group("comment")
1603
- fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1604
1772
  entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1605
1773
  ts_col = fv_metadata.timestamp_col
1606
1774
  timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
@@ -1622,6 +1790,7 @@ class FeatureStore:
1622
1790
  refresh_mode_reason=None,
1623
1791
  owner=row["owner"],
1624
1792
  infer_schema_df=infer_schema_df,
1793
+ session=self._session,
1625
1794
  )
1626
1795
  return fv
1627
1796
 
@@ -1675,42 +1844,10 @@ class FeatureStore:
1675
1844
  )
1676
1845
  # There could be none-FS objects under FS schema, thus filter on objects with FS special tag.
1677
1846
  if object_type not in tag_free_object_types and len(all_rows) > 0:
1678
- if self._use_optimized_tag_ref:
1679
- fs_obj_rows = self._session.sql(
1680
- f"""
1681
- SELECT
1682
- OBJECT_NAME
1683
- FROM TABLE(
1684
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1685
- TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1686
- )
1687
- )
1688
- WHERE DOMAIN='{obj_domain}'
1689
- """
1690
- ).collect(statement_params=self._telemetry_stmp)
1691
- else:
1692
- # TODO: remove this after tag_ref_internal rollout
1693
- # Note: <object_name> in TAG_REFERENCES(<object_name>) is case insensitive,
1694
- # use double quotes to make it case-sensitive.
1695
- queries = [
1696
- f"""
1697
- SELECT OBJECT_NAME
1698
- FROM TABLE(
1699
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES(
1700
- '{self._get_fully_qualified_name(SqlIdentifier(row['name'], case_sensitive=True))}',
1701
- '{obj_domain}'
1702
- )
1703
- )
1704
- WHERE TAG_NAME = '{_FEATURE_STORE_OBJECT_TAG}'
1705
- AND TAG_SCHEMA = '{self._config.schema.resolved()}'
1706
- """
1707
- for row in all_rows
1708
- ]
1709
- fs_obj_rows = self._session.sql("\nUNION\n".join(queries)).collect(
1710
- statement_params=self._telemetry_stmp
1711
- )
1712
-
1713
- fs_tag_objects = [row["OBJECT_NAME"] for row in fs_obj_rows]
1847
+ fs_obj_rows = self._lookup_tagged_objects(
1848
+ _FEATURE_STORE_OBJECT_TAG, [lambda d: d["domain"] == obj_domain]
1849
+ )
1850
+ fs_tag_objects = [row["entityName"] for row in fs_obj_rows]
1714
1851
  except Exception as e:
1715
1852
  raise snowml_exceptions.SnowflakeMLException(
1716
1853
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
@@ -1756,21 +1893,6 @@ class FeatureStore:
1756
1893
  )
1757
1894
  return cast(DataFrame, df.drop(exclude_columns))
1758
1895
 
1759
- def _tag_ref_internal_enabled(self) -> bool:
1760
- try:
1761
- self._session.sql(
1762
- f"""
1763
- SELECT * FROM TABLE(
1764
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1765
- TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
1766
- )
1767
- ) LIMIT 1;
1768
- """
1769
- ).collect()
1770
- return True
1771
- except Exception:
1772
- return False
1773
-
1774
1896
  def _is_dataset_enabled(self) -> bool:
1775
1897
  try:
1776
1898
  self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
@@ -1790,21 +1912,96 @@ class FeatureStore:
1790
1912
  category=UserWarning,
1791
1913
  )
1792
1914
 
1793
- def _collapse_object_versions(self) -> List[pkg_version.Version]:
1794
- if not self._use_optimized_tag_ref:
1795
- return []
1915
+ def _filter_results(
1916
+ self, results: List[Dict[str, str]], filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
1917
+ ) -> List[Dict[str, str]]:
1918
+ if filter_fns is None:
1919
+ return results
1920
+
1921
+ filtered_results = []
1922
+ for r in results:
1923
+ if all([fn(r) for fn in filter_fns]):
1924
+ filtered_results.append(r)
1925
+ return filtered_results
1926
+
1927
+ def _lookup_tags(
1928
+ self, domain: str, obj_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
1929
+ ) -> List[Dict[str, str]]:
1930
+ """
1931
+ Lookup tag values for a given object, optionally apply filters on the results.
1932
+
1933
+ Args:
1934
+ domain: Domain of the obj to look for tag. E.g. table
1935
+ obj_name: Name of the obj.
1936
+ filter_fns: List of filter functions applied on the results.
1937
+
1938
+ Returns:
1939
+ List of tag values in dictionary format.
1940
+
1941
+ Raises:
1942
+ SnowflakeMLException: [RuntimeError] Failed to lookup tags.
1943
+
1944
+ Example::
1945
+ self._lookup_tags("TABLE", "MY_FV", [lambda d: d["tagName"] == "TARGET_TAG_NAME"])
1796
1946
 
1797
- query = f"""
1798
- SELECT
1799
- TAG_VALUE
1800
- FROM TABLE(
1801
- {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1802
- TAG_NAME => '{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}'
1803
- )
1804
- )
1805
1947
  """
1948
+ # NOTE: use ENTITY_DETAIL system fn to query tags for given object for it to work in
1949
+ # processes using owner's right. e.g. Streamlit, or stored procedure
1950
+ try:
1951
+ res = self._session.sql(
1952
+ f"""
1953
+ SELECT ENTITY_DETAIL('{domain}','{self._get_fully_qualified_name(obj_name)}', '["TAG_REFERENCES"]');
1954
+ """
1955
+ ).collect(statement_params=self._telemetry_stmp)
1956
+ entity_detail = json.loads(res[0][0])
1957
+ results = entity_detail["tagReferencesInfo"]["tagReferenceList"]
1958
+ return self._filter_results(results, filter_fns)
1959
+ except Exception as e:
1960
+ raise snowml_exceptions.SnowflakeMLException(
1961
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1962
+ original_exception=RuntimeError(f"Failed to lookup tags for object for {obj_name}: {e}"),
1963
+ ) from e
1964
+
1965
+ def _lookup_tagged_objects(
1966
+ self, tag_name: str, filter_fns: Optional[List[Callable[[Dict[str, str]], bool]]] = None
1967
+ ) -> List[Dict[str, str]]:
1968
+ """
1969
+ Lookup objects based on specified tag name, optionally apply filters on the results.
1970
+
1971
+ Args:
1972
+ tag_name: Name of the tag.
1973
+ filter_fns: List of filter functions applied on the results.
1974
+
1975
+ Returns:
1976
+ List of objects in dictionary format.
1977
+
1978
+ Raises:
1979
+ SnowflakeMLException: [RuntimeError] Failed to lookup tagged objects.
1980
+
1981
+ Example::
1982
+ self._lookup_tagged_objects("TARGET_TAG_NAME", [lambda d: d["entityName"] == "MY_FV"])
1983
+
1984
+ """
1985
+ # NOTE: use ENTITY_DETAIL system fn to query objects from tag for it to work in
1986
+ # processes using owner's right. e.g. Streamlit, or stored procedure
1987
+ try:
1988
+ res = self._session.sql(
1989
+ f"""
1990
+ SELECT ENTITY_DETAIL('TAG','{self._get_fully_qualified_name(tag_name)}', '["TAG_REFERENCES_INTERNAL"]');
1991
+ """
1992
+ ).collect(statement_params=self._telemetry_stmp)
1993
+ entity_detail = json.loads(res[0][0])
1994
+ results = entity_detail["referencedEntities"]["tagReferenceList"]
1995
+ return self._filter_results(results, filter_fns)
1996
+ except Exception as e:
1997
+ raise snowml_exceptions.SnowflakeMLException(
1998
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
1999
+ original_exception=RuntimeError(f"Failed to lookup tagged objects for {tag_name}: {e}"),
2000
+ ) from e
2001
+
2002
+ def _collapse_object_versions(self) -> List[pkg_version.Version]:
1806
2003
  try:
1807
- res = self._session.sql(query).collect(statement_params=self._telemetry_stmp)
2004
+ res = self._lookup_tagged_objects(_FEATURE_STORE_OBJECT_TAG)
1808
2005
  except Exception:
1809
2006
  # since this is a best effort user warning to upgrade pkg versions
1810
2007
  # we are treating failures as benign error
@@ -1812,7 +2009,7 @@ class FeatureStore:
1812
2009
  versions = set()
1813
2010
  compatibility_breakage_detected = False
1814
2011
  for r in res:
1815
- info = _FeatureStoreObjInfo.from_json(r["TAG_VALUE"])
2012
+ info = _FeatureStoreObjInfo.from_json(r["tagValue"])
1816
2013
  if info.type == _FeatureStoreObjTypes.UNKNOWN:
1817
2014
  compatibility_breakage_detected = True
1818
2015
  versions.add(pkg_version.parse(info.pkg_version))