snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/env_utils.py +6 -0
  3. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  4. snowflake/ml/_internal/telemetry.py +1 -0
  5. snowflake/ml/_internal/utils/identifier.py +1 -1
  6. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  7. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  8. snowflake/ml/dataset/__init__.py +2 -1
  9. snowflake/ml/dataset/dataset.py +4 -3
  10. snowflake/ml/dataset/dataset_reader.py +5 -8
  11. snowflake/ml/feature_store/__init__.py +6 -0
  12. snowflake/ml/feature_store/access_manager.py +283 -0
  13. snowflake/ml/feature_store/feature_store.py +160 -100
  14. snowflake/ml/feature_store/feature_view.py +30 -19
  15. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  16. snowflake/ml/fileset/snowfs.py +2 -30
  17. snowflake/ml/fileset/stage_fs.py +25 -7
  18. snowflake/ml/model/_client/model/model_impl.py +46 -39
  19. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  20. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  21. snowflake/ml/model/_client/ops/model_ops.py +174 -16
  22. snowflake/ml/model/_client/sql/_base.py +34 -0
  23. snowflake/ml/model/_client/sql/model.py +32 -39
  24. snowflake/ml/model/_client/sql/model_version.py +111 -42
  25. snowflake/ml/model/_client/sql/stage.py +6 -32
  26. snowflake/ml/model/_client/sql/tag.py +32 -56
  27. snowflake/ml/model/_model_composer/model_composer.py +8 -4
  28. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  29. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  30. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  31. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
  32. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  33. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  34. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  35. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  36. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  37. snowflake/ml/modeling/cluster/birch.py +8 -1
  38. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  39. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  40. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  41. snowflake/ml/modeling/cluster/k_means.py +8 -1
  42. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  43. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  44. snowflake/ml/modeling/cluster/optics.py +8 -1
  45. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  46. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  47. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  48. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  49. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  50. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  51. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  52. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  53. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  54. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  55. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  56. snowflake/ml/modeling/covariance/oas.py +8 -1
  57. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  58. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  59. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  60. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  61. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  62. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  63. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  64. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  65. snowflake/ml/modeling/decomposition/pca.py +8 -1
  66. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  67. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  68. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  69. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  70. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  71. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  72. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  73. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  74. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  75. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  76. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  77. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  79. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  80. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  81. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  82. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  83. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  84. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  85. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  86. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  87. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  88. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  89. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  90. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  91. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  92. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  93. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  94. snowflake/ml/modeling/framework/base.py +4 -3
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  96. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  97. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  98. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  99. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  100. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  101. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  102. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  103. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  104. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  105. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  106. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  107. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  108. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  109. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  110. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  111. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  112. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  113. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  115. snowflake/ml/modeling/linear_model/lars.py +8 -1
  116. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  118. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  119. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  120. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  121. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  122. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  123. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  124. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  126. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  127. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  128. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  129. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  130. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  131. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  132. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  133. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  134. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  135. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  136. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  137. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  138. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  139. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  140. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  141. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  142. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  143. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  144. snowflake/ml/modeling/manifold/isomap.py +8 -1
  145. snowflake/ml/modeling/manifold/mds.py +8 -1
  146. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  147. snowflake/ml/modeling/manifold/tsne.py +8 -1
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  150. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  151. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  152. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  153. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  154. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  155. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  156. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  157. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  158. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  159. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  160. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  161. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  162. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  163. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  164. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  165. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  166. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  167. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  168. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  169. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  170. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  171. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  172. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  173. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  174. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  175. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  176. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  177. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  178. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  179. snowflake/ml/modeling/svm/svc.py +8 -1
  180. snowflake/ml/modeling/svm/svr.py +8 -1
  181. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  182. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  183. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  184. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  185. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  186. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  187. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  188. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  189. snowflake/ml/registry/_manager/model_manager.py +95 -8
  190. snowflake/ml/registry/registry.py +10 -1
  191. snowflake/ml/version.py +1 -1
  192. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
  193. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
  194. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  195. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  196. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  197. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,19 @@ import re
8
8
  import warnings
9
9
  from dataclasses import dataclass
10
10
  from enum import Enum
11
- from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Dict,
15
+ List,
16
+ Literal,
17
+ Optional,
18
+ Tuple,
19
+ TypeVar,
20
+ Union,
21
+ cast,
22
+ overload,
23
+ )
12
24
 
13
25
  import packaging.version as pkg_version
14
26
  import snowflake.ml.version as snowml_version
@@ -32,7 +44,7 @@ from snowflake.ml.feature_store.entity import _ENTITY_NAME_LENGTH_LIMIT, Entity
32
44
  from snowflake.ml.feature_store.feature_view import (
33
45
  _FEATURE_OBJ_TYPE,
34
46
  _FEATURE_VIEW_NAME_DELIMITER,
35
- _TIMESTAMP_COL_PLACEHOLDER,
47
+ _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS,
36
48
  FeatureView,
37
49
  FeatureViewSlice,
38
50
  FeatureViewStatus,
@@ -242,23 +254,16 @@ class FeatureStore:
242
254
 
243
255
  else:
244
256
  try:
245
- self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
246
- statement_params=self._telemetry_stmp
247
- )
248
- for tag in to_sql_identifiers(
249
- [
250
- _FEATURE_VIEW_METADATA_TAG,
251
- ]
252
- ):
257
+ # Explicitly check if schema exists first since we may not have CREATE SCHEMA privilege
258
+ if len(self._find_object("SCHEMAS", self._config.schema)) == 0:
259
+ self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
260
+ statement_params=self._telemetry_stmp
261
+ )
262
+ for tag in to_sql_identifiers([_FEATURE_VIEW_METADATA_TAG, _FEATURE_STORE_OBJECT_TAG]):
253
263
  self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
254
264
  statement_params=self._telemetry_stmp
255
265
  )
256
-
257
- self._session.sql(
258
- f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}"
259
- ).collect(statement_params=self._telemetry_stmp)
260
266
  except Exception as e:
261
- self.clear()
262
267
  raise snowml_exceptions.SnowflakeMLException(
263
268
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
264
269
  original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
@@ -750,7 +755,7 @@ class FeatureStore:
750
755
  except Exception as e:
751
756
  raise snowml_exceptions.SnowflakeMLException(
752
757
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
753
- original_exception=RuntimeError(f"Failed to alter schema or drop tag: {e}."),
758
+ original_exception=RuntimeError(f"Failed to delete entity: {e}."),
754
759
  ) from e
755
760
  logger.info(f"Deleted Entity {name}.")
756
761
 
@@ -802,7 +807,7 @@ class FeatureStore:
802
807
 
803
808
  return df
804
809
 
805
- @dispatch_decorator()
810
+ @overload
806
811
  def generate_dataset(
807
812
  self,
808
813
  name: str,
@@ -814,7 +819,40 @@ class FeatureStore:
814
819
  exclude_columns: Optional[List[str]] = None,
815
820
  include_feature_view_timestamp_col: bool = False,
816
821
  desc: str = "",
822
+ output_type: Literal["dataset"] = "dataset",
817
823
  ) -> dataset.Dataset:
824
+ ...
825
+
826
+ @overload
827
+ def generate_dataset(
828
+ self,
829
+ name: str,
830
+ spine_df: DataFrame,
831
+ features: List[Union[FeatureView, FeatureViewSlice]],
832
+ output_type: Literal["table"],
833
+ version: Optional[str] = None,
834
+ spine_timestamp_col: Optional[str] = None,
835
+ spine_label_cols: Optional[List[str]] = None,
836
+ exclude_columns: Optional[List[str]] = None,
837
+ include_feature_view_timestamp_col: bool = False,
838
+ desc: str = "",
839
+ ) -> DataFrame:
840
+ ...
841
+
842
+ @dispatch_decorator() # type: ignore[misc]
843
+ def generate_dataset(
844
+ self,
845
+ name: str,
846
+ spine_df: DataFrame,
847
+ features: List[Union[FeatureView, FeatureViewSlice]],
848
+ version: Optional[str] = None,
849
+ spine_timestamp_col: Optional[str] = None,
850
+ spine_label_cols: Optional[List[str]] = None,
851
+ exclude_columns: Optional[List[str]] = None,
852
+ include_feature_view_timestamp_col: bool = False,
853
+ desc: str = "",
854
+ output_type: Literal["dataset", "table"] = "dataset",
855
+ ) -> Union[dataset.Dataset, DataFrame]:
818
856
  """
819
857
  Generate dataset by given source table and feature views.
820
858
 
@@ -834,30 +872,29 @@ class FeatureStore:
834
872
  include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
835
873
  (if feature view has timestamp column) if set true. Default to false.
836
874
  desc: A description about this dataset.
875
+ output_type: The type of Snowflake storage to use for the generated training data.
837
876
 
838
877
  Returns:
839
- A Dataset object.
878
+ If output_type is "dataset" (default), returns a Dataset object.
879
+ If output_type is "table", returns a Snowpark DataFrame representing the table.
840
880
 
841
881
  Raises:
842
- SnowflakeMLException: [ValueError] spine_df contains more than one query.
843
882
  SnowflakeMLException: [ValueError] Dataset name/version already exists
844
883
  SnowflakeMLException: [ValueError] Snapshot creation failed.
884
+ SnowflakeMLException: [ValueError] Invalid output_type specified.
845
885
  SnowflakeMLException: [RuntimeError] Failed to create clone from table.
846
886
  SnowflakeMLException: [RuntimeError] Failed to find resources.
847
887
  """
888
+ if output_type not in {"table", "dataset"}:
889
+ raise snowml_exceptions.SnowflakeMLException(
890
+ error_code=error_codes.INVALID_ARGUMENT,
891
+ original_exception=ValueError(f"Invalid output_type: {output_type}."),
892
+ )
848
893
  if spine_timestamp_col is not None:
849
894
  spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
850
895
  if spine_label_cols is not None:
851
896
  spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
852
897
 
853
- if len(spine_df.queries["queries"]) != 1:
854
- raise snowml_exceptions.SnowflakeMLException(
855
- error_code=error_codes.INVALID_ARGUMENT,
856
- original_exception=ValueError(
857
- f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
858
- ),
859
- )
860
-
861
898
  result_df, join_keys = self._join_features(
862
899
  spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
863
900
  )
@@ -875,33 +912,49 @@ class FeatureStore:
875
912
  result_df = self._exclude_columns(result_df, exclude_columns)
876
913
 
877
914
  fs_meta = FeatureStoreMetadata(
878
- spine_query=spine_df.queries["queries"][0],
915
+ spine_query=spine_df.queries["queries"][-1],
879
916
  serialized_feature_views=[fv.to_json() for fv in features],
880
917
  spine_timestamp_col=spine_timestamp_col,
881
918
  )
882
919
 
883
920
  try:
884
- ds: dataset.Dataset = dataset.create_from_dataframe(
885
- self._session,
886
- name,
887
- version,
888
- input_dataframe=result_df,
889
- exclude_cols=[spine_timestamp_col],
890
- label_cols=spine_label_cols,
891
- properties=fs_meta,
892
- comment=desc,
893
- )
894
- return ds
921
+ if output_type == "table":
922
+ table_name = f"{name}_{version}"
923
+ result_df.write.mode("errorifexists").save_as_table(table_name) # type: ignore[call-overload]
924
+ ds_df = self._session.table(table_name)
925
+ return ds_df
926
+ else:
927
+ assert output_type == "dataset"
928
+ if not self._is_dataset_enabled():
929
+ raise snowml_exceptions.SnowflakeMLException(
930
+ error_code=error_codes.SNOWML_CREATE_FAILED,
931
+ original_exception=RuntimeError(
932
+ "Dataset is not enabled in your account. Ask your account admin to set"
933
+ ' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
934
+ " as a Snowflake Table instead."
935
+ ),
936
+ )
937
+ ds: dataset.Dataset = dataset.create_from_dataframe(
938
+ self._session,
939
+ name,
940
+ version,
941
+ input_dataframe=result_df,
942
+ exclude_cols=[spine_timestamp_col],
943
+ label_cols=spine_label_cols,
944
+ properties=fs_meta,
945
+ comment=desc,
946
+ )
947
+ return ds
895
948
 
896
949
  except dataset_errors.DatasetExistError as e:
897
950
  raise snowml_exceptions.SnowflakeMLException(
898
951
  error_code=error_codes.OBJECT_ALREADY_EXISTS,
899
- original_exception=ValueError(str(e)),
952
+ original_exception=RuntimeError(str(e)),
900
953
  ) from e
901
954
  except SnowparkSQLException as e:
902
955
  raise snowml_exceptions.SnowflakeMLException(
903
956
  error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
904
- original_exception=RuntimeError(f"An error occurred during Dataset generation: {e}."),
957
+ original_exception=RuntimeError(f"An error occurred during dataset generation: {e}."),
905
958
  ) from e
906
959
 
907
960
  @dispatch_decorator()
@@ -930,52 +983,47 @@ class FeatureStore:
930
983
  return self._load_serialized_feature_objects(source_meta.properties.serialized_feature_views)
931
984
 
932
985
  @dispatch_decorator()
933
- def clear(self) -> None:
986
+ def _clear(self, dryrun: bool = True) -> None:
934
987
  """
935
- Clear all feature store internal objects including feature views, entities etc. Note feature store
936
- instance (snowflake schema) won't be deleted. Use snowflake to delete feature store instance.
988
+ Clear all feature views and entities. Note Feature Store schema and metadata will NOT be purged
989
+ together. Use SQL to delete schema and metadata instead.
937
990
 
938
- Raises:
939
- SnowflakeMLException: [RuntimeError] Failed to clear feature store.
991
+ Args:
992
+ dryrun: When true, print the list of objects that would be deleted without actually performing the deletion.
940
993
  """
941
- try:
942
- result = self._session.sql(
943
- f"""
944
- SELECT *
945
- FROM {self._config.database}.INFORMATION_SCHEMA.SCHEMATA
946
- WHERE SCHEMA_NAME = '{self._config.schema.resolved()}'
947
- """
948
- ).collect()
949
- if len(result) == 0:
950
- return
951
-
952
- fs_obj_tag = self._find_object("TAGS", SqlIdentifier(_FEATURE_STORE_OBJECT_TAG))
953
- if len(fs_obj_tag) == 0:
954
- return
955
-
956
- object_types = ["DYNAMIC TABLES", "DATASETS", "VIEWS", "TASKS"]
957
- for obj_type in object_types:
958
- all_object_rows = self._find_object(obj_type, None)
959
- for row in all_object_rows:
960
- obj_name = self._get_fully_qualified_name(SqlIdentifier(row["name"], case_sensitive=True))
961
- self._session.sql(f"DROP {obj_type[:-1]} {obj_name}").collect()
962
- logger.info(f"Deleted {obj_type[:-1]}: {obj_name}.")
963
-
964
- entity_tags = self._find_object("TAGS", SqlIdentifier(_ENTITY_TAG_PREFIX), prefix_match=True)
965
- all_tags = [
966
- _FEATURE_STORE_OBJECT_TAG,
967
- _FEATURE_VIEW_METADATA_TAG,
968
- ] + [SqlIdentifier(row["name"], case_sensitive=True) for row in entity_tags]
969
- for tag_name in all_tags:
970
- obj_name = self._get_fully_qualified_name(tag_name)
971
- self._session.sql(f"DROP TAG IF EXISTS {obj_name}").collect()
972
- logger.info(f"Deleted TAG: {obj_name}.")
994
+ warnings.warn(
995
+ "It will clear ALL feature views and entities in this Feature Store. Make sure your role"
996
+ " has sufficient access to all feature views and entities. Insufficient access to some feature"
997
+ " views or entities will leave Feature Store in an incomplete state.",
998
+ stacklevel=2,
999
+ category=UserWarning,
1000
+ )
1001
+
1002
+ all_fvs_df = self.list_feature_views()
1003
+ all_entities_df = self.list_entities()
1004
+ all_fvs_rows = all_fvs_df.collect()
1005
+ all_entities_rows = all_entities_df.collect()
1006
+
1007
+ if dryrun:
1008
+ logger.info(
1009
+ "Following feature views and entities will be deleted."
1010
+ + " Set 'dryrun=False' to perform the actual deletion."
1011
+ )
1012
+ logger.info(f"Total {len(all_fvs_rows)} Feature views to be deleted:")
1013
+ all_fvs_df.show(n=len(all_fvs_rows))
1014
+ logger.info(f"\nTotal {len(all_entities_rows)} entities to be deleted:")
1015
+ all_entities_df.show(n=len(all_entities_rows))
1016
+ return
1017
+
1018
+ for fv_row in all_fvs_rows:
1019
+ fv = self.get_feature_view(
1020
+ SqlIdentifier(fv_row["NAME"], case_sensitive=True).identifier(), fv_row["VERSION"]
1021
+ )
1022
+ self.delete_feature_view(fv)
1023
+
1024
+ for entity_row in all_entities_rows:
1025
+ self.delete_entity(SqlIdentifier(entity_row["NAME"], case_sensitive=True).identifier())
973
1026
 
974
- except Exception as e:
975
- raise snowml_exceptions.SnowflakeMLException(
976
- error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
977
- original_exception=RuntimeError(f"Failed to clear feature store {self._config.full_schema_path}: {e}."),
978
- ) from e
979
1027
  logger.info(f"Feature store {self._config.full_schema_path} has been cleared.")
980
1028
 
981
1029
  def _get_feature_view_if_exists(self, name: str, version: str) -> FeatureView:
@@ -1093,14 +1141,6 @@ class FeatureStore:
1093
1141
  spine_timestamp_col: Optional[SqlIdentifier],
1094
1142
  include_feature_view_timestamp_col: bool,
1095
1143
  ) -> Tuple[DataFrame, List[SqlIdentifier]]:
1096
- if len(spine_df.queries["queries"]) != 1:
1097
- raise snowml_exceptions.SnowflakeMLException(
1098
- error_code=error_codes.INVALID_ARGUMENT,
1099
- original_exception=ValueError(
1100
- f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
1101
- ),
1102
- )
1103
-
1104
1144
  for f in features:
1105
1145
  f = f.feature_view_ref if isinstance(f, FeatureViewSlice) else f
1106
1146
  if f.status == FeatureViewStatus.DRAFT:
@@ -1122,7 +1162,7 @@ class FeatureStore:
1122
1162
  self._asof_join_enabled = self._is_asof_join_enabled()
1123
1163
 
1124
1164
  # TODO: leverage Snowpark dataframe for more concise syntax once it supports AsOfJoin
1125
- query = spine_df.queries["queries"][0]
1165
+ query = spine_df.queries["queries"][-1]
1126
1166
  layer = 0
1127
1167
  for f in features:
1128
1168
  if isinstance(f, FeatureViewSlice):
@@ -1180,7 +1220,15 @@ class FeatureStore:
1180
1220
  """
1181
1221
  layer += 1
1182
1222
 
1183
- return self._session.sql(query), join_keys
1223
+ # TODO: construct result dataframe with dataframe APIs once ASOF join is supported natively.
1224
+ # The code below manually constructs the result dataframe from private members of the spine dataframe, which
1225
+ # will likely cause unintended issues. This step is needed because spine_df might contain
1226
+ # prerequisite queries and post actions that must be carried over to result dataframe.
1227
+ result_df = self._session.sql(query)
1228
+ result_df._plan.queries = spine_df._plan.queries[:-1] + result_df._plan.queries
1229
+ result_df._plan.post_actions = spine_df._plan.post_actions
1230
+
1231
+ return result_df, join_keys
1184
1232
 
1185
1233
  def _check_database_exists_or_throw(self) -> None:
1186
1234
  resolved_db_name = self._config.database.resolved()
@@ -1517,6 +1565,9 @@ class FeatureStore:
1517
1565
  original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
1518
1566
  )
1519
1567
 
1568
+ fv_name = FeatureView._get_physical_name(name, version)
1569
+ infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
1570
+
1520
1571
  if m.group("obj_type") == "DYNAMIC TABLE":
1521
1572
  query = m.group("query")
1522
1573
  df = self._session.sql(query)
@@ -1524,7 +1575,7 @@ class FeatureStore:
1524
1575
  fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1525
1576
  entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1526
1577
  ts_col = fv_metadata.timestamp_col
1527
- timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
1578
+ timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
1528
1579
 
1529
1580
  fv = FeatureView._construct_feature_view(
1530
1581
  name=name,
@@ -1534,9 +1585,7 @@ class FeatureStore:
1534
1585
  desc=desc,
1535
1586
  version=version,
1536
1587
  status=FeatureViewStatus(row["scheduling_state"]),
1537
- feature_descs=self._fetch_column_descs(
1538
- "DYNAMIC TABLE", SqlIdentifier(row["name"], case_sensitive=True)
1539
- ),
1588
+ feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
1540
1589
  refresh_freq=row["target_lag"],
1541
1590
  database=self._config.database.identifier(),
1542
1591
  schema=self._config.schema.identifier(),
@@ -1544,6 +1593,7 @@ class FeatureStore:
1544
1593
  refresh_mode=row["refresh_mode"],
1545
1594
  refresh_mode_reason=row["refresh_mode_reason"],
1546
1595
  owner=row["owner"],
1596
+ infer_schema_df=infer_schema_df,
1547
1597
  )
1548
1598
  return fv
1549
1599
  else:
@@ -1553,7 +1603,7 @@ class FeatureStore:
1553
1603
  fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
1554
1604
  entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
1555
1605
  ts_col = fv_metadata.timestamp_col
1556
- timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
1606
+ timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
1557
1607
 
1558
1608
  fv = FeatureView._construct_feature_view(
1559
1609
  name=name,
@@ -1563,7 +1613,7 @@ class FeatureStore:
1563
1613
  desc=desc,
1564
1614
  version=version,
1565
1615
  status=FeatureViewStatus.STATIC,
1566
- feature_descs=self._fetch_column_descs("VIEW", SqlIdentifier(row["name"], case_sensitive=True)),
1616
+ feature_descs=self._fetch_column_descs("VIEW", fv_name),
1567
1617
  refresh_freq=None,
1568
1618
  database=self._config.database.identifier(),
1569
1619
  schema=self._config.schema.identifier(),
@@ -1571,6 +1621,7 @@ class FeatureStore:
1571
1621
  refresh_mode=None,
1572
1622
  refresh_mode_reason=None,
1573
1623
  owner=row["owner"],
1624
+ infer_schema_df=infer_schema_df,
1574
1625
  )
1575
1626
  return fv
1576
1627
 
@@ -1710,7 +1761,7 @@ class FeatureStore:
1710
1761
  self._session.sql(
1711
1762
  f"""
1712
1763
  SELECT * FROM TABLE(
1713
- INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1764
+ {self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
1714
1765
  TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
1715
1766
  )
1716
1767
  ) LIMIT 1;
@@ -1720,6 +1771,15 @@ class FeatureStore:
1720
1771
  except Exception:
1721
1772
  return False
1722
1773
 
1774
+ def _is_dataset_enabled(self) -> bool:
1775
+ try:
1776
+ self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
1777
+ return True
1778
+ except SnowparkSQLException as e:
1779
+ if "'DATASETS' does not exist" in e.message:
1780
+ return False
1781
+ raise
1782
+
1723
1783
  def _check_feature_store_object_versions(self) -> None:
1724
1784
  versions = self._collapse_object_versions()
1725
1785
  if len(versions) > 0 and pkg_version.parse(snowml_version.VERSION) < versions[0]:
@@ -5,12 +5,8 @@ import re
5
5
  from collections import OrderedDict
6
6
  from dataclasses import asdict, dataclass
7
7
  from enum import Enum
8
- from typing import Dict, List, Optional
8
+ from typing import Any, Dict, List, Optional
9
9
 
10
- from snowflake.ml._internal.exceptions import (
11
- error_codes,
12
- exceptions as snowml_exceptions,
13
- )
14
10
  from snowflake.ml._internal.utils.identifier import concat_names
15
11
  from snowflake.ml._internal.utils.sql_identifier import (
16
12
  SqlIdentifier,
@@ -27,12 +23,18 @@ from snowflake.snowpark.types import (
27
23
  )
28
24
 
29
25
  _FEATURE_VIEW_NAME_DELIMITER = "$"
30
- _TIMESTAMP_COL_PLACEHOLDER = "FS_TIMESTAMP_COL_PLACEHOLDER_VAL"
26
+ _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS = ["FS_TIMESTAMP_COL_PLACEHOLDER_VAL", "NULL"]
27
+ _TIMESTAMP_COL_PLACEHOLDER = "NULL"
31
28
  _FEATURE_OBJ_TYPE = "FEATURE_OBJ_TYPE"
32
29
  # Feature view version rule is aligned with dataset version rule in SQL.
33
30
  _FEATURE_VIEW_VERSION_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_.\-]*$")
34
31
  _FEATURE_VIEW_VERSION_MAX_LENGTH = 128
35
32
 
33
+ _RESULT_SCAN_QUERY_PATTERN = re.compile(
34
+ r".*FROM\s*TABLE\s*\(\s*RESULT_SCAN\s*\(.*",
35
+ flags=re.DOTALL | re.IGNORECASE | re.X,
36
+ )
37
+
36
38
 
37
39
  @dataclass(frozen=True)
38
40
  class _FeatureViewMetadata:
@@ -53,13 +55,10 @@ class _FeatureViewMetadata:
53
55
  class FeatureViewVersion(str):
54
56
  def __new__(cls, version: str) -> FeatureViewVersion:
55
57
  if not _FEATURE_VIEW_VERSION_RE.match(version) or len(version) > _FEATURE_VIEW_VERSION_MAX_LENGTH:
56
- raise snowml_exceptions.SnowflakeMLException(
57
- error_code=error_codes.INVALID_ARGUMENT,
58
- original_exception=ValueError(
59
- f"`{version}` is not a valid feature view version. "
60
- "It must start with letter or digit, and followed by letter, digit, '_', '-' or '.'. "
61
- f"The length limit is {_FEATURE_VIEW_VERSION_MAX_LENGTH}."
62
- ),
58
+ raise ValueError(
59
+ f"`{version}` is not a valid feature view version. "
60
+ "It must start with letter or digit, and followed by letter, digit, '_', '-' or '.'. "
61
+ f"The length limit is {_FEATURE_VIEW_VERSION_MAX_LENGTH}."
63
62
  )
64
63
  return super().__new__(cls, version)
65
64
 
@@ -121,12 +120,13 @@ class FeatureView:
121
120
  timestamp_col: Optional[str] = None,
122
121
  refresh_freq: Optional[str] = None,
123
122
  desc: str = "",
123
+ **_kwargs: Any,
124
124
  ) -> None:
125
125
  """
126
126
  Create a FeatureView instance.
127
127
 
128
128
  Args:
129
- name: name of the FeatureView. NOTE: FeatureView name will be capitalized.
129
+ name: name of the FeatureView. NOTE: following Snowflake identifier rule
130
130
  entities: entities that the FeatureView is associated with.
131
131
  feature_df: Snowpark DataFrame containing data source and all feature feature_df logics.
132
132
  Final projection of the DataFrame should contain feature names, join keys and timestamp(if applicable).
@@ -140,6 +140,7 @@ class FeatureView:
140
140
  NOTE: If refresh_freq is not provided, then FeatureView will be registered as View on Snowflake backend
141
141
  and there won't be extra storage cost.
142
142
  desc: description of the FeatureView.
143
+ _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
143
144
  """
144
145
 
145
146
  self._name: SqlIdentifier = SqlIdentifier(name)
@@ -149,6 +150,7 @@ class FeatureView:
149
150
  SqlIdentifier(timestamp_col) if timestamp_col is not None else None
150
151
  )
151
152
  self._desc: str = desc
153
+ self._infer_schema_df: DataFrame = _kwargs.get("_infer_schema_df", self._feature_df)
152
154
  self._query: str = self._get_query()
153
155
  self._version: Optional[FeatureViewVersion] = None
154
156
  self._status: FeatureViewStatus = FeatureViewStatus.DRAFT
@@ -295,7 +297,7 @@ class FeatureView:
295
297
 
296
298
  @property
297
299
  def output_schema(self) -> StructType:
298
- return self._feature_df.schema
300
+ return self._infer_schema_df.schema
299
301
 
300
302
  @property
301
303
  def refresh_mode(self) -> Optional[str]:
@@ -329,7 +331,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
329
331
  f"FeatureView name `{self._name}` contains invalid character `{_FEATURE_VIEW_NAME_DELIMITER}`."
330
332
  )
331
333
 
332
- unescaped_df_cols = to_sql_identifiers(self._feature_df.columns)
334
+ unescaped_df_cols = to_sql_identifiers(self._infer_schema_df.columns)
333
335
  for e in self._entities:
334
336
  for k in e.join_keys:
335
337
  if k not in unescaped_df_cols:
@@ -341,17 +343,20 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
341
343
  ts_col = self._timestamp_col
342
344
  if ts_col == SqlIdentifier(_TIMESTAMP_COL_PLACEHOLDER):
343
345
  raise ValueError(f"Invalid timestamp_col name, cannot be {_TIMESTAMP_COL_PLACEHOLDER}.")
344
- if ts_col not in to_sql_identifiers(self._feature_df.columns):
346
+ if ts_col not in to_sql_identifiers(self._infer_schema_df.columns):
345
347
  raise ValueError(f"timestamp_col {ts_col} is not found in input dataframe.")
346
348
 
347
- col_type = self._feature_df.schema[ts_col].datatype
349
+ col_type = self._infer_schema_df.schema[ts_col].datatype
348
350
  if not isinstance(col_type, (DateType, TimeType, TimestampType, _NumericType)):
349
351
  raise ValueError(f"Invalid data type for timestamp_col {ts_col}: {col_type}.")
350
352
 
353
+ if re.match(_RESULT_SCAN_QUERY_PATTERN, self._query) is not None:
354
+ raise ValueError(f"feature_df should not be reading from RESULT_SCAN. Invalid query: {self._query}")
355
+
351
356
  def _get_feature_names(self) -> List[SqlIdentifier]:
352
357
  join_keys = [k for e in self._entities for k in e.join_keys]
353
358
  ts_col = [self._timestamp_col] if self._timestamp_col is not None else []
354
- feature_names = to_sql_identifiers(self._feature_df.columns, case_sensitive=True)
359
+ feature_names = to_sql_identifiers(self._infer_schema_df.columns, case_sensitive=False)
355
360
  return [c for c in feature_names if c not in join_keys + ts_col]
356
361
 
357
362
  def __repr__(self) -> str:
@@ -384,6 +389,9 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
384
389
  fv_dict = self.__dict__.copy()
385
390
  if "_feature_df" in fv_dict:
386
391
  fv_dict.pop("_feature_df")
392
+ if "_infer_schema_df" in fv_dict:
393
+ infer_schema_df = fv_dict.pop("_infer_schema_df")
394
+ fv_dict["_infer_schema_query"] = infer_schema_df.queries["queries"][0]
387
395
  fv_dict["_entities"] = [e._to_dict() for e in self._entities]
388
396
  fv_dict["_status"] = str(self._status)
389
397
  fv_dict["_name"] = str(self._name) if self._name is not None else None
@@ -440,6 +448,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
440
448
  refresh_mode=json_dict["_refresh_mode"],
441
449
  refresh_mode_reason=json_dict["_refresh_mode_reason"],
442
450
  owner=json_dict["_owner"],
451
+ infer_schema_df=session.sql(json_dict.get("_infer_schema_query", None)),
443
452
  )
444
453
 
445
454
  @staticmethod
@@ -471,6 +480,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
471
480
  refresh_mode: Optional[str],
472
481
  refresh_mode_reason: Optional[str],
473
482
  owner: Optional[str],
483
+ infer_schema_df: Optional[DataFrame],
474
484
  ) -> FeatureView:
475
485
  fv = FeatureView(
476
486
  name=name,
@@ -478,6 +488,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
478
488
  feature_df=feature_df,
479
489
  timestamp_col=timestamp_col,
480
490
  desc=desc,
491
+ _infer_schema_df=infer_schema_df,
481
492
  )
482
493
  fv._version = FeatureViewVersion(version) if version is not None else None
483
494
  fv._status = status
@@ -78,22 +78,26 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
78
78
  match = _SNOWURL_PATH_RE.fullmatch(file)
79
79
  assert match is not None and match.group("filepath") is not None
80
80
  versions_dict[match.group("version")].append(match.group("filepath"))
81
- presigned_urls: List[Tuple[str, str]] = []
82
81
  try:
82
+ async_jobs: List[snowpark.AsyncJob] = []
83
83
  for version, version_files in versions_dict.items():
84
84
  for file in version_files:
85
85
  stage_loc = f"{self.stage_name}/versions/{version}"
86
- presigned_urls.extend(
87
- self._session.sql(
88
- f"select '{version}/{file}' as name,"
89
- f" get_presigned_url('{stage_loc}', '{file}', {url_lifetime}) as url"
90
- ).collect(
91
- statement_params=telemetry.get_function_usage_statement_params(
92
- project=stage_fs._PROJECT,
93
- api_calls=[snowpark.DataFrame.collect],
94
- ),
95
- )
86
+ query_result = self._session.sql(
87
+ f"select '{version}/{file}' as name,"
88
+ f" get_presigned_url('{stage_loc}', '{file}', {url_lifetime}) as url"
89
+ ).collect(
90
+ block=False,
91
+ statement_params=telemetry.get_function_usage_statement_params(
92
+ project=stage_fs._PROJECT,
93
+ api_calls=[snowpark.DataFrame.collect],
94
+ ),
96
95
  )
96
+ async_jobs.append(query_result)
97
+ presigned_urls: List[Tuple[str, str]] = [
98
+ (r["NAME"], r["URL"]) for job in async_jobs for r in stage_fs._resolve_async_job(job)
99
+ ]
100
+ return presigned_urls
97
101
  except snowpark_exceptions.SnowparkClientException as e:
98
102
  if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST) or e.message.startswith(
99
103
  fileset_errors.ERRNO_STAGE_NOT_EXIST
@@ -109,7 +113,6 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
109
113
  error_code=error_codes.INTERNAL_SNOWML_ERROR,
110
114
  original_exception=fileset_errors.FileSetError(str(e)),
111
115
  )
112
- return presigned_urls
113
116
 
114
117
  @classmethod
115
118
  def _parent(cls, path: str) -> str: