snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. snowflake/cortex/__init__.py +4 -1
  2. snowflake/cortex/_classify_text.py +36 -0
  3. snowflake/cortex/_complete.py +281 -21
  4. snowflake/cortex/_extract_answer.py +0 -1
  5. snowflake/cortex/_sentiment.py +0 -1
  6. snowflake/cortex/_summarize.py +0 -1
  7. snowflake/cortex/_translate.py +0 -1
  8. snowflake/cortex/_util.py +12 -85
  9. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  10. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  11. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  12. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  13. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  14. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  15. snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
  16. snowflake/ml/_internal/telemetry.py +38 -2
  17. snowflake/ml/_internal/utils/identifier.py +14 -0
  18. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  19. snowflake/ml/data/_internal/arrow_ingestor.py +228 -0
  20. snowflake/ml/data/_internal/ingestor_utils.py +58 -0
  21. snowflake/ml/data/data_connector.py +133 -0
  22. snowflake/ml/data/data_ingestor.py +28 -0
  23. snowflake/ml/data/data_source.py +23 -0
  24. snowflake/ml/dataset/dataset.py +39 -32
  25. snowflake/ml/dataset/dataset_reader.py +18 -118
  26. snowflake/ml/feature_store/access_manager.py +7 -1
  27. snowflake/ml/feature_store/entity.py +19 -2
  28. snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
  29. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +31 -0
  30. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +24 -0
  31. snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +4 -0
  32. snowflake/ml/feature_store/examples/example_helper.py +240 -0
  33. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
  34. snowflake/ml/feature_store/examples/new_york_taxi_features/features/dropoff_features.py +39 -0
  35. snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +58 -0
  36. snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -0
  37. snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
  38. snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
  39. snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
  40. snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
  41. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
  42. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +29 -0
  43. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +21 -0
  44. snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +5 -0
  45. snowflake/ml/feature_store/feature_store.py +987 -264
  46. snowflake/ml/feature_store/feature_view.py +228 -13
  47. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  48. snowflake/ml/fileset/fileset.py +2 -2
  49. snowflake/ml/fileset/snowfs.py +4 -15
  50. snowflake/ml/fileset/stage_fs.py +24 -18
  51. snowflake/ml/lineage/__init__.py +3 -0
  52. snowflake/ml/lineage/lineage_node.py +139 -0
  53. snowflake/ml/model/_client/model/model_impl.py +47 -14
  54. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  55. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  56. snowflake/ml/model/_client/sql/model.py +1 -0
  57. snowflake/ml/model/_client/sql/model_version.py +45 -2
  58. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
  59. snowflake/ml/model/_model_composer/model_composer.py +15 -17
  60. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -17
  61. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
  62. snowflake/ml/model/_model_composer/model_method/function_generator.py +20 -4
  63. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
  64. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +55 -0
  65. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -34
  66. snowflake/ml/model/_model_composer/model_method/model_method.py +10 -7
  67. snowflake/ml/model/_packager/model_handlers/_base.py +13 -3
  68. snowflake/ml/model/_packager/model_handlers/_utils.py +59 -1
  69. snowflake/ml/model/_packager/model_handlers/catboost.py +44 -2
  70. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  71. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  72. snowflake/ml/model/_packager/model_handlers/lightgbm.py +70 -2
  73. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  74. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  75. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  76. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  77. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  78. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  79. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  80. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  81. snowflake/ml/model/_packager/model_handlers/xgboost.py +61 -2
  82. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  83. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  84. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  85. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  86. snowflake/ml/model/_packager/model_packager.py +9 -4
  87. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  88. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -5
  89. snowflake/ml/model/custom_model.py +22 -2
  90. snowflake/ml/model/model_signature.py +4 -4
  91. snowflake/ml/model/type_hints.py +77 -4
  92. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +3 -1
  93. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
  94. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
  95. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
  96. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
  97. snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
  98. snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
  99. snowflake/ml/modeling/cluster/birch.py +4 -2
  100. snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
  101. snowflake/ml/modeling/cluster/dbscan.py +4 -2
  102. snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
  103. snowflake/ml/modeling/cluster/k_means.py +4 -2
  104. snowflake/ml/modeling/cluster/mean_shift.py +4 -2
  105. snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
  106. snowflake/ml/modeling/cluster/optics.py +4 -2
  107. snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
  108. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
  109. snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
  110. snowflake/ml/modeling/compose/column_transformer.py +4 -2
  111. snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
  112. snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
  113. snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
  114. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
  115. snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
  116. snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
  117. snowflake/ml/modeling/covariance/oas.py +4 -2
  118. snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
  119. snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
  120. snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
  121. snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
  122. snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
  123. snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
  124. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
  125. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
  126. snowflake/ml/modeling/decomposition/pca.py +4 -2
  127. snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
  128. snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
  129. snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
  130. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
  131. snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
  132. snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
  133. snowflake/ml/modeling/impute/knn_imputer.py +4 -2
  134. snowflake/ml/modeling/impute/missing_indicator.py +4 -2
  135. snowflake/ml/modeling/impute/simple_imputer.py +26 -0
  136. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
  137. snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
  138. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
  139. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
  140. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
  141. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
  142. snowflake/ml/modeling/manifold/isomap.py +4 -2
  143. snowflake/ml/modeling/manifold/mds.py +4 -2
  144. snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
  145. snowflake/ml/modeling/manifold/tsne.py +4 -2
  146. snowflake/ml/modeling/metrics/ranking.py +3 -0
  147. snowflake/ml/modeling/metrics/regression.py +3 -0
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
  150. snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
  151. snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
  152. snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
  153. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
  154. snowflake/ml/modeling/pipeline/pipeline.py +5 -4
  155. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
  156. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
  157. snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
  158. snowflake/ml/registry/_manager/model_manager.py +16 -3
  159. snowflake/ml/registry/registry.py +100 -13
  160. snowflake/ml/version.py +1 -1
  161. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/METADATA +81 -7
  162. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/RECORD +165 -139
  163. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/WHEEL +1 -1
  164. snowflake/ml/_internal/lineage/data_source.py +0 -10
  165. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/LICENSE.txt +0 -0
  166. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/top_level.txt +0 -0
@@ -2,17 +2,25 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import re
5
+ import warnings
5
6
  from collections import OrderedDict
6
7
  from dataclasses import asdict, dataclass
7
8
  from enum import Enum
8
9
  from typing import Any, Dict, List, Optional
9
10
 
11
+ from snowflake.ml._internal.exceptions import (
12
+ error_codes,
13
+ exceptions as snowml_exceptions,
14
+ )
15
+ from snowflake.ml._internal.utils import identifier
10
16
  from snowflake.ml._internal.utils.identifier import concat_names
11
17
  from snowflake.ml._internal.utils.sql_identifier import (
12
18
  SqlIdentifier,
13
19
  to_sql_identifiers,
14
20
  )
21
+ from snowflake.ml.feature_store import feature_store
15
22
  from snowflake.ml.feature_store.entity import Entity
23
+ from snowflake.ml.lineage import lineage_node
16
24
  from snowflake.snowpark import DataFrame, Session
17
25
  from snowflake.snowpark.types import (
18
26
  DateType,
@@ -67,6 +75,7 @@ class FeatureViewVersion(str):
67
75
 
68
76
 
69
77
  class FeatureViewStatus(Enum):
78
+ MASKED = "MASKED" # for shared feature views where scheduling state is not available
70
79
  DRAFT = "DRAFT"
71
80
  STATIC = "STATIC"
72
81
  RUNNING = "RUNNING" # This can be deprecated after BCR 2024_02 gets fully deployed
@@ -107,7 +116,7 @@ class FeatureViewSlice:
107
116
  return cls(**json_dict)
108
117
 
109
118
 
110
- class FeatureView:
119
+ class FeatureView(lineage_node.LineageNode):
111
120
  """
112
121
  A FeatureView instance encapsulates a logical group of features.
113
122
  """
@@ -117,9 +126,11 @@ class FeatureView:
117
126
  name: str,
118
127
  entities: List[Entity],
119
128
  feature_df: DataFrame,
129
+ *,
120
130
  timestamp_col: Optional[str] = None,
121
131
  refresh_freq: Optional[str] = None,
122
132
  desc: str = "",
133
+ warehouse: Optional[str] = None,
123
134
  **_kwargs: Any,
124
135
  ) -> None:
125
136
  """
@@ -140,7 +151,33 @@ class FeatureView:
140
151
  NOTE: If refresh_freq is not provided, then FeatureView will be registered as View on Snowflake backend
141
152
  and there won't be extra storage cost.
142
153
  desc: description of the FeatureView.
154
+ warehouse: warehouse to refresh feature view. Not needed for static feature view (refresh_freq is None).
155
+ For managed feature view, this warehouse will overwrite the default warehouse of Feature Store if it is
156
+ specified, otherwise the default warehouse will be used.
143
157
  _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
158
+
159
+ Example::
160
+
161
+ >>> fs = FeatureStore(...)
162
+ >>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
163
+ >>> feature_df = session.sql("select f_1, f_2 from source_table")
164
+ >>> draft_fv = FeatureView(
165
+ ... name="my_fv",
166
+ ... entities=[e1, e2],
167
+ ... feature_df=feature_df,
168
+ ... timestamp_col='TS', # optional
169
+ ... refresh_freq='1d', # optional
170
+ ... desc='A line about this feature view', # optional
171
+ ... warehouse='WH' # optional, the warehouse used to refresh (managed) feature view
172
+ ... )
173
+ >>> print(draft_fv.status)
174
+ FeatureViewStatus.DRAFT
175
+ <BLANKLINE>
176
+ >>> # registered_fv is a local object that maps to a Snowflake backend object.
177
+ >>> registered_fv = fs.register_feature_view(draft_fv, "v1")
178
+ >>> print(registered_fv.status)
179
+ FeatureViewStatus.ACTIVE
180
+
144
181
  """
145
182
 
146
183
  self._name: SqlIdentifier = SqlIdentifier(name)
@@ -158,7 +195,7 @@ class FeatureView:
158
195
  self._refresh_freq: Optional[str] = refresh_freq
159
196
  self._database: Optional[SqlIdentifier] = None
160
197
  self._schema: Optional[SqlIdentifier] = None
161
- self._warehouse: Optional[SqlIdentifier] = None
198
+ self._warehouse: Optional[SqlIdentifier] = SqlIdentifier(warehouse) if warehouse is not None else None
162
199
  self._refresh_mode: Optional[str] = None
163
200
  self._refresh_mode_reason: Optional[str] = None
164
201
  self._owner: Optional[str] = None
@@ -176,6 +213,33 @@ class FeatureView:
176
213
 
177
214
  Raises:
178
215
  ValueError: if selected feature names are not found in the FeatureView.
216
+
217
+ Example::
218
+
219
+ >>> fs = FeatureStore(...)
220
+ >>> e = fs.get_entity('TRIP_ID')
221
+ >>> # feature_df contains 3 features and 1 entity
222
+ >>> feature_df = session.table(source_table).select(
223
+ ... 'TRIPDURATION',
224
+ ... 'START_STATION_LATITUDE',
225
+ ... 'END_STATION_LONGITUDE',
226
+ ... 'TRIP_ID'
227
+ ... )
228
+ >>> darft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
229
+ >>> fv = fs.register_feature_view(darft_fv, version='1.0')
230
+ >>> # shows all 3 features
231
+ >>> fv.feature_names
232
+ ['TRIPDURATION', 'START_STATION_LATITUDE', 'END_STATION_LONGITUDE']
233
+ <BLANKLINE>
234
+ >>> # slice a subset of features
235
+ >>> fv_slice = fv.slice(['TRIPDURATION', 'START_STATION_LATITUDE'])
236
+ >>> fv_slice.names
237
+ ['TRIPDURATION', 'START_STATION_LATITUDE']
238
+ <BLANKLINE>
239
+ >>> # query the full set of features in original feature view
240
+ >>> fv_slice.feature_view_ref.feature_names
241
+ ['TRIPDURATION', 'START_STATION_LATITUDE', 'END_STATION_LONGITUDE']
242
+
179
243
  """
180
244
 
181
245
  res = []
@@ -187,14 +251,30 @@ class FeatureView:
187
251
  return FeatureViewSlice(self, res)
188
252
 
189
253
  def fully_qualified_name(self) -> str:
190
- """Returns the fully qualified name (<database_name>.<schema_name>.<feature_view_name>) for the
191
- FeatureView in Snowflake.
254
+ """
255
+ Returns the fully qualified name (<database_name>.<schema_name>.<feature_view_name>) for the
256
+ FeatureView in Snowflake.
192
257
 
193
258
  Returns:
194
259
  fully qualified name string.
195
260
 
196
261
  Raises:
197
262
  RuntimeError: if the FeatureView is not registered.
263
+
264
+ Example::
265
+
266
+ >>> fs = FeatureStore(...)
267
+ >>> e = fs.get_entity('TRIP_ID')
268
+ >>> feature_df = session.table(source_table).select(
269
+ ... 'TRIPDURATION',
270
+ ... 'START_STATION_LATITUDE',
271
+ ... 'TRIP_ID'
272
+ ... )
273
+ >>> darft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
274
+ >>> registered_fv = fs.register_feature_view(darft_fv, version='1.0')
275
+ >>> registered_fv.fully_qualified_name()
276
+ 'MY_DB.MY_SCHEMA."F_TRIP$1.0"'
277
+
198
278
  """
199
279
  if self.status == FeatureViewStatus.DRAFT or self.version is None:
200
280
  raise RuntimeError(f"FeatureView {self.name} has not been registered.")
@@ -212,6 +292,22 @@ class FeatureView:
212
292
 
213
293
  Raises:
214
294
  ValueError: if feature name is not found in the FeatureView.
295
+
296
+ Example::
297
+
298
+ >>> fs = FeatureStore(...)
299
+ >>> e = fs.get_entity('TRIP_ID')
300
+ >>> feature_df = session.table(source_table).select('TRIPDURATION', 'START_STATION_LATITUDE', 'TRIP_ID')
301
+ >>> draft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
302
+ >>> draft_fv = draft_fv.attach_feature_desc({
303
+ ... "TRIPDURATION": "Duration of a trip.",
304
+ ... "START_STATION_LATITUDE": "Latitude of the start station."
305
+ ... })
306
+ >>> registered_fv = fs.register_feature_view(draft_fv, version='1.0')
307
+ >>> registered_fv.feature_descs
308
+ OrderedDict([('TRIPDURATION', 'Duration of a trip.'),
309
+ ('START_STATION_LATITUDE', 'Latitude of the start station.')])
310
+
215
311
  """
216
312
  for f, d in descs.items():
217
313
  f = SqlIdentifier(f)
@@ -243,6 +339,41 @@ class FeatureView:
243
339
  def desc(self) -> str:
244
340
  return self._desc
245
341
 
342
+ @desc.setter
343
+ def desc(self, new_value: str) -> None:
344
+ """Set the description of feature view.
345
+
346
+ Args:
347
+ new_value: new value of description.
348
+
349
+ Example::
350
+
351
+ >>> fs = FeatureStore(...)
352
+ >>> e = fs.get_entity('TRIP_ID')
353
+ >>> darft_fv = FeatureView(
354
+ ... name='F_TRIP',
355
+ ... entities=[e],
356
+ ... feature_df=feature_df,
357
+ ... desc='old desc'
358
+ ... )
359
+ >>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
360
+ >>> print(fv_1.desc)
361
+ old desc
362
+ <BLANKLINE>
363
+ >>> darft_fv.desc = 'NEW DESC'
364
+ >>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
365
+ >>> print(fv_2.desc)
366
+ NEW DESC
367
+
368
+ """
369
+ warnings.warn(
370
+ "You must call register_feature_view() to make it effective. "
371
+ "Or use update_feature_view(desc=<new_value>).",
372
+ stacklevel=2,
373
+ category=UserWarning,
374
+ )
375
+ self._desc = new_value
376
+
246
377
  @property
247
378
  def query(self) -> str:
248
379
  return self._query
@@ -269,10 +400,37 @@ class FeatureView:
269
400
 
270
401
  @refresh_freq.setter
271
402
  def refresh_freq(self, new_value: str) -> None:
272
- if self.status == FeatureViewStatus.DRAFT or self.status == FeatureViewStatus.STATIC:
273
- raise RuntimeError(
274
- f"Feature view {self.name}/{self.version} must be registered and non-static to update refresh_freq."
275
- )
403
+ """Set refresh frequency of feature view.
404
+
405
+ Args:
406
+ new_value: The new value of refresh frequency.
407
+
408
+ Example::
409
+
410
+ >>> fs = FeatureStore(...)
411
+ >>> e = fs.get_entity('TRIP_ID')
412
+ >>> darft_fv = FeatureView(
413
+ ... name='F_TRIP',
414
+ ... entities=[e],
415
+ ... feature_df=feature_df,
416
+ ... refresh_freq='1d'
417
+ ... )
418
+ >>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
419
+ >>> print(fv_1.refresh_freq)
420
+ 1 day
421
+ <BLANKLINE>
422
+ >>> darft_fv.refresh_freq = '12h'
423
+ >>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
424
+ >>> print(fv_2.refresh_freq)
425
+ 12 hours
426
+
427
+ """
428
+ warnings.warn(
429
+ "You must call register_feature_view() to make it effective. "
430
+ "Or use update_feature_view(refresh_freq=<new_value>).",
431
+ stacklevel=2,
432
+ category=UserWarning,
433
+ )
276
434
  self._refresh_freq = new_value
277
435
 
278
436
  @property
@@ -289,10 +447,38 @@ class FeatureView:
289
447
 
290
448
  @warehouse.setter
291
449
  def warehouse(self, new_value: str) -> None:
292
- if self.status == FeatureViewStatus.DRAFT or self.status == FeatureViewStatus.STATIC:
293
- raise RuntimeError(
294
- f"Feature view {self.name}/{self.version} must be registered and non-static to update warehouse."
295
- )
450
+ """Set warehouse of feature view.
451
+
452
+ Args:
453
+ new_value: The new value of warehouse.
454
+
455
+ Example::
456
+
457
+ >>> fs = FeatureStore(...)
458
+ >>> e = fs.get_entity('TRIP_ID')
459
+ >>> darft_fv = FeatureView(
460
+ ... name='F_TRIP',
461
+ ... entities=[e],
462
+ ... feature_df=feature_df,
463
+ ... refresh_freq='1d',
464
+ ... warehouse='WH1',
465
+ ... )
466
+ >>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
467
+ >>> print(fv_1.warehouse)
468
+ WH1
469
+ <BLANKLINE>
470
+ >>> darft_fv.warehouse = 'WH2'
471
+ >>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
472
+ >>> print(fv_2.warehouse)
473
+ WH2
474
+
475
+ """
476
+ warnings.warn(
477
+ "You must call register_feature_view() to make it effective. "
478
+ "Or use update_feature_view(warehouse=<new_value>).",
479
+ stacklevel=2,
480
+ category=UserWarning,
481
+ )
296
482
  self._warehouse = SqlIdentifier(new_value)
297
483
 
298
484
  @property
@@ -406,6 +592,11 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
406
592
  feature_desc_dict[k.identifier()] = v
407
593
  fv_dict["_feature_desc"] = feature_desc_dict
408
594
 
595
+ lineage_node_keys = [key for key in fv_dict if key.startswith("_node") or key == "_session"]
596
+
597
+ for key in lineage_node_keys:
598
+ fv_dict.pop(key)
599
+
409
600
  return fv_dict
410
601
 
411
602
  def to_df(self, session: Session) -> DataFrame:
@@ -428,7 +619,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
428
619
 
429
620
  entities = []
430
621
  for e_json in json_dict["_entities"]:
431
- e = Entity(e_json["name"], e_json["join_keys"], e_json["desc"])
622
+ e = Entity(e_json["name"], e_json["join_keys"], desc=e_json["desc"])
432
623
  e.owner = e_json["owner"]
433
624
  entities.append(e)
434
625
 
@@ -449,6 +640,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
449
640
  refresh_mode_reason=json_dict["_refresh_mode_reason"],
450
641
  owner=json_dict["_owner"],
451
642
  infer_schema_df=session.sql(json_dict.get("_infer_schema_query", None)),
643
+ session=session,
452
644
  )
453
645
 
454
646
  @staticmethod
@@ -463,6 +655,21 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
463
655
  )
464
656
  )
465
657
 
658
+ @staticmethod
659
+ def _load_from_lineage_node(session: Session, name: str, version: str) -> FeatureView:
660
+ db_name, feature_store_name, feature_view_name, _ = identifier.parse_schema_level_object_identifier(name)
661
+
662
+ session_warehouse = session.get_current_warehouse()
663
+
664
+ if not session_warehouse:
665
+ raise snowml_exceptions.SnowflakeMLException(
666
+ error_code=error_codes.NOT_FOUND,
667
+ original_exception=ValueError("No active warehouse selected in the current session"),
668
+ )
669
+
670
+ fs = feature_store.FeatureStore(session, db_name, feature_store_name, default_warehouse=session_warehouse)
671
+ return fs.get_feature_view(feature_view_name, version) # type: ignore[no-any-return]
672
+
466
673
  @staticmethod
467
674
  def _construct_feature_view(
468
675
  name: str,
@@ -481,6 +688,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
481
688
  refresh_mode_reason: Optional[str],
482
689
  owner: Optional[str],
483
690
  infer_schema_df: Optional[DataFrame],
691
+ session: Session,
484
692
  ) -> FeatureView:
485
693
  fv = FeatureView(
486
694
  name=name,
@@ -500,4 +708,11 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
500
708
  fv._refresh_mode_reason = refresh_mode_reason
501
709
  fv._owner = owner
502
710
  fv.attach_feature_desc(feature_descs)
711
+
712
+ lineage_node.LineageNode.__init__(
713
+ fv, session=session, name=f"{fv.database}.{fv._schema}.{name}", domain="feature_view", version=version
714
+ )
503
715
  return fv
716
+
717
+
718
+ lineage_node.DOMAIN_LINEAGE_REGISTRY["feature_view"] = FeatureView
@@ -11,11 +11,17 @@ from snowflake.ml._internal.exceptions import (
11
11
  fileset_errors,
12
12
  )
13
13
  from snowflake.ml._internal.utils import identifier
14
+ from snowflake.ml.fileset import stage_fs
14
15
  from snowflake.snowpark import exceptions as snowpark_exceptions
15
16
 
16
- from . import stage_fs
17
-
18
- _SNOWURL_PATH_RE = re.compile(r"versions/(?P<version>[^/]+)(?:/+(?P<filepath>.*))?")
17
+ PROTOCOL_NAME = "snow"
18
+ _SNOWURL_ENTITY_PATTERN = (
19
+ f"(?:{PROTOCOL_NAME}://)?"
20
+ r"(?<!@)(?P<domain>\w+)/"
21
+ rf"(?P<name>(?:{identifier._SF_IDENTIFIER}\.){{,2}}{identifier._SF_IDENTIFIER})/"
22
+ )
23
+ _SNOWURL_VERSION_PATTERN = r"(?P<path>versions/(?:(?P<version>[^/]+)(?:/+(?P<relpath>.*))?)?)"
24
+ _SNOWURL_PATH_RE = re.compile(f"(?:{_SNOWURL_ENTITY_PATTERN})?" + _SNOWURL_VERSION_PATTERN)
19
25
 
20
26
 
21
27
  class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
@@ -76,8 +82,8 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
76
82
  versions_dict = defaultdict(list)
77
83
  for file in files:
78
84
  match = _SNOWURL_PATH_RE.fullmatch(file)
79
- assert match is not None and match.group("filepath") is not None
80
- versions_dict[match.group("version")].append(match.group("filepath"))
85
+ assert match is not None and match.group("relpath") is not None
86
+ versions_dict[match.group("version")].append(match.group("relpath"))
81
87
  try:
82
88
  async_jobs: List[snowpark.AsyncJob] = []
83
89
  for version, version_files in versions_dict.items():
@@ -98,10 +104,8 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
98
104
  (r["NAME"], r["URL"]) for job in async_jobs for r in stage_fs._resolve_async_job(job)
99
105
  ]
100
106
  return presigned_urls
101
- except snowpark_exceptions.SnowparkClientException as e:
102
- if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST) or e.message.startswith(
103
- fileset_errors.ERRNO_STAGE_NOT_EXIST
104
- ):
107
+ except snowpark_exceptions.SnowparkSQLException as e:
108
+ if e.sql_error_code in {fileset_errors.ERRNO_DOMAIN_NOT_EXIST, fileset_errors.ERRNO_STAGE_NOT_EXIST}:
105
109
  raise snowml_exceptions.SnowflakeMLException(
106
110
  error_code=error_codes.SNOWML_NOT_FOUND,
107
111
  original_exception=fileset_errors.StageNotFoundError(
@@ -118,7 +122,7 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
118
122
  def _parent(cls, path: str) -> str:
119
123
  """Get parent of specified path up to minimally valid root path.
120
124
 
121
- For SnowURL, the minimum valid path is snow://<domain>/<entity>/versions/<version>
125
+ For SnowURL, the minimum valid relative path is versions/<version>
122
126
 
123
127
  Args:
124
128
  path: File or directory path
@@ -128,22 +132,22 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
128
132
 
129
133
  Examples:
130
134
  ----
131
- >>> fs._parent("snow://dataset/my_ds/versions/my_version/file.ext")
132
- "snow://dataset/my_ds/versions/my_version/"
133
- >>> fs._parent("snow://dataset/my_ds/versions/my_version/subdir/file.ext")
134
- "snow://dataset/my_ds/versions/my_version/subdir/"
135
- >>> fs._parent("snow://dataset/my_ds/versions/my_version/")
136
- "snow://dataset/my_ds/versions/my_version/"
137
- >>> fs._parent("snow://dataset/my_ds/versions/my_version")
138
- "snow://dataset/my_ds/versions/my_version"
135
+ >>> fs._parent("versions/my_version/file.ext")
136
+ "versions/my_version"
137
+ >>> fs._parent("versions/my_version/subdir/file.ext")
138
+ "versions/my_version/subdir"
139
+ >>> fs._parent("versions/my_version/")
140
+ "versions/my_version"
141
+ >>> fs._parent("versions/my_version")
142
+ "versions/my_version"
139
143
  """
140
144
  path_match = _SNOWURL_PATH_RE.fullmatch(path)
141
145
  if not path_match:
142
146
  return super()._parent(path) # type: ignore[no-any-return]
143
- filepath: str = path_match.group("filepath") or ""
144
- root: str = path[: path_match.start("filepath")] if filepath else path
147
+ filepath: str = path_match.group("relpath") or ""
148
+ root: str = path[: path_match.start("relpath")] if filepath else path
145
149
  if "/" in filepath:
146
150
  parent = filepath.rsplit("/", 1)[0]
147
151
  return root + parent
148
152
  else:
149
- return root
153
+ return root.rstrip("/")
@@ -256,9 +256,9 @@ class FileSet:
256
256
  api_calls=[snowpark.DataFrameWriter.copy_into_location],
257
257
  ),
258
258
  )
259
- except snowpark_exceptions.SnowparkClientException as e:
259
+ except snowpark_exceptions.SnowparkSQLException as e:
260
260
  # Snowpark wraps the Python Connector error code in the head of the error message.
261
- if e.message.startswith(fileset_errors.ERRNO_FILE_EXIST_IN_STAGE):
261
+ if e.sql_error_code == fileset_errors.ERRNO_FILE_EXIST_IN_STAGE:
262
262
  raise fileset_errors.FileSetExistError(fileset_error_messages.FILESET_ALREADY_EXISTS.format(name))
263
263
  else:
264
264
  raise fileset_errors.FileSetError(str(e))
@@ -14,18 +14,10 @@ from snowflake.ml._internal.exceptions import (
14
14
  from snowflake.ml._internal.utils import identifier
15
15
  from snowflake.ml.fileset import embedded_stage_fs, sfcfs
16
16
 
17
- PROTOCOL_NAME = "snow"
18
-
19
17
  _SFFileEntityPath = collections.namedtuple(
20
18
  "_SFFileEntityPath", ["domain", "name", "filepath", "version", "relative_path"]
21
19
  )
22
- _PROJECT = "FileSet"
23
- _SNOWURL_PATTERN = re.compile(
24
- f"({PROTOCOL_NAME}://)?"
25
- r"(?<!@)(?P<domain>\w+)/"
26
- rf"(?P<name>(?:{identifier._SF_IDENTIFIER}\.){{,2}}{identifier._SF_IDENTIFIER})/"
27
- r"(?P<path>versions/(?:(?P<version>[^/]+)(?:/(?P<relpath>.*))?)?)"
28
- )
20
+ _SNOWURL_PATTERN = re.compile(embedded_stage_fs._SNOWURL_ENTITY_PATTERN + embedded_stage_fs._SNOWURL_VERSION_PATTERN)
29
21
 
30
22
 
31
23
  class SnowFileSystem(sfcfs.SFFileSystem):
@@ -38,7 +30,7 @@ class SnowFileSystem(sfcfs.SFFileSystem):
38
30
  See `sfcfs.SFFileSystem` documentation for example usage patterns.
39
31
  """
40
32
 
41
- protocol = PROTOCOL_NAME
33
+ protocol = embedded_stage_fs.PROTOCOL_NAME
42
34
  _IS_BUGGED_VERSION = None
43
35
 
44
36
  def __init__(
@@ -75,10 +67,7 @@ class SnowFileSystem(sfcfs.SFFileSystem):
75
67
  """Convert the relative path in a stage to an absolute path starts with the location of the stage."""
76
68
  # Strip protocol from absolute path, since backend needs snow:// prefix to resolve correctly
77
69
  # but fsspec logic strips protocol when doing any searching and globbing
78
- stage_name = stage_fs.stage_name
79
- protocol = f"{PROTOCOL_NAME}://"
80
- if stage_name.startswith(protocol):
81
- stage_name = stage_name[len(protocol) :]
70
+ stage_name: str = self._strip_protocol(stage_fs.stage_name)
82
71
  abs_path = stage_name + "/" + path
83
72
  return abs_path
84
73
 
@@ -128,4 +117,4 @@ class SnowFileSystem(sfcfs.SFFileSystem):
128
117
  )
129
118
 
130
119
 
131
- fsspec.register_implementation(PROTOCOL_NAME, SnowFileSystem)
120
+ fsspec.register_implementation(SnowFileSystem.protocol, SnowFileSystem)
@@ -170,8 +170,8 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
170
170
  path = path.lstrip("/")
171
171
  async_job: snowpark.AsyncJob = self._session.sql(f"LIST '{loc}/{path}'").collect(block=False)
172
172
  objects: List[snowpark.Row] = _resolve_async_job(async_job)
173
- except snowpark_exceptions.SnowparkClientException as e:
174
- if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST):
173
+ except snowpark_exceptions.SnowparkSQLException as e:
174
+ if e.sql_error_code == fileset_errors.ERRNO_DOMAIN_NOT_EXIST:
175
175
  raise snowml_exceptions.SnowflakeMLException(
176
176
  error_code=error_codes.SNOWML_NOT_FOUND,
177
177
  original_exception=fileset_errors.StageNotFoundError(
@@ -234,21 +234,29 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
234
234
 
235
235
  Raises:
236
236
  SnowflakeMLException: An error occurred when the given path points to a file that cannot be found.
237
+ snowpark_exceptions.SnowparkClientException: File access failed with a Snowpark exception
237
238
  """
238
239
  path = path.lstrip("/")
239
240
  if self._USE_FALLBACK_FILE_ACCESS:
240
241
  return self._open_with_snowpark(path)
241
242
  cached_presigned_url = self._url_cache.get(path, None)
242
- if not cached_presigned_url:
243
- res = self._fetch_presigned_urls([path])
244
- url = res[0][1]
245
- expire_at = time.time() + _PRESIGNED_URL_LIFETIME_SEC
246
- cached_presigned_url = _PresignedUrl(url, expire_at)
247
- self._url_cache[path] = cached_presigned_url
248
- logging.debug(f"Retrieved presigned url for {path}.")
249
- elif cached_presigned_url.is_expiring():
250
- self.optimize_read()
251
- cached_presigned_url = self._url_cache[path]
243
+ try:
244
+ if not cached_presigned_url:
245
+ res = self._fetch_presigned_urls([path])
246
+ url = res[0][1]
247
+ expire_at = time.time() + _PRESIGNED_URL_LIFETIME_SEC
248
+ cached_presigned_url = _PresignedUrl(url, expire_at)
249
+ self._url_cache[path] = cached_presigned_url
250
+ logging.debug(f"Retrieved presigned url for {path}.")
251
+ elif cached_presigned_url.is_expiring():
252
+ self.optimize_read()
253
+ cached_presigned_url = self._url_cache[path]
254
+ except snowpark_exceptions.SnowparkClientException as e:
255
+ if self._USE_FALLBACK_FILE_ACCESS == False: # noqa: E712 # Fallback disabled
256
+ raise
257
+ # This may be an intermittent failure, so don't set _USE_FALLBACK_FILE_ACCESS = True
258
+ logging.warning(f"Pre-signed URL generation failed with {e.message}, trying fallback file access")
259
+ return self._open_with_snowpark(path)
252
260
  url = cached_presigned_url.url
253
261
  try:
254
262
  return self._fs._open(url, mode=mode, **kwargs)
@@ -387,10 +395,8 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
387
395
  api_calls=[snowpark.DataFrame.collect],
388
396
  ),
389
397
  )
390
- except snowpark_exceptions.SnowparkClientException as e:
391
- if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST) or e.message.startswith(
392
- fileset_errors.ERRNO_STAGE_NOT_EXIST
393
- ):
398
+ except snowpark_exceptions.SnowparkSQLException as e:
399
+ if e.sql_error_code in {fileset_errors.ERRNO_DOMAIN_NOT_EXIST, fileset_errors.ERRNO_STAGE_NOT_EXIST}:
394
400
  raise snowml_exceptions.SnowflakeMLException(
395
401
  error_code=error_codes.SNOWML_NOT_FOUND,
396
402
  original_exception=fileset_errors.StageNotFoundError(
@@ -406,9 +412,9 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
406
412
 
407
413
 
408
414
  def _match_error_code(ex: snowpark_exceptions.SnowparkSQLException, error_code: int) -> bool:
409
- # Snowpark writes error code to message instead of populating e.error_code
415
+ # Snowpark writes error code to message instead of populating e.sql_error_code
410
416
  error_code_str = str(error_code)
411
- return ex.error_code == error_code_str or error_code_str in ex.message
417
+ return ex.sql_error_code == error_code_str or error_code_str in ex.message
412
418
 
413
419
 
414
420
  @snowflake_plan.SnowflakePlan.Decorator.wrap_exception # type: ignore[misc]
@@ -0,0 +1,3 @@
1
+ from .lineage_node import LineageNode
2
+
3
+ __all__ = ["LineageNode"]