snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +4 -1
- snowflake/cortex/_classify_text.py +36 -0
- snowflake/cortex/_complete.py +281 -21
- snowflake/cortex/_extract_answer.py +0 -1
- snowflake/cortex/_sentiment.py +0 -1
- snowflake/cortex/_summarize.py +0 -1
- snowflake/cortex/_translate.py +0 -1
- snowflake/cortex/_util.py +12 -85
- snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
- snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
- snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
- snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
- snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
- snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
- snowflake/ml/_internal/telemetry.py +38 -2
- snowflake/ml/_internal/utils/identifier.py +14 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
- snowflake/ml/data/_internal/arrow_ingestor.py +228 -0
- snowflake/ml/data/_internal/ingestor_utils.py +58 -0
- snowflake/ml/data/data_connector.py +133 -0
- snowflake/ml/data/data_ingestor.py +28 -0
- snowflake/ml/data/data_source.py +23 -0
- snowflake/ml/dataset/dataset.py +39 -32
- snowflake/ml/dataset/dataset_reader.py +18 -118
- snowflake/ml/feature_store/access_manager.py +7 -1
- snowflake/ml/feature_store/entity.py +19 -2
- snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +31 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +24 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +4 -0
- snowflake/ml/feature_store/examples/example_helper.py +240 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/dropoff_features.py +39 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +58 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
- snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
- snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +29 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +21 -0
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +5 -0
- snowflake/ml/feature_store/feature_store.py +987 -264
- snowflake/ml/feature_store/feature_view.py +228 -13
- snowflake/ml/fileset/embedded_stage_fs.py +25 -21
- snowflake/ml/fileset/fileset.py +2 -2
- snowflake/ml/fileset/snowfs.py +4 -15
- snowflake/ml/fileset/stage_fs.py +24 -18
- snowflake/ml/lineage/__init__.py +3 -0
- snowflake/ml/lineage/lineage_node.py +139 -0
- snowflake/ml/model/_client/model/model_impl.py +47 -14
- snowflake/ml/model/_client/model/model_version_impl.py +82 -2
- snowflake/ml/model/_client/ops/model_ops.py +77 -5
- snowflake/ml/model/_client/sql/model.py +1 -0
- snowflake/ml/model/_client/sql/model_version.py +45 -2
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
- snowflake/ml/model/_model_composer/model_composer.py +15 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +20 -4
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +55 -0
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -34
- snowflake/ml/model/_model_composer/model_method/model_method.py +10 -7
- snowflake/ml/model/_packager/model_handlers/_base.py +13 -3
- snowflake/ml/model/_packager/model_handlers/_utils.py +59 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +44 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +70 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +61 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_packager.py +9 -4
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -5
- snowflake/ml/model/custom_model.py +22 -2
- snowflake/ml/model/model_signature.py +4 -4
- snowflake/ml/model/type_hints.py +77 -4
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +3 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
- snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
- snowflake/ml/modeling/cluster/birch.py +4 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
- snowflake/ml/modeling/cluster/dbscan.py +4 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
- snowflake/ml/modeling/cluster/k_means.py +4 -2
- snowflake/ml/modeling/cluster/mean_shift.py +4 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
- snowflake/ml/modeling/cluster/optics.py +4 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
- snowflake/ml/modeling/compose/column_transformer.py +4 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
- snowflake/ml/modeling/covariance/oas.py +4 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/pca.py +4 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
- snowflake/ml/modeling/impute/knn_imputer.py +4 -2
- snowflake/ml/modeling/impute/missing_indicator.py +4 -2
- snowflake/ml/modeling/impute/simple_imputer.py +26 -0
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
- snowflake/ml/modeling/manifold/isomap.py +4 -2
- snowflake/ml/modeling/manifold/mds.py +4 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
- snowflake/ml/modeling/manifold/tsne.py +4 -2
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
- snowflake/ml/modeling/pipeline/pipeline.py +5 -4
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
- snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
- snowflake/ml/registry/_manager/model_manager.py +16 -3
- snowflake/ml/registry/registry.py +100 -13
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/METADATA +81 -7
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/RECORD +165 -139
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/lineage/data_source.py +0 -10
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/top_level.txt +0 -0
@@ -2,17 +2,25 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import json
|
4
4
|
import re
|
5
|
+
import warnings
|
5
6
|
from collections import OrderedDict
|
6
7
|
from dataclasses import asdict, dataclass
|
7
8
|
from enum import Enum
|
8
9
|
from typing import Any, Dict, List, Optional
|
9
10
|
|
11
|
+
from snowflake.ml._internal.exceptions import (
|
12
|
+
error_codes,
|
13
|
+
exceptions as snowml_exceptions,
|
14
|
+
)
|
15
|
+
from snowflake.ml._internal.utils import identifier
|
10
16
|
from snowflake.ml._internal.utils.identifier import concat_names
|
11
17
|
from snowflake.ml._internal.utils.sql_identifier import (
|
12
18
|
SqlIdentifier,
|
13
19
|
to_sql_identifiers,
|
14
20
|
)
|
21
|
+
from snowflake.ml.feature_store import feature_store
|
15
22
|
from snowflake.ml.feature_store.entity import Entity
|
23
|
+
from snowflake.ml.lineage import lineage_node
|
16
24
|
from snowflake.snowpark import DataFrame, Session
|
17
25
|
from snowflake.snowpark.types import (
|
18
26
|
DateType,
|
@@ -67,6 +75,7 @@ class FeatureViewVersion(str):
|
|
67
75
|
|
68
76
|
|
69
77
|
class FeatureViewStatus(Enum):
|
78
|
+
MASKED = "MASKED" # for shared feature views where scheduling state is not available
|
70
79
|
DRAFT = "DRAFT"
|
71
80
|
STATIC = "STATIC"
|
72
81
|
RUNNING = "RUNNING" # This can be deprecated after BCR 2024_02 gets fully deployed
|
@@ -107,7 +116,7 @@ class FeatureViewSlice:
|
|
107
116
|
return cls(**json_dict)
|
108
117
|
|
109
118
|
|
110
|
-
class FeatureView:
|
119
|
+
class FeatureView(lineage_node.LineageNode):
|
111
120
|
"""
|
112
121
|
A FeatureView instance encapsulates a logical group of features.
|
113
122
|
"""
|
@@ -117,9 +126,11 @@ class FeatureView:
|
|
117
126
|
name: str,
|
118
127
|
entities: List[Entity],
|
119
128
|
feature_df: DataFrame,
|
129
|
+
*,
|
120
130
|
timestamp_col: Optional[str] = None,
|
121
131
|
refresh_freq: Optional[str] = None,
|
122
132
|
desc: str = "",
|
133
|
+
warehouse: Optional[str] = None,
|
123
134
|
**_kwargs: Any,
|
124
135
|
) -> None:
|
125
136
|
"""
|
@@ -140,7 +151,33 @@ class FeatureView:
|
|
140
151
|
NOTE: If refresh_freq is not provided, then FeatureView will be registered as View on Snowflake backend
|
141
152
|
and there won't be extra storage cost.
|
142
153
|
desc: description of the FeatureView.
|
154
|
+
warehouse: warehouse to refresh feature view. Not needed for static feature view (refresh_freq is None).
|
155
|
+
For managed feature view, this warehouse will overwrite the default warehouse of Feature Store if it is
|
156
|
+
specified, otherwise the default warehouse will be used.
|
143
157
|
_kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
|
158
|
+
|
159
|
+
Example::
|
160
|
+
|
161
|
+
>>> fs = FeatureStore(...)
|
162
|
+
>>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
|
163
|
+
>>> feature_df = session.sql("select f_1, f_2 from source_table")
|
164
|
+
>>> draft_fv = FeatureView(
|
165
|
+
... name="my_fv",
|
166
|
+
... entities=[e1, e2],
|
167
|
+
... feature_df=feature_df,
|
168
|
+
... timestamp_col='TS', # optional
|
169
|
+
... refresh_freq='1d', # optional
|
170
|
+
... desc='A line about this feature view', # optional
|
171
|
+
... warehouse='WH' # optional, the warehouse used to refresh (managed) feature view
|
172
|
+
... )
|
173
|
+
>>> print(draft_fv.status)
|
174
|
+
FeatureViewStatus.DRAFT
|
175
|
+
<BLANKLINE>
|
176
|
+
>>> # registered_fv is a local object that maps to a Snowflake backend object.
|
177
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, "v1")
|
178
|
+
>>> print(registered_fv.status)
|
179
|
+
FeatureViewStatus.ACTIVE
|
180
|
+
|
144
181
|
"""
|
145
182
|
|
146
183
|
self._name: SqlIdentifier = SqlIdentifier(name)
|
@@ -158,7 +195,7 @@ class FeatureView:
|
|
158
195
|
self._refresh_freq: Optional[str] = refresh_freq
|
159
196
|
self._database: Optional[SqlIdentifier] = None
|
160
197
|
self._schema: Optional[SqlIdentifier] = None
|
161
|
-
self._warehouse: Optional[SqlIdentifier] = None
|
198
|
+
self._warehouse: Optional[SqlIdentifier] = SqlIdentifier(warehouse) if warehouse is not None else None
|
162
199
|
self._refresh_mode: Optional[str] = None
|
163
200
|
self._refresh_mode_reason: Optional[str] = None
|
164
201
|
self._owner: Optional[str] = None
|
@@ -176,6 +213,33 @@ class FeatureView:
|
|
176
213
|
|
177
214
|
Raises:
|
178
215
|
ValueError: if selected feature names are not found in the FeatureView.
|
216
|
+
|
217
|
+
Example::
|
218
|
+
|
219
|
+
>>> fs = FeatureStore(...)
|
220
|
+
>>> e = fs.get_entity('TRIP_ID')
|
221
|
+
>>> # feature_df contains 3 features and 1 entity
|
222
|
+
>>> feature_df = session.table(source_table).select(
|
223
|
+
... 'TRIPDURATION',
|
224
|
+
... 'START_STATION_LATITUDE',
|
225
|
+
... 'END_STATION_LONGITUDE',
|
226
|
+
... 'TRIP_ID'
|
227
|
+
... )
|
228
|
+
>>> darft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
|
229
|
+
>>> fv = fs.register_feature_view(darft_fv, version='1.0')
|
230
|
+
>>> # shows all 3 features
|
231
|
+
>>> fv.feature_names
|
232
|
+
['TRIPDURATION', 'START_STATION_LATITUDE', 'END_STATION_LONGITUDE']
|
233
|
+
<BLANKLINE>
|
234
|
+
>>> # slice a subset of features
|
235
|
+
>>> fv_slice = fv.slice(['TRIPDURATION', 'START_STATION_LATITUDE'])
|
236
|
+
>>> fv_slice.names
|
237
|
+
['TRIPDURATION', 'START_STATION_LATITUDE']
|
238
|
+
<BLANKLINE>
|
239
|
+
>>> # query the full set of features in original feature view
|
240
|
+
>>> fv_slice.feature_view_ref.feature_names
|
241
|
+
['TRIPDURATION', 'START_STATION_LATITUDE', 'END_STATION_LONGITUDE']
|
242
|
+
|
179
243
|
"""
|
180
244
|
|
181
245
|
res = []
|
@@ -187,14 +251,30 @@ class FeatureView:
|
|
187
251
|
return FeatureViewSlice(self, res)
|
188
252
|
|
189
253
|
def fully_qualified_name(self) -> str:
|
190
|
-
"""
|
191
|
-
|
254
|
+
"""
|
255
|
+
Returns the fully qualified name (<database_name>.<schema_name>.<feature_view_name>) for the
|
256
|
+
FeatureView in Snowflake.
|
192
257
|
|
193
258
|
Returns:
|
194
259
|
fully qualified name string.
|
195
260
|
|
196
261
|
Raises:
|
197
262
|
RuntimeError: if the FeatureView is not registered.
|
263
|
+
|
264
|
+
Example::
|
265
|
+
|
266
|
+
>>> fs = FeatureStore(...)
|
267
|
+
>>> e = fs.get_entity('TRIP_ID')
|
268
|
+
>>> feature_df = session.table(source_table).select(
|
269
|
+
... 'TRIPDURATION',
|
270
|
+
... 'START_STATION_LATITUDE',
|
271
|
+
... 'TRIP_ID'
|
272
|
+
... )
|
273
|
+
>>> darft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
|
274
|
+
>>> registered_fv = fs.register_feature_view(darft_fv, version='1.0')
|
275
|
+
>>> registered_fv.fully_qualified_name()
|
276
|
+
'MY_DB.MY_SCHEMA."F_TRIP$1.0"'
|
277
|
+
|
198
278
|
"""
|
199
279
|
if self.status == FeatureViewStatus.DRAFT or self.version is None:
|
200
280
|
raise RuntimeError(f"FeatureView {self.name} has not been registered.")
|
@@ -212,6 +292,22 @@ class FeatureView:
|
|
212
292
|
|
213
293
|
Raises:
|
214
294
|
ValueError: if feature name is not found in the FeatureView.
|
295
|
+
|
296
|
+
Example::
|
297
|
+
|
298
|
+
>>> fs = FeatureStore(...)
|
299
|
+
>>> e = fs.get_entity('TRIP_ID')
|
300
|
+
>>> feature_df = session.table(source_table).select('TRIPDURATION', 'START_STATION_LATITUDE', 'TRIP_ID')
|
301
|
+
>>> draft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
|
302
|
+
>>> draft_fv = draft_fv.attach_feature_desc({
|
303
|
+
... "TRIPDURATION": "Duration of a trip.",
|
304
|
+
... "START_STATION_LATITUDE": "Latitude of the start station."
|
305
|
+
... })
|
306
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, version='1.0')
|
307
|
+
>>> registered_fv.feature_descs
|
308
|
+
OrderedDict([('TRIPDURATION', 'Duration of a trip.'),
|
309
|
+
('START_STATION_LATITUDE', 'Latitude of the start station.')])
|
310
|
+
|
215
311
|
"""
|
216
312
|
for f, d in descs.items():
|
217
313
|
f = SqlIdentifier(f)
|
@@ -243,6 +339,41 @@ class FeatureView:
|
|
243
339
|
def desc(self) -> str:
|
244
340
|
return self._desc
|
245
341
|
|
342
|
+
@desc.setter
|
343
|
+
def desc(self, new_value: str) -> None:
|
344
|
+
"""Set the description of feature view.
|
345
|
+
|
346
|
+
Args:
|
347
|
+
new_value: new value of description.
|
348
|
+
|
349
|
+
Example::
|
350
|
+
|
351
|
+
>>> fs = FeatureStore(...)
|
352
|
+
>>> e = fs.get_entity('TRIP_ID')
|
353
|
+
>>> darft_fv = FeatureView(
|
354
|
+
... name='F_TRIP',
|
355
|
+
... entities=[e],
|
356
|
+
... feature_df=feature_df,
|
357
|
+
... desc='old desc'
|
358
|
+
... )
|
359
|
+
>>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
|
360
|
+
>>> print(fv_1.desc)
|
361
|
+
old desc
|
362
|
+
<BLANKLINE>
|
363
|
+
>>> darft_fv.desc = 'NEW DESC'
|
364
|
+
>>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
|
365
|
+
>>> print(fv_2.desc)
|
366
|
+
NEW DESC
|
367
|
+
|
368
|
+
"""
|
369
|
+
warnings.warn(
|
370
|
+
"You must call register_feature_view() to make it effective. "
|
371
|
+
"Or use update_feature_view(desc=<new_value>).",
|
372
|
+
stacklevel=2,
|
373
|
+
category=UserWarning,
|
374
|
+
)
|
375
|
+
self._desc = new_value
|
376
|
+
|
246
377
|
@property
|
247
378
|
def query(self) -> str:
|
248
379
|
return self._query
|
@@ -269,10 +400,37 @@ class FeatureView:
|
|
269
400
|
|
270
401
|
@refresh_freq.setter
|
271
402
|
def refresh_freq(self, new_value: str) -> None:
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
403
|
+
"""Set refresh frequency of feature view.
|
404
|
+
|
405
|
+
Args:
|
406
|
+
new_value: The new value of refresh frequency.
|
407
|
+
|
408
|
+
Example::
|
409
|
+
|
410
|
+
>>> fs = FeatureStore(...)
|
411
|
+
>>> e = fs.get_entity('TRIP_ID')
|
412
|
+
>>> darft_fv = FeatureView(
|
413
|
+
... name='F_TRIP',
|
414
|
+
... entities=[e],
|
415
|
+
... feature_df=feature_df,
|
416
|
+
... refresh_freq='1d'
|
417
|
+
... )
|
418
|
+
>>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
|
419
|
+
>>> print(fv_1.refresh_freq)
|
420
|
+
1 day
|
421
|
+
<BLANKLINE>
|
422
|
+
>>> darft_fv.refresh_freq = '12h'
|
423
|
+
>>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
|
424
|
+
>>> print(fv_2.refresh_freq)
|
425
|
+
12 hours
|
426
|
+
|
427
|
+
"""
|
428
|
+
warnings.warn(
|
429
|
+
"You must call register_feature_view() to make it effective. "
|
430
|
+
"Or use update_feature_view(refresh_freq=<new_value>).",
|
431
|
+
stacklevel=2,
|
432
|
+
category=UserWarning,
|
433
|
+
)
|
276
434
|
self._refresh_freq = new_value
|
277
435
|
|
278
436
|
@property
|
@@ -289,10 +447,38 @@ class FeatureView:
|
|
289
447
|
|
290
448
|
@warehouse.setter
|
291
449
|
def warehouse(self, new_value: str) -> None:
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
450
|
+
"""Set warehouse of feature view.
|
451
|
+
|
452
|
+
Args:
|
453
|
+
new_value: The new value of warehouse.
|
454
|
+
|
455
|
+
Example::
|
456
|
+
|
457
|
+
>>> fs = FeatureStore(...)
|
458
|
+
>>> e = fs.get_entity('TRIP_ID')
|
459
|
+
>>> darft_fv = FeatureView(
|
460
|
+
... name='F_TRIP',
|
461
|
+
... entities=[e],
|
462
|
+
... feature_df=feature_df,
|
463
|
+
... refresh_freq='1d',
|
464
|
+
... warehouse='WH1',
|
465
|
+
... )
|
466
|
+
>>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
|
467
|
+
>>> print(fv_1.warehouse)
|
468
|
+
WH1
|
469
|
+
<BLANKLINE>
|
470
|
+
>>> darft_fv.warehouse = 'WH2'
|
471
|
+
>>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
|
472
|
+
>>> print(fv_2.warehouse)
|
473
|
+
WH2
|
474
|
+
|
475
|
+
"""
|
476
|
+
warnings.warn(
|
477
|
+
"You must call register_feature_view() to make it effective. "
|
478
|
+
"Or use update_feature_view(warehouse=<new_value>).",
|
479
|
+
stacklevel=2,
|
480
|
+
category=UserWarning,
|
481
|
+
)
|
296
482
|
self._warehouse = SqlIdentifier(new_value)
|
297
483
|
|
298
484
|
@property
|
@@ -406,6 +592,11 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
406
592
|
feature_desc_dict[k.identifier()] = v
|
407
593
|
fv_dict["_feature_desc"] = feature_desc_dict
|
408
594
|
|
595
|
+
lineage_node_keys = [key for key in fv_dict if key.startswith("_node") or key == "_session"]
|
596
|
+
|
597
|
+
for key in lineage_node_keys:
|
598
|
+
fv_dict.pop(key)
|
599
|
+
|
409
600
|
return fv_dict
|
410
601
|
|
411
602
|
def to_df(self, session: Session) -> DataFrame:
|
@@ -428,7 +619,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
428
619
|
|
429
620
|
entities = []
|
430
621
|
for e_json in json_dict["_entities"]:
|
431
|
-
e = Entity(e_json["name"], e_json["join_keys"], e_json["desc"])
|
622
|
+
e = Entity(e_json["name"], e_json["join_keys"], desc=e_json["desc"])
|
432
623
|
e.owner = e_json["owner"]
|
433
624
|
entities.append(e)
|
434
625
|
|
@@ -449,6 +640,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
449
640
|
refresh_mode_reason=json_dict["_refresh_mode_reason"],
|
450
641
|
owner=json_dict["_owner"],
|
451
642
|
infer_schema_df=session.sql(json_dict.get("_infer_schema_query", None)),
|
643
|
+
session=session,
|
452
644
|
)
|
453
645
|
|
454
646
|
@staticmethod
|
@@ -463,6 +655,21 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
463
655
|
)
|
464
656
|
)
|
465
657
|
|
658
|
+
@staticmethod
|
659
|
+
def _load_from_lineage_node(session: Session, name: str, version: str) -> FeatureView:
|
660
|
+
db_name, feature_store_name, feature_view_name, _ = identifier.parse_schema_level_object_identifier(name)
|
661
|
+
|
662
|
+
session_warehouse = session.get_current_warehouse()
|
663
|
+
|
664
|
+
if not session_warehouse:
|
665
|
+
raise snowml_exceptions.SnowflakeMLException(
|
666
|
+
error_code=error_codes.NOT_FOUND,
|
667
|
+
original_exception=ValueError("No active warehouse selected in the current session"),
|
668
|
+
)
|
669
|
+
|
670
|
+
fs = feature_store.FeatureStore(session, db_name, feature_store_name, default_warehouse=session_warehouse)
|
671
|
+
return fs.get_feature_view(feature_view_name, version) # type: ignore[no-any-return]
|
672
|
+
|
466
673
|
@staticmethod
|
467
674
|
def _construct_feature_view(
|
468
675
|
name: str,
|
@@ -481,6 +688,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
481
688
|
refresh_mode_reason: Optional[str],
|
482
689
|
owner: Optional[str],
|
483
690
|
infer_schema_df: Optional[DataFrame],
|
691
|
+
session: Session,
|
484
692
|
) -> FeatureView:
|
485
693
|
fv = FeatureView(
|
486
694
|
name=name,
|
@@ -500,4 +708,11 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
500
708
|
fv._refresh_mode_reason = refresh_mode_reason
|
501
709
|
fv._owner = owner
|
502
710
|
fv.attach_feature_desc(feature_descs)
|
711
|
+
|
712
|
+
lineage_node.LineageNode.__init__(
|
713
|
+
fv, session=session, name=f"{fv.database}.{fv._schema}.{name}", domain="feature_view", version=version
|
714
|
+
)
|
503
715
|
return fv
|
716
|
+
|
717
|
+
|
718
|
+
lineage_node.DOMAIN_LINEAGE_REGISTRY["feature_view"] = FeatureView
|
@@ -11,11 +11,17 @@ from snowflake.ml._internal.exceptions import (
|
|
11
11
|
fileset_errors,
|
12
12
|
)
|
13
13
|
from snowflake.ml._internal.utils import identifier
|
14
|
+
from snowflake.ml.fileset import stage_fs
|
14
15
|
from snowflake.snowpark import exceptions as snowpark_exceptions
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
17
|
+
PROTOCOL_NAME = "snow"
|
18
|
+
_SNOWURL_ENTITY_PATTERN = (
|
19
|
+
f"(?:{PROTOCOL_NAME}://)?"
|
20
|
+
r"(?<!@)(?P<domain>\w+)/"
|
21
|
+
rf"(?P<name>(?:{identifier._SF_IDENTIFIER}\.){{,2}}{identifier._SF_IDENTIFIER})/"
|
22
|
+
)
|
23
|
+
_SNOWURL_VERSION_PATTERN = r"(?P<path>versions/(?:(?P<version>[^/]+)(?:/+(?P<relpath>.*))?)?)"
|
24
|
+
_SNOWURL_PATH_RE = re.compile(f"(?:{_SNOWURL_ENTITY_PATTERN})?" + _SNOWURL_VERSION_PATTERN)
|
19
25
|
|
20
26
|
|
21
27
|
class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
@@ -76,8 +82,8 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
76
82
|
versions_dict = defaultdict(list)
|
77
83
|
for file in files:
|
78
84
|
match = _SNOWURL_PATH_RE.fullmatch(file)
|
79
|
-
assert match is not None and match.group("
|
80
|
-
versions_dict[match.group("version")].append(match.group("
|
85
|
+
assert match is not None and match.group("relpath") is not None
|
86
|
+
versions_dict[match.group("version")].append(match.group("relpath"))
|
81
87
|
try:
|
82
88
|
async_jobs: List[snowpark.AsyncJob] = []
|
83
89
|
for version, version_files in versions_dict.items():
|
@@ -98,10 +104,8 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
98
104
|
(r["NAME"], r["URL"]) for job in async_jobs for r in stage_fs._resolve_async_job(job)
|
99
105
|
]
|
100
106
|
return presigned_urls
|
101
|
-
except snowpark_exceptions.
|
102
|
-
if e.
|
103
|
-
fileset_errors.ERRNO_STAGE_NOT_EXIST
|
104
|
-
):
|
107
|
+
except snowpark_exceptions.SnowparkSQLException as e:
|
108
|
+
if e.sql_error_code in {fileset_errors.ERRNO_DOMAIN_NOT_EXIST, fileset_errors.ERRNO_STAGE_NOT_EXIST}:
|
105
109
|
raise snowml_exceptions.SnowflakeMLException(
|
106
110
|
error_code=error_codes.SNOWML_NOT_FOUND,
|
107
111
|
original_exception=fileset_errors.StageNotFoundError(
|
@@ -118,7 +122,7 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
118
122
|
def _parent(cls, path: str) -> str:
|
119
123
|
"""Get parent of specified path up to minimally valid root path.
|
120
124
|
|
121
|
-
For SnowURL, the minimum valid path is
|
125
|
+
For SnowURL, the minimum valid relative path is versions/<version>
|
122
126
|
|
123
127
|
Args:
|
124
128
|
path: File or directory path
|
@@ -128,22 +132,22 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
128
132
|
|
129
133
|
Examples:
|
130
134
|
----
|
131
|
-
>>> fs._parent("
|
132
|
-
"
|
133
|
-
>>> fs._parent("
|
134
|
-
"
|
135
|
-
>>> fs._parent("
|
136
|
-
"
|
137
|
-
>>> fs._parent("
|
138
|
-
"
|
135
|
+
>>> fs._parent("versions/my_version/file.ext")
|
136
|
+
"versions/my_version"
|
137
|
+
>>> fs._parent("versions/my_version/subdir/file.ext")
|
138
|
+
"versions/my_version/subdir"
|
139
|
+
>>> fs._parent("versions/my_version/")
|
140
|
+
"versions/my_version"
|
141
|
+
>>> fs._parent("versions/my_version")
|
142
|
+
"versions/my_version"
|
139
143
|
"""
|
140
144
|
path_match = _SNOWURL_PATH_RE.fullmatch(path)
|
141
145
|
if not path_match:
|
142
146
|
return super()._parent(path) # type: ignore[no-any-return]
|
143
|
-
filepath: str = path_match.group("
|
144
|
-
root: str = path[: path_match.start("
|
147
|
+
filepath: str = path_match.group("relpath") or ""
|
148
|
+
root: str = path[: path_match.start("relpath")] if filepath else path
|
145
149
|
if "/" in filepath:
|
146
150
|
parent = filepath.rsplit("/", 1)[0]
|
147
151
|
return root + parent
|
148
152
|
else:
|
149
|
-
return root
|
153
|
+
return root.rstrip("/")
|
snowflake/ml/fileset/fileset.py
CHANGED
@@ -256,9 +256,9 @@ class FileSet:
|
|
256
256
|
api_calls=[snowpark.DataFrameWriter.copy_into_location],
|
257
257
|
),
|
258
258
|
)
|
259
|
-
except snowpark_exceptions.
|
259
|
+
except snowpark_exceptions.SnowparkSQLException as e:
|
260
260
|
# Snowpark wraps the Python Connector error code in the head of the error message.
|
261
|
-
if e.
|
261
|
+
if e.sql_error_code == fileset_errors.ERRNO_FILE_EXIST_IN_STAGE:
|
262
262
|
raise fileset_errors.FileSetExistError(fileset_error_messages.FILESET_ALREADY_EXISTS.format(name))
|
263
263
|
else:
|
264
264
|
raise fileset_errors.FileSetError(str(e))
|
snowflake/ml/fileset/snowfs.py
CHANGED
@@ -14,18 +14,10 @@ from snowflake.ml._internal.exceptions import (
|
|
14
14
|
from snowflake.ml._internal.utils import identifier
|
15
15
|
from snowflake.ml.fileset import embedded_stage_fs, sfcfs
|
16
16
|
|
17
|
-
PROTOCOL_NAME = "snow"
|
18
|
-
|
19
17
|
_SFFileEntityPath = collections.namedtuple(
|
20
18
|
"_SFFileEntityPath", ["domain", "name", "filepath", "version", "relative_path"]
|
21
19
|
)
|
22
|
-
|
23
|
-
_SNOWURL_PATTERN = re.compile(
|
24
|
-
f"({PROTOCOL_NAME}://)?"
|
25
|
-
r"(?<!@)(?P<domain>\w+)/"
|
26
|
-
rf"(?P<name>(?:{identifier._SF_IDENTIFIER}\.){{,2}}{identifier._SF_IDENTIFIER})/"
|
27
|
-
r"(?P<path>versions/(?:(?P<version>[^/]+)(?:/(?P<relpath>.*))?)?)"
|
28
|
-
)
|
20
|
+
_SNOWURL_PATTERN = re.compile(embedded_stage_fs._SNOWURL_ENTITY_PATTERN + embedded_stage_fs._SNOWURL_VERSION_PATTERN)
|
29
21
|
|
30
22
|
|
31
23
|
class SnowFileSystem(sfcfs.SFFileSystem):
|
@@ -38,7 +30,7 @@ class SnowFileSystem(sfcfs.SFFileSystem):
|
|
38
30
|
See `sfcfs.SFFileSystem` documentation for example usage patterns.
|
39
31
|
"""
|
40
32
|
|
41
|
-
protocol = PROTOCOL_NAME
|
33
|
+
protocol = embedded_stage_fs.PROTOCOL_NAME
|
42
34
|
_IS_BUGGED_VERSION = None
|
43
35
|
|
44
36
|
def __init__(
|
@@ -75,10 +67,7 @@ class SnowFileSystem(sfcfs.SFFileSystem):
|
|
75
67
|
"""Convert the relative path in a stage to an absolute path starts with the location of the stage."""
|
76
68
|
# Strip protocol from absolute path, since backend needs snow:// prefix to resolve correctly
|
77
69
|
# but fsspec logic strips protocol when doing any searching and globbing
|
78
|
-
stage_name = stage_fs.stage_name
|
79
|
-
protocol = f"{PROTOCOL_NAME}://"
|
80
|
-
if stage_name.startswith(protocol):
|
81
|
-
stage_name = stage_name[len(protocol) :]
|
70
|
+
stage_name: str = self._strip_protocol(stage_fs.stage_name)
|
82
71
|
abs_path = stage_name + "/" + path
|
83
72
|
return abs_path
|
84
73
|
|
@@ -128,4 +117,4 @@ class SnowFileSystem(sfcfs.SFFileSystem):
|
|
128
117
|
)
|
129
118
|
|
130
119
|
|
131
|
-
fsspec.register_implementation(
|
120
|
+
fsspec.register_implementation(SnowFileSystem.protocol, SnowFileSystem)
|
snowflake/ml/fileset/stage_fs.py
CHANGED
@@ -170,8 +170,8 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
|
|
170
170
|
path = path.lstrip("/")
|
171
171
|
async_job: snowpark.AsyncJob = self._session.sql(f"LIST '{loc}/{path}'").collect(block=False)
|
172
172
|
objects: List[snowpark.Row] = _resolve_async_job(async_job)
|
173
|
-
except snowpark_exceptions.
|
174
|
-
if e.
|
173
|
+
except snowpark_exceptions.SnowparkSQLException as e:
|
174
|
+
if e.sql_error_code == fileset_errors.ERRNO_DOMAIN_NOT_EXIST:
|
175
175
|
raise snowml_exceptions.SnowflakeMLException(
|
176
176
|
error_code=error_codes.SNOWML_NOT_FOUND,
|
177
177
|
original_exception=fileset_errors.StageNotFoundError(
|
@@ -234,21 +234,29 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
|
|
234
234
|
|
235
235
|
Raises:
|
236
236
|
SnowflakeMLException: An error occurred when the given path points to a file that cannot be found.
|
237
|
+
snowpark_exceptions.SnowparkClientException: File access failed with a Snowpark exception
|
237
238
|
"""
|
238
239
|
path = path.lstrip("/")
|
239
240
|
if self._USE_FALLBACK_FILE_ACCESS:
|
240
241
|
return self._open_with_snowpark(path)
|
241
242
|
cached_presigned_url = self._url_cache.get(path, None)
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
243
|
+
try:
|
244
|
+
if not cached_presigned_url:
|
245
|
+
res = self._fetch_presigned_urls([path])
|
246
|
+
url = res[0][1]
|
247
|
+
expire_at = time.time() + _PRESIGNED_URL_LIFETIME_SEC
|
248
|
+
cached_presigned_url = _PresignedUrl(url, expire_at)
|
249
|
+
self._url_cache[path] = cached_presigned_url
|
250
|
+
logging.debug(f"Retrieved presigned url for {path}.")
|
251
|
+
elif cached_presigned_url.is_expiring():
|
252
|
+
self.optimize_read()
|
253
|
+
cached_presigned_url = self._url_cache[path]
|
254
|
+
except snowpark_exceptions.SnowparkClientException as e:
|
255
|
+
if self._USE_FALLBACK_FILE_ACCESS == False: # noqa: E712 # Fallback disabled
|
256
|
+
raise
|
257
|
+
# This may be an intermittent failure, so don't set _USE_FALLBACK_FILE_ACCESS = True
|
258
|
+
logging.warning(f"Pre-signed URL generation failed with {e.message}, trying fallback file access")
|
259
|
+
return self._open_with_snowpark(path)
|
252
260
|
url = cached_presigned_url.url
|
253
261
|
try:
|
254
262
|
return self._fs._open(url, mode=mode, **kwargs)
|
@@ -387,10 +395,8 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
|
|
387
395
|
api_calls=[snowpark.DataFrame.collect],
|
388
396
|
),
|
389
397
|
)
|
390
|
-
except snowpark_exceptions.
|
391
|
-
if e.
|
392
|
-
fileset_errors.ERRNO_STAGE_NOT_EXIST
|
393
|
-
):
|
398
|
+
except snowpark_exceptions.SnowparkSQLException as e:
|
399
|
+
if e.sql_error_code in {fileset_errors.ERRNO_DOMAIN_NOT_EXIST, fileset_errors.ERRNO_STAGE_NOT_EXIST}:
|
394
400
|
raise snowml_exceptions.SnowflakeMLException(
|
395
401
|
error_code=error_codes.SNOWML_NOT_FOUND,
|
396
402
|
original_exception=fileset_errors.StageNotFoundError(
|
@@ -406,9 +412,9 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
|
|
406
412
|
|
407
413
|
|
408
414
|
def _match_error_code(ex: snowpark_exceptions.SnowparkSQLException, error_code: int) -> bool:
|
409
|
-
# Snowpark writes error code to message instead of populating e.
|
415
|
+
# Snowpark writes error code to message instead of populating e.sql_error_code
|
410
416
|
error_code_str = str(error_code)
|
411
|
-
return ex.
|
417
|
+
return ex.sql_error_code == error_code_str or error_code_str in ex.message
|
412
418
|
|
413
419
|
|
414
420
|
@snowflake_plan.SnowflakePlan.Decorator.wrap_exception # type: ignore[misc]
|