snowflake-ml-python 1.5.4__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +2 -0
- snowflake/cortex/_classify_text.py +36 -0
- snowflake/cortex/_complete.py +66 -35
- snowflake/cortex/_util.py +4 -4
- snowflake/ml/_internal/env_utils.py +11 -5
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
- snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
- snowflake/ml/_internal/telemetry.py +26 -2
- snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
- snowflake/ml/data/_internal/arrow_ingestor.py +284 -0
- snowflake/ml/data/data_connector.py +186 -0
- snowflake/ml/data/data_ingestor.py +45 -0
- snowflake/ml/data/data_source.py +23 -0
- snowflake/ml/data/ingestor_utils.py +62 -0
- snowflake/ml/data/torch_dataset.py +33 -0
- snowflake/ml/dataset/dataset.py +1 -13
- snowflake/ml/dataset/dataset_metadata.py +3 -1
- snowflake/ml/dataset/dataset_reader.py +23 -117
- snowflake/ml/feature_store/access_manager.py +7 -1
- snowflake/ml/feature_store/entity.py +19 -2
- snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
- snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +37 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +30 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/example_helper.py +278 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +44 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +9 -0
- snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
- snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
- snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +36 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +24 -0
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +8 -0
- snowflake/ml/feature_store/feature_store.py +637 -76
- snowflake/ml/feature_store/feature_view.py +316 -9
- snowflake/ml/fileset/stage_fs.py +18 -10
- snowflake/ml/lineage/lineage_node.py +1 -1
- snowflake/ml/model/_client/model/model_impl.py +11 -2
- snowflake/ml/model/_client/model/model_version_impl.py +171 -20
- snowflake/ml/model/_client/ops/model_ops.py +105 -27
- snowflake/ml/model/_client/ops/service_ops.py +121 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +95 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +31 -0
- snowflake/ml/model/_client/sql/model_version.py +13 -4
- snowflake/ml/model/_client/sql/service.py +129 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
- snowflake/ml/model/_model_composer/model_composer.py +14 -14
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +33 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +3 -3
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +3 -27
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/model_method.py +5 -2
- snowflake/ml/model/_packager/model_env/model_env.py +7 -2
- snowflake/ml/model/_packager/model_handlers/_base.py +30 -3
- snowflake/ml/model/_packager/model_handlers/_utils.py +58 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +52 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +9 -5
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +80 -3
- snowflake/ml/model/_packager/model_handlers/llm.py +7 -3
- snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
- snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sklearn.py +87 -4
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
- snowflake/ml/model/_packager/model_handlers/torchscript.py +8 -3
- snowflake/ml/model/_packager/model_handlers/xgboost.py +71 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +32 -2
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +19 -0
- snowflake/ml/model/_packager/model_packager.py +2 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +7 -7
- snowflake/ml/model/model_signature.py +4 -4
- snowflake/ml/model/type_hints.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +1 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
- snowflake/ml/modeling/framework/base.py +28 -19
- snowflake/ml/modeling/impute/simple_imputer.py +26 -0
- snowflake/ml/modeling/pipeline/pipeline.py +7 -4
- snowflake/ml/registry/_manager/model_manager.py +16 -2
- snowflake/ml/registry/registry.py +100 -13
- snowflake/ml/utils/sql_client.py +22 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/METADATA +81 -2
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/RECORD +99 -66
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/lineage/data_source.py +0 -10
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ import warnings
|
|
6
6
|
from collections import OrderedDict
|
7
7
|
from dataclasses import asdict, dataclass
|
8
8
|
from enum import Enum
|
9
|
-
from typing import Any, Dict, List, Optional
|
9
|
+
from typing import Any, Dict, List, Optional, Union
|
10
10
|
|
11
11
|
from snowflake.ml._internal.exceptions import (
|
12
12
|
error_codes,
|
@@ -60,6 +60,29 @@ class _FeatureViewMetadata:
|
|
60
60
|
return cls(**state_dict)
|
61
61
|
|
62
62
|
|
63
|
+
@dataclass(frozen=True)
|
64
|
+
class _CompactRepresentation:
|
65
|
+
"""
|
66
|
+
A compact representation for FeatureView and FeatureViewSlice, which contains fully qualified name
|
67
|
+
and optionally a list of feature indices (None means all features will be included).
|
68
|
+
This is to make the metadata much smaller when generating a dataset.
|
69
|
+
"""
|
70
|
+
|
71
|
+
db: str
|
72
|
+
sch: str
|
73
|
+
name: str
|
74
|
+
version: str
|
75
|
+
feature_indices: Optional[List[int]] = None
|
76
|
+
|
77
|
+
def to_json(self) -> str:
|
78
|
+
return json.dumps(asdict(self))
|
79
|
+
|
80
|
+
@classmethod
|
81
|
+
def from_json(cls, json_str: str) -> _CompactRepresentation:
|
82
|
+
state_dict = json.loads(json_str)
|
83
|
+
return cls(**state_dict)
|
84
|
+
|
85
|
+
|
63
86
|
class FeatureViewVersion(str):
|
64
87
|
def __new__(cls, version: str) -> FeatureViewVersion:
|
65
88
|
if not _FEATURE_VIEW_VERSION_RE.match(version) or len(version) > _FEATURE_VIEW_VERSION_MAX_LENGTH:
|
@@ -115,6 +138,19 @@ class FeatureViewSlice:
|
|
115
138
|
json_dict["feature_view_ref"] = FeatureView.from_json(json_dict["feature_view_ref"], session)
|
116
139
|
return cls(**json_dict)
|
117
140
|
|
141
|
+
def _get_compact_repr(self) -> _CompactRepresentation:
|
142
|
+
return _CompactRepresentation(
|
143
|
+
db=self.feature_view_ref.database.identifier(), # type: ignore[union-attr]
|
144
|
+
sch=self.feature_view_ref.schema.identifier(), # type: ignore[union-attr]
|
145
|
+
name=self.feature_view_ref.name.identifier(),
|
146
|
+
version=self.feature_view_ref.version, # type: ignore[arg-type]
|
147
|
+
feature_indices=self._feature_names_to_indices(),
|
148
|
+
)
|
149
|
+
|
150
|
+
def _feature_names_to_indices(self) -> List[int]:
|
151
|
+
name_to_indices_map = {name: idx for idx, name in enumerate(self.feature_view_ref.feature_names)}
|
152
|
+
return [name_to_indices_map[n] for n in self.names]
|
153
|
+
|
118
154
|
|
119
155
|
class FeatureView(lineage_node.LineageNode):
|
120
156
|
"""
|
@@ -126,9 +162,11 @@ class FeatureView(lineage_node.LineageNode):
|
|
126
162
|
name: str,
|
127
163
|
entities: List[Entity],
|
128
164
|
feature_df: DataFrame,
|
165
|
+
*,
|
129
166
|
timestamp_col: Optional[str] = None,
|
130
167
|
refresh_freq: Optional[str] = None,
|
131
168
|
desc: str = "",
|
169
|
+
warehouse: Optional[str] = None,
|
132
170
|
**_kwargs: Any,
|
133
171
|
) -> None:
|
134
172
|
"""
|
@@ -149,7 +187,33 @@ class FeatureView(lineage_node.LineageNode):
|
|
149
187
|
NOTE: If refresh_freq is not provided, then FeatureView will be registered as View on Snowflake backend
|
150
188
|
and there won't be extra storage cost.
|
151
189
|
desc: description of the FeatureView.
|
190
|
+
warehouse: warehouse to refresh feature view. Not needed for static feature view (refresh_freq is None).
|
191
|
+
For managed feature view, this warehouse will override the default warehouse of Feature Store if it is
|
192
|
+
specified, otherwise the default warehouse will be used.
|
152
193
|
_kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
|
194
|
+
|
195
|
+
Example::
|
196
|
+
|
197
|
+
>>> fs = FeatureStore(...)
|
198
|
+
>>> # draft_fv is a local object that hasn't been materialized to the Snowflake backend yet.
|
199
|
+
>>> feature_df = session.sql("select f_1, f_2 from source_table")
|
200
|
+
>>> draft_fv = FeatureView(
|
201
|
+
... name="my_fv",
|
202
|
+
... entities=[e1, e2],
|
203
|
+
... feature_df=feature_df,
|
204
|
+
... timestamp_col='TS', # optional
|
205
|
+
... refresh_freq='1d', # optional
|
206
|
+
... desc='A line about this feature view', # optional
|
207
|
+
... warehouse='WH' # optional, the warehouse used to refresh (managed) feature view
|
208
|
+
... )
|
209
|
+
>>> print(draft_fv.status)
|
210
|
+
FeatureViewStatus.DRAFT
|
211
|
+
<BLANKLINE>
|
212
|
+
>>> # registered_fv is a local object that maps to a Snowflake backend object.
|
213
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, "v1")
|
214
|
+
>>> print(registered_fv.status)
|
215
|
+
FeatureViewStatus.ACTIVE
|
216
|
+
|
153
217
|
"""
|
154
218
|
|
155
219
|
self._name: SqlIdentifier = SqlIdentifier(name)
|
@@ -167,8 +231,8 @@ class FeatureView(lineage_node.LineageNode):
|
|
167
231
|
self._refresh_freq: Optional[str] = refresh_freq
|
168
232
|
self._database: Optional[SqlIdentifier] = None
|
169
233
|
self._schema: Optional[SqlIdentifier] = None
|
170
|
-
self._warehouse: Optional[SqlIdentifier] = None
|
171
|
-
self._refresh_mode: Optional[str] =
|
234
|
+
self._warehouse: Optional[SqlIdentifier] = SqlIdentifier(warehouse) if warehouse is not None else None
|
235
|
+
self._refresh_mode: Optional[str] = _kwargs.get("refresh_mode", "AUTO")
|
172
236
|
self._refresh_mode_reason: Optional[str] = None
|
173
237
|
self._owner: Optional[str] = None
|
174
238
|
self._validate()
|
@@ -185,6 +249,33 @@ class FeatureView(lineage_node.LineageNode):
|
|
185
249
|
|
186
250
|
Raises:
|
187
251
|
ValueError: if selected feature names is not found in the FeatureView.
|
252
|
+
|
253
|
+
Example::
|
254
|
+
|
255
|
+
>>> fs = FeatureStore(...)
|
256
|
+
>>> e = fs.get_entity('TRIP_ID')
|
257
|
+
>>> # feature_df contains 3 features and 1 entity
|
258
|
+
>>> feature_df = session.table(source_table).select(
|
259
|
+
... 'TRIPDURATION',
|
260
|
+
... 'START_STATION_LATITUDE',
|
261
|
+
... 'END_STATION_LONGITUDE',
|
262
|
+
... 'TRIP_ID'
|
263
|
+
... )
|
264
|
+
>>> draft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
|
265
|
+
>>> fv = fs.register_feature_view(draft_fv, version='1.0')
|
266
|
+
>>> # shows all 3 features
|
267
|
+
>>> fv.feature_names
|
268
|
+
['TRIPDURATION', 'START_STATION_LATITUDE', 'END_STATION_LONGITUDE']
|
269
|
+
<BLANKLINE>
|
270
|
+
>>> # slice a subset of features
|
271
|
+
>>> fv_slice = fv.slice(['TRIPDURATION', 'START_STATION_LATITUDE'])
|
272
|
+
>>> fv_slice.names
|
273
|
+
['TRIPDURATION', 'START_STATION_LATITUDE']
|
274
|
+
<BLANKLINE>
|
275
|
+
>>> # query the full set of features in original feature view
|
276
|
+
>>> fv_slice.feature_view_ref.feature_names
|
277
|
+
['TRIPDURATION', 'START_STATION_LATITUDE', 'END_STATION_LONGITUDE']
|
278
|
+
|
188
279
|
"""
|
189
280
|
|
190
281
|
res = []
|
@@ -196,14 +287,30 @@ class FeatureView(lineage_node.LineageNode):
|
|
196
287
|
return FeatureViewSlice(self, res)
|
197
288
|
|
198
289
|
def fully_qualified_name(self) -> str:
|
199
|
-
"""
|
200
|
-
|
290
|
+
"""
|
291
|
+
Returns the fully qualified name (<database_name>.<schema_name>.<feature_view_name>) for the
|
292
|
+
FeatureView in Snowflake.
|
201
293
|
|
202
294
|
Returns:
|
203
295
|
fully qualified name string.
|
204
296
|
|
205
297
|
Raises:
|
206
298
|
RuntimeError: if the FeatureView is not registered.
|
299
|
+
|
300
|
+
Example::
|
301
|
+
|
302
|
+
>>> fs = FeatureStore(...)
|
303
|
+
>>> e = fs.get_entity('TRIP_ID')
|
304
|
+
>>> feature_df = session.table(source_table).select(
|
305
|
+
... 'TRIPDURATION',
|
306
|
+
... 'START_STATION_LATITUDE',
|
307
|
+
... 'TRIP_ID'
|
308
|
+
... )
|
309
|
+
>>> draft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
|
310
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, version='1.0')
|
311
|
+
>>> registered_fv.fully_qualified_name()
|
312
|
+
'MY_DB.MY_SCHEMA."F_TRIP$1.0"'
|
313
|
+
|
207
314
|
"""
|
208
315
|
if self.status == FeatureViewStatus.DRAFT or self.version is None:
|
209
316
|
raise RuntimeError(f"FeatureView {self.name} has not been registered.")
|
@@ -221,6 +328,22 @@ class FeatureView(lineage_node.LineageNode):
|
|
221
328
|
|
222
329
|
Raises:
|
223
330
|
ValueError: if feature name is not found in the FeatureView.
|
331
|
+
|
332
|
+
Example::
|
333
|
+
|
334
|
+
>>> fs = FeatureStore(...)
|
335
|
+
>>> e = fs.get_entity('TRIP_ID')
|
336
|
+
>>> feature_df = session.table(source_table).select('TRIPDURATION', 'START_STATION_LATITUDE', 'TRIP_ID')
|
337
|
+
>>> draft_fv = FeatureView(name='F_TRIP', entities=[e], feature_df=feature_df)
|
338
|
+
>>> draft_fv = draft_fv.attach_feature_desc({
|
339
|
+
... "TRIPDURATION": "Duration of a trip.",
|
340
|
+
... "START_STATION_LATITUDE": "Latitude of the start station."
|
341
|
+
... })
|
342
|
+
>>> registered_fv = fs.register_feature_view(draft_fv, version='1.0')
|
343
|
+
>>> registered_fv.feature_descs
|
344
|
+
OrderedDict([('TRIPDURATION', 'Duration of a trip.'),
|
345
|
+
('START_STATION_LATITUDE', 'Latitude of the start station.')])
|
346
|
+
|
224
347
|
"""
|
225
348
|
for f, d in descs.items():
|
226
349
|
f = SqlIdentifier(f)
|
@@ -254,6 +377,31 @@ class FeatureView(lineage_node.LineageNode):
|
|
254
377
|
|
255
378
|
@desc.setter
|
256
379
|
def desc(self, new_value: str) -> None:
|
380
|
+
"""Set the description of feature view.
|
381
|
+
|
382
|
+
Args:
|
383
|
+
new_value: new value of description.
|
384
|
+
|
385
|
+
Example::
|
386
|
+
|
387
|
+
>>> fs = FeatureStore(...)
|
388
|
+
>>> e = fs.get_entity('TRIP_ID')
|
389
|
+
>>> darft_fv = FeatureView(
|
390
|
+
... name='F_TRIP',
|
391
|
+
... entities=[e],
|
392
|
+
... feature_df=feature_df,
|
393
|
+
... desc='old desc'
|
394
|
+
... )
|
395
|
+
>>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
|
396
|
+
>>> print(fv_1.desc)
|
397
|
+
old desc
|
398
|
+
<BLANKLINE>
|
399
|
+
>>> darft_fv.desc = 'NEW DESC'
|
400
|
+
>>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
|
401
|
+
>>> print(fv_2.desc)
|
402
|
+
NEW DESC
|
403
|
+
|
404
|
+
"""
|
257
405
|
warnings.warn(
|
258
406
|
"You must call register_feature_view() to make it effective. "
|
259
407
|
"Or use update_feature_view(desc=<new_value>).",
|
@@ -282,12 +430,85 @@ class FeatureView(lineage_node.LineageNode):
|
|
282
430
|
def feature_descs(self) -> Dict[SqlIdentifier, str]:
|
283
431
|
return self._feature_desc
|
284
432
|
|
433
|
+
def list_columns(self) -> DataFrame:
|
434
|
+
"""List all columns and their information.
|
435
|
+
|
436
|
+
Returns:
|
437
|
+
A Snowpark DataFrame that contains the name, category, data type and description of each column.
|
438
|
+
|
439
|
+
Example::
|
440
|
+
|
441
|
+
>>> fs = FeatureStore(...)
|
442
|
+
>>> e = Entity("foo", ["id"], desc='my entity')
|
443
|
+
>>> fs.register_entity(e)
|
444
|
+
<BLANKLINE>
|
445
|
+
>>> draft_fv = FeatureView(
|
446
|
+
... name="fv",
|
447
|
+
... entities=[e],
|
448
|
+
... feature_df=self._session.table(<source_table>).select(["NAME", "ID", "TITLE", "AGE", "TS"]),
|
449
|
+
... timestamp_col="ts",
|
450
|
+
... ).attach_feature_desc({"AGE": "my age", "TITLE": '"my title"'})
|
451
|
+
>>> fv = fs.register_feature_view(draft_fv, '1.0')
|
452
|
+
<BLANKLINE>
|
453
|
+
>>> fv.list_columns().show()
|
454
|
+
--------------------------------------------------
|
455
|
+
|"NAME" |"CATEGORY" |"DTYPE" |"DESC" |
|
456
|
+
--------------------------------------------------
|
457
|
+
|NAME |FEATURE |string(64) | |
|
458
|
+
|ID |ENTITY |bigint |my entity |
|
459
|
+
|TITLE |FEATURE |string(128) |"my title" |
|
460
|
+
|AGE |FEATURE |bigint |my age |
|
461
|
+
|TS |TIMESTAMP |bigint |NULL |
|
462
|
+
--------------------------------------------------
|
463
|
+
|
464
|
+
"""
|
465
|
+
session = self._feature_df.session
|
466
|
+
rows = []
|
467
|
+
for name, type in self._feature_df.dtypes:
|
468
|
+
if SqlIdentifier(name) in self.feature_descs:
|
469
|
+
desc = self.feature_descs[SqlIdentifier(name)]
|
470
|
+
rows.append((name, "FEATURE", type, desc))
|
471
|
+
elif SqlIdentifier(name) == self._timestamp_col:
|
472
|
+
rows.append((name, "TIMESTAMP", type, None)) # type: ignore[arg-type]
|
473
|
+
else:
|
474
|
+
for e in self._entities:
|
475
|
+
if SqlIdentifier(name) in e.join_keys:
|
476
|
+
rows.append((name, "ENTITY", type, e.desc))
|
477
|
+
break
|
478
|
+
|
479
|
+
return session.create_dataframe(rows, schema=["name", "category", "dtype", "desc"])
|
480
|
+
|
285
481
|
@property
|
286
482
|
def refresh_freq(self) -> Optional[str]:
|
287
483
|
return self._refresh_freq
|
288
484
|
|
289
485
|
@refresh_freq.setter
|
290
486
|
def refresh_freq(self, new_value: str) -> None:
|
487
|
+
"""Set refresh frequency of feature view.
|
488
|
+
|
489
|
+
Args:
|
490
|
+
new_value: The new value of refresh frequency.
|
491
|
+
|
492
|
+
Example::
|
493
|
+
|
494
|
+
>>> fs = FeatureStore(...)
|
495
|
+
>>> e = fs.get_entity('TRIP_ID')
|
496
|
+
>>> darft_fv = FeatureView(
|
497
|
+
... name='F_TRIP',
|
498
|
+
... entities=[e],
|
499
|
+
... feature_df=feature_df,
|
500
|
+
... refresh_freq='1d'
|
501
|
+
... )
|
502
|
+
>>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
|
503
|
+
>>> print(fv_1.refresh_freq)
|
504
|
+
1 day
|
505
|
+
<BLANKLINE>
|
506
|
+
>>> darft_fv.refresh_freq = '12h'
|
507
|
+
>>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
|
508
|
+
>>> print(fv_2.refresh_freq)
|
509
|
+
12 hours
|
510
|
+
|
511
|
+
"""
|
291
512
|
warnings.warn(
|
292
513
|
"You must call register_feature_view() to make it effective. "
|
293
514
|
"Or use update_feature_view(refresh_freq=<new_value>).",
|
@@ -310,6 +531,32 @@ class FeatureView(lineage_node.LineageNode):
|
|
310
531
|
|
311
532
|
@warehouse.setter
|
312
533
|
def warehouse(self, new_value: str) -> None:
|
534
|
+
"""Set warehouse of feature view.
|
535
|
+
|
536
|
+
Args:
|
537
|
+
new_value: The new value of warehouse.
|
538
|
+
|
539
|
+
Example::
|
540
|
+
|
541
|
+
>>> fs = FeatureStore(...)
|
542
|
+
>>> e = fs.get_entity('TRIP_ID')
|
543
|
+
>>> darft_fv = FeatureView(
|
544
|
+
... name='F_TRIP',
|
545
|
+
... entities=[e],
|
546
|
+
... feature_df=feature_df,
|
547
|
+
... refresh_freq='1d',
|
548
|
+
... warehouse='WH1',
|
549
|
+
... )
|
550
|
+
>>> fv_1 = fs.register_feature_view(darft_fv, version='1.0')
|
551
|
+
>>> print(fv_1.warehouse)
|
552
|
+
WH1
|
553
|
+
<BLANKLINE>
|
554
|
+
>>> darft_fv.warehouse = 'WH2'
|
555
|
+
>>> fv_2 = fs.register_feature_view(darft_fv, version='2.0')
|
556
|
+
>>> print(fv_2.warehouse)
|
557
|
+
WH2
|
558
|
+
|
559
|
+
"""
|
313
560
|
warnings.warn(
|
314
561
|
"You must call register_feature_view() to make it effective. "
|
315
562
|
"Or use update_feature_view(warehouse=<new_value>).",
|
@@ -436,12 +683,50 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
436
683
|
|
437
684
|
return fv_dict
|
438
685
|
|
439
|
-
def to_df(self, session: Session) -> DataFrame:
|
686
|
+
def to_df(self, session: Optional[Session] = None) -> DataFrame:
|
687
|
+
"""Convert feature view to a Snowpark DataFrame object.
|
688
|
+
|
689
|
+
Args:
|
690
|
+
session: [deprecated] This argument has no effect. No need to pass a session object.
|
691
|
+
|
692
|
+
Returns:
|
693
|
+
A Snowpark DataFrame object that contains the information about the feature view.
|
694
|
+
|
695
|
+
Example::
|
696
|
+
|
697
|
+
>>> fs = FeatureStore(...)
|
698
|
+
>>> e = Entity("foo", ["id"], desc='my entity')
|
699
|
+
>>> fs.register_entity(e)
|
700
|
+
<BLANKLINE>
|
701
|
+
>>> draft_fv = FeatureView(
|
702
|
+
... name="fv",
|
703
|
+
... entities=[e],
|
704
|
+
... feature_df=self._session.table(<source_table>).select(["NAME", "ID", "TITLE", "AGE", "TS"]),
|
705
|
+
... timestamp_col="ts",
|
706
|
+
... ).attach_feature_desc({"AGE": "my age", "TITLE": '"my title"'})
|
707
|
+
>>> fv = fs.register_feature_view(draft_fv, '1.0')
|
708
|
+
<BLANKLINE>
|
709
|
+
>>> fv.to_df().show()
|
710
|
+
----------------------------------------------------------------...
|
711
|
+
|"NAME" |"ENTITIES" |"TIMESTAMP_COL" |"DESC" |
|
712
|
+
----------------------------------------------------------------...
|
713
|
+
|FV |[ |TS |foobar |
|
714
|
+
| | { | | |
|
715
|
+
| | "desc": "my entity", | | |
|
716
|
+
| | "join_keys": [ | | |
|
717
|
+
| | "ID" | | |
|
718
|
+
| | ], | | |
|
719
|
+
| | "name": "FOO", | | |
|
720
|
+
| | "owner": null | | |
|
721
|
+
| | } | | |
|
722
|
+
| |] | | |
|
723
|
+
----------------------------------------------------------------...
|
724
|
+
"""
|
440
725
|
values = list(self._to_dict().values())
|
441
726
|
schema = [x.lstrip("_") for x in list(self._to_dict().keys())]
|
442
727
|
values.append(str(FeatureView._get_physical_name(self._name, self._version))) # type: ignore[arg-type]
|
443
728
|
schema.append("physical_name")
|
444
|
-
return session.create_dataframe([values], schema=schema)
|
729
|
+
return self._feature_df.session.create_dataframe([values], schema=schema)
|
445
730
|
|
446
731
|
def to_json(self) -> str:
|
447
732
|
state_dict = self._to_dict()
|
@@ -456,7 +741,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
456
741
|
|
457
742
|
entities = []
|
458
743
|
for e_json in json_dict["_entities"]:
|
459
|
-
e = Entity(e_json["name"], e_json["join_keys"], e_json["desc"])
|
744
|
+
e = Entity(e_json["name"], e_json["join_keys"], desc=e_json["desc"])
|
460
745
|
e.owner = e_json["owner"]
|
461
746
|
entities.append(e)
|
462
747
|
|
@@ -480,6 +765,14 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
480
765
|
session=session,
|
481
766
|
)
|
482
767
|
|
768
|
+
def _get_compact_repr(self) -> _CompactRepresentation:
|
769
|
+
return _CompactRepresentation(
|
770
|
+
db=self.database.identifier(), # type: ignore[union-attr]
|
771
|
+
sch=self.schema.identifier(), # type: ignore[union-attr]
|
772
|
+
name=self.name.identifier(),
|
773
|
+
version=self.version, # type: ignore[arg-type]
|
774
|
+
)
|
775
|
+
|
483
776
|
@staticmethod
|
484
777
|
def _get_physical_name(fv_name: SqlIdentifier, fv_version: FeatureViewVersion) -> SqlIdentifier:
|
485
778
|
return SqlIdentifier(
|
@@ -492,6 +785,20 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
492
785
|
)
|
493
786
|
)
|
494
787
|
|
788
|
+
@staticmethod
|
789
|
+
def _load_from_compact_repr(session: Session, serialized_repr: str) -> Union[FeatureView, FeatureViewSlice]:
|
790
|
+
compact_repr = _CompactRepresentation.from_json(serialized_repr)
|
791
|
+
|
792
|
+
fs = feature_store.FeatureStore(
|
793
|
+
session, compact_repr.db, compact_repr.sch, default_warehouse=session.get_current_warehouse()
|
794
|
+
)
|
795
|
+
fv = fs.get_feature_view(compact_repr.name, compact_repr.version)
|
796
|
+
|
797
|
+
if compact_repr.feature_indices is not None:
|
798
|
+
feature_names = [fv.feature_names[i] for i in compact_repr.feature_indices]
|
799
|
+
return fv.slice(feature_names) # type: ignore[no-any-return]
|
800
|
+
return fv # type: ignore[no-any-return]
|
801
|
+
|
495
802
|
@staticmethod
|
496
803
|
def _load_from_lineage_node(session: Session, name: str, version: str) -> FeatureView:
|
497
804
|
db_name, feature_store_name, feature_view_name, _ = identifier.parse_schema_level_object_identifier(name)
|
@@ -504,7 +811,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
504
811
|
original_exception=ValueError("No active warehouse selected in the current session"),
|
505
812
|
)
|
506
813
|
|
507
|
-
fs = feature_store.FeatureStore(session, db_name, feature_store_name, session_warehouse)
|
814
|
+
fs = feature_store.FeatureStore(session, db_name, feature_store_name, default_warehouse=session_warehouse)
|
508
815
|
return fs.get_feature_view(feature_view_name, version) # type: ignore[no-any-return]
|
509
816
|
|
510
817
|
@staticmethod
|
snowflake/ml/fileset/stage_fs.py
CHANGED
@@ -234,21 +234,29 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
|
|
234
234
|
|
235
235
|
Raises:
|
236
236
|
SnowflakeMLException: An error occurred when the given path points to a file that cannot be found.
|
237
|
+
snowpark_exceptions.SnowparkClientException: File access failed with a Snowpark exception.
|
237
238
|
"""
|
238
239
|
path = path.lstrip("/")
|
239
240
|
if self._USE_FALLBACK_FILE_ACCESS:
|
240
241
|
return self._open_with_snowpark(path)
|
241
242
|
cached_presigned_url = self._url_cache.get(path, None)
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
243
|
+
try:
|
244
|
+
if not cached_presigned_url:
|
245
|
+
res = self._fetch_presigned_urls([path])
|
246
|
+
url = res[0][1]
|
247
|
+
expire_at = time.time() + _PRESIGNED_URL_LIFETIME_SEC
|
248
|
+
cached_presigned_url = _PresignedUrl(url, expire_at)
|
249
|
+
self._url_cache[path] = cached_presigned_url
|
250
|
+
logging.debug(f"Retrieved presigned url for {path}.")
|
251
|
+
elif cached_presigned_url.is_expiring():
|
252
|
+
self.optimize_read()
|
253
|
+
cached_presigned_url = self._url_cache[path]
|
254
|
+
except snowpark_exceptions.SnowparkClientException as e:
|
255
|
+
if self._USE_FALLBACK_FILE_ACCESS == False: # noqa: E712 # Fallback disabled
|
256
|
+
raise
|
257
|
+
# This may be an intermittent failure, so don't set _USE_FALLBACK_FILE_ACCESS = True
|
258
|
+
logging.warning(f"Pre-signed URL generation failed with {e.message}, trying fallback file access")
|
259
|
+
return self._open_with_snowpark(path)
|
252
260
|
url = cached_presigned_url.url
|
253
261
|
try:
|
254
262
|
return self._fs._open(url, mode=mode, **kwargs)
|
@@ -118,7 +118,7 @@ class LineageNode:
|
|
118
118
|
)
|
119
119
|
domain = lineage_object["domain"].lower()
|
120
120
|
if domain_filter is None or domain in domain_filter:
|
121
|
-
if domain in DOMAIN_LINEAGE_REGISTRY:
|
121
|
+
if domain in DOMAIN_LINEAGE_REGISTRY and lineage_object["status"] == "ACTIVE":
|
122
122
|
lineage_nodes.append(
|
123
123
|
DOMAIN_LINEAGE_REGISTRY[domain]._load_from_lineage_node(
|
124
124
|
self._session, lineage_object["name"], lineage_object.get("version")
|
@@ -5,7 +5,7 @@ import pandas as pd
|
|
5
5
|
from snowflake.ml._internal import telemetry
|
6
6
|
from snowflake.ml._internal.utils import sql_identifier
|
7
7
|
from snowflake.ml.model._client.model import model_version_impl
|
8
|
-
from snowflake.ml.model._client.ops import model_ops
|
8
|
+
from snowflake.ml.model._client.ops import model_ops, service_ops
|
9
9
|
|
10
10
|
_TELEMETRY_PROJECT = "MLOps"
|
11
11
|
_TELEMETRY_SUBPROJECT = "ModelManagement"
|
@@ -19,6 +19,7 @@ class Model:
|
|
19
19
|
"""Model Object containing multiple versions. Mapping to SQL's MODEL object."""
|
20
20
|
|
21
21
|
_model_ops: model_ops.ModelOperator
|
22
|
+
_service_ops: service_ops.ServiceOperator
|
22
23
|
_model_name: sql_identifier.SqlIdentifier
|
23
24
|
|
24
25
|
def __init__(self) -> None:
|
@@ -29,17 +30,23 @@ class Model:
|
|
29
30
|
cls,
|
30
31
|
model_ops: model_ops.ModelOperator,
|
31
32
|
*,
|
33
|
+
service_ops: service_ops.ServiceOperator,
|
32
34
|
model_name: sql_identifier.SqlIdentifier,
|
33
35
|
) -> "Model":
|
34
36
|
self: "Model" = object.__new__(cls)
|
35
37
|
self._model_ops = model_ops
|
38
|
+
self._service_ops = service_ops
|
36
39
|
self._model_name = model_name
|
37
40
|
return self
|
38
41
|
|
39
42
|
def __eq__(self, __value: object) -> bool:
|
40
43
|
if not isinstance(__value, Model):
|
41
44
|
return False
|
42
|
-
return
|
45
|
+
return (
|
46
|
+
self._model_ops == __value._model_ops
|
47
|
+
and self._service_ops == __value._service_ops
|
48
|
+
and self._model_name == __value._model_name
|
49
|
+
)
|
43
50
|
|
44
51
|
@property
|
45
52
|
def name(self) -> str:
|
@@ -208,6 +215,7 @@ class Model:
|
|
208
215
|
|
209
216
|
return model_version_impl.ModelVersion._ref(
|
210
217
|
self._model_ops,
|
218
|
+
service_ops=self._service_ops,
|
211
219
|
model_name=self._model_name,
|
212
220
|
version_name=version_id,
|
213
221
|
)
|
@@ -235,6 +243,7 @@ class Model:
|
|
235
243
|
return [
|
236
244
|
model_version_impl.ModelVersion._ref(
|
237
245
|
self._model_ops,
|
246
|
+
service_ops=self._service_ops,
|
238
247
|
model_name=self._model_name,
|
239
248
|
version_name=version_name,
|
240
249
|
)
|