snowflake-ml-python 1.5.4__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +2 -0
- snowflake/cortex/_classify_text.py +36 -0
- snowflake/cortex/_complete.py +66 -35
- snowflake/cortex/_util.py +4 -4
- snowflake/ml/_internal/env_utils.py +11 -5
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
- snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
- snowflake/ml/_internal/telemetry.py +26 -2
- snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
- snowflake/ml/data/_internal/arrow_ingestor.py +284 -0
- snowflake/ml/data/data_connector.py +186 -0
- snowflake/ml/data/data_ingestor.py +45 -0
- snowflake/ml/data/data_source.py +23 -0
- snowflake/ml/data/ingestor_utils.py +62 -0
- snowflake/ml/data/torch_dataset.py +33 -0
- snowflake/ml/dataset/dataset.py +1 -13
- snowflake/ml/dataset/dataset_metadata.py +3 -1
- snowflake/ml/dataset/dataset_reader.py +23 -117
- snowflake/ml/feature_store/access_manager.py +7 -1
- snowflake/ml/feature_store/entity.py +19 -2
- snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
- snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +37 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +30 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/example_helper.py +278 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +44 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +9 -0
- snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
- snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
- snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +36 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +24 -0
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +8 -0
- snowflake/ml/feature_store/feature_store.py +637 -76
- snowflake/ml/feature_store/feature_view.py +316 -9
- snowflake/ml/fileset/stage_fs.py +18 -10
- snowflake/ml/lineage/lineage_node.py +1 -1
- snowflake/ml/model/_client/model/model_impl.py +11 -2
- snowflake/ml/model/_client/model/model_version_impl.py +171 -20
- snowflake/ml/model/_client/ops/model_ops.py +105 -27
- snowflake/ml/model/_client/ops/service_ops.py +121 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +95 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +31 -0
- snowflake/ml/model/_client/sql/model_version.py +13 -4
- snowflake/ml/model/_client/sql/service.py +129 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
- snowflake/ml/model/_model_composer/model_composer.py +14 -14
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +33 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +3 -3
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +3 -27
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/model_method.py +5 -2
- snowflake/ml/model/_packager/model_env/model_env.py +7 -2
- snowflake/ml/model/_packager/model_handlers/_base.py +30 -3
- snowflake/ml/model/_packager/model_handlers/_utils.py +58 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +52 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +9 -5
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +80 -3
- snowflake/ml/model/_packager/model_handlers/llm.py +7 -3
- snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
- snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sklearn.py +87 -4
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
- snowflake/ml/model/_packager/model_handlers/torchscript.py +8 -3
- snowflake/ml/model/_packager/model_handlers/xgboost.py +71 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +32 -2
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +19 -0
- snowflake/ml/model/_packager/model_packager.py +2 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +7 -7
- snowflake/ml/model/model_signature.py +4 -4
- snowflake/ml/model/type_hints.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +1 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
- snowflake/ml/modeling/framework/base.py +28 -19
- snowflake/ml/modeling/impute/simple_imputer.py +26 -0
- snowflake/ml/modeling/pipeline/pipeline.py +7 -4
- snowflake/ml/registry/_manager/model_manager.py +16 -2
- snowflake/ml/registry/registry.py +100 -13
- snowflake/ml/utils/sql_client.py +22 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/METADATA +81 -2
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/RECORD +99 -66
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/lineage/data_source.py +0 -10
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/top_level.txt +0 -0
@@ -1,48 +1,37 @@
|
|
1
|
-
from typing import Any, List
|
2
|
-
|
3
|
-
import pandas as pd
|
4
|
-
from pyarrow import parquet as pq
|
1
|
+
from typing import Any, List, Optional, Type
|
5
2
|
|
6
3
|
from snowflake import snowpark
|
7
4
|
from snowflake.ml._internal import telemetry
|
8
|
-
from snowflake.ml._internal.lineage import
|
9
|
-
from snowflake.ml.
|
5
|
+
from snowflake.ml._internal.lineage import lineage_utils
|
6
|
+
from snowflake.ml.data import data_connector, data_ingestor, data_source, ingestor_utils
|
10
7
|
from snowflake.ml.fileset import snowfs
|
11
8
|
|
12
9
|
_PROJECT = "Dataset"
|
13
10
|
_SUBPROJECT = "DatasetReader"
|
14
|
-
TARGET_FILE_SIZE = 32 * 2**20 # The max file size for data loading.
|
15
11
|
|
16
12
|
|
17
|
-
class DatasetReader:
|
13
|
+
class DatasetReader(data_connector.DataConnector):
|
18
14
|
"""Snowflake Dataset abstraction which provides application integration connectors"""
|
19
15
|
|
20
16
|
@telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
|
21
17
|
def __init__(
|
22
18
|
self,
|
23
|
-
|
24
|
-
|
19
|
+
ingestor: data_ingestor.DataIngestor,
|
20
|
+
*,
|
21
|
+
snowpark_session: snowpark.Session,
|
25
22
|
) -> None:
|
26
|
-
|
23
|
+
super().__init__(ingestor)
|
27
24
|
|
28
|
-
|
29
|
-
|
30
|
-
|
25
|
+
self._session: snowpark.Session = snowpark_session
|
26
|
+
self._fs: snowfs.SnowFileSystem = ingestor_utils.get_dataset_filesystem(self._session)
|
27
|
+
self._files: Optional[List[str]] = None
|
31
28
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
self._sources = sources
|
39
|
-
self._fs: snowfs.SnowFileSystem = snowfs.SnowFileSystem(
|
40
|
-
snowpark_session=self._session,
|
41
|
-
cache_type="bytes",
|
42
|
-
block_size=2 * TARGET_FILE_SIZE,
|
43
|
-
)
|
44
|
-
|
45
|
-
self._files: List[str] = []
|
29
|
+
@classmethod
|
30
|
+
def from_dataframe(
|
31
|
+
cls, df: snowpark.DataFrame, ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None, **kwargs: Any
|
32
|
+
) -> "DatasetReader":
|
33
|
+
# Block superclass constructor from Snowpark DataFrames
|
34
|
+
raise RuntimeError("Creating DatasetReader from DataFrames not supported")
|
46
35
|
|
47
36
|
def _list_files(self) -> List[str]:
|
48
37
|
"""Private helper function that lists all files in this DatasetVersion and caches the results."""
|
@@ -50,18 +39,14 @@ class DatasetReader:
|
|
50
39
|
return self._files
|
51
40
|
|
52
41
|
files: List[str] = []
|
53
|
-
for source in self.
|
54
|
-
|
55
|
-
files.extend(
|
42
|
+
for source in self.data_sources:
|
43
|
+
assert isinstance(source, data_source.DatasetInfo)
|
44
|
+
files.extend(ingestor_utils.get_dataset_files(self._session, source, filesystem=self._fs))
|
56
45
|
files.sort()
|
57
46
|
|
58
47
|
self._files = files
|
59
48
|
return self._files
|
60
49
|
|
61
|
-
@property
|
62
|
-
def data_sources(self) -> List[data_source.DataSource]:
|
63
|
-
return self._sources
|
64
|
-
|
65
50
|
@telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
|
66
51
|
def files(self) -> List[str]:
|
67
52
|
"""Get the list of remote file paths for the current DatasetVersion.
|
@@ -85,76 +70,6 @@ class DatasetReader:
|
|
85
70
|
"""Return an fsspec FileSystem which can be used to load the DatasetVersion's `files()`"""
|
86
71
|
return self._fs
|
87
72
|
|
88
|
-
@telemetry.send_api_usage_telemetry(
|
89
|
-
project=_PROJECT,
|
90
|
-
subproject=_SUBPROJECT,
|
91
|
-
func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
|
92
|
-
)
|
93
|
-
def to_torch_datapipe(self, *, batch_size: int, shuffle: bool = False, drop_last_batch: bool = True) -> Any:
|
94
|
-
"""Transform the Snowflake data into a ready-to-use Pytorch datapipe.
|
95
|
-
|
96
|
-
Return a Pytorch datapipe which iterates on rows of data.
|
97
|
-
|
98
|
-
Args:
|
99
|
-
batch_size: It specifies the size of each data batch which will be
|
100
|
-
yield in the result datapipe
|
101
|
-
shuffle: It specifies whether the data will be shuffled. If True, files will be shuffled, and
|
102
|
-
rows in each file will also be shuffled.
|
103
|
-
drop_last_batch: Whether the last batch of data should be dropped. If set to be true,
|
104
|
-
then the last batch will get dropped if its size is smaller than the given batch_size.
|
105
|
-
|
106
|
-
Returns:
|
107
|
-
A Pytorch iterable datapipe that yield data.
|
108
|
-
|
109
|
-
Examples:
|
110
|
-
>>> dp = dataset.to_torch_datapipe(batch_size=1)
|
111
|
-
>>> for data in dp:
|
112
|
-
>>> print(data)
|
113
|
-
----
|
114
|
-
{'_COL_1':[10]}
|
115
|
-
"""
|
116
|
-
IterableWrapper, _ = import_utils.import_or_get_dummy("torchdata.datapipes.iter.IterableWrapper")
|
117
|
-
torch_datapipe_module, _ = import_utils.import_or_get_dummy("snowflake.ml.fileset.torch_datapipe")
|
118
|
-
|
119
|
-
self._fs.optimize_read(self._list_files())
|
120
|
-
|
121
|
-
input_dp = IterableWrapper(self._list_files())
|
122
|
-
return torch_datapipe_module.ReadAndParseParquet(input_dp, self._fs, batch_size, shuffle, drop_last_batch)
|
123
|
-
|
124
|
-
@telemetry.send_api_usage_telemetry(
|
125
|
-
project=_PROJECT,
|
126
|
-
subproject=_SUBPROJECT,
|
127
|
-
func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
|
128
|
-
)
|
129
|
-
def to_tf_dataset(self, *, batch_size: int, shuffle: bool = False, drop_last_batch: bool = True) -> Any:
|
130
|
-
"""Transform the Snowflake data into a ready-to-use TensorFlow tf.data.Dataset.
|
131
|
-
|
132
|
-
Args:
|
133
|
-
batch_size: It specifies the size of each data batch which will be
|
134
|
-
yield in the result datapipe
|
135
|
-
shuffle: It specifies whether the data will be shuffled. If True, files will be shuffled, and
|
136
|
-
rows in each file will also be shuffled.
|
137
|
-
drop_last_batch: Whether the last batch of data should be dropped. If set to be true,
|
138
|
-
then the last batch will get dropped if its size is smaller than the given batch_size.
|
139
|
-
|
140
|
-
Returns:
|
141
|
-
A tf.data.Dataset that yields batched tf.Tensors.
|
142
|
-
|
143
|
-
Examples:
|
144
|
-
>>> dp = dataset.to_tf_dataset(batch_size=1)
|
145
|
-
>>> for data in dp:
|
146
|
-
>>> print(data)
|
147
|
-
----
|
148
|
-
{'_COL_1': <tf.Tensor: shape=(1,), dtype=int64, numpy=[10]>}
|
149
|
-
"""
|
150
|
-
tf_dataset_module, _ = import_utils.import_or_get_dummy("snowflake.ml.fileset.tf_dataset")
|
151
|
-
|
152
|
-
self._fs.optimize_read(self._list_files())
|
153
|
-
|
154
|
-
return tf_dataset_module.read_and_parse_parquet(
|
155
|
-
self._list_files(), self._fs, batch_size, shuffle, drop_last_batch
|
156
|
-
)
|
157
|
-
|
158
73
|
@telemetry.send_api_usage_telemetry(
|
159
74
|
project=_PROJECT,
|
160
75
|
subproject=_SUBPROJECT,
|
@@ -177,7 +92,8 @@ class DatasetReader:
|
|
177
92
|
"""
|
178
93
|
file_path_pattern = ".*data_.*[.]parquet"
|
179
94
|
dfs: List[snowpark.DataFrame] = []
|
180
|
-
for source in self.
|
95
|
+
for source in self.data_sources:
|
96
|
+
assert isinstance(source, data_source.DatasetInfo) and source.url is not None
|
181
97
|
df = self._session.read.option("pattern", file_path_pattern).parquet(source.url)
|
182
98
|
if only_feature_cols and source.exclude_cols:
|
183
99
|
df = df.drop(source.exclude_cols)
|
@@ -186,14 +102,4 @@ class DatasetReader:
|
|
186
102
|
combined_df = dfs[0]
|
187
103
|
for df in dfs[1:]:
|
188
104
|
combined_df = combined_df.union_all_by_name(df)
|
189
|
-
return lineage_utils.patch_dataframe(combined_df, data_sources=self.
|
190
|
-
|
191
|
-
@telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
|
192
|
-
def to_pandas(self) -> pd.DataFrame:
|
193
|
-
"""Retrieve the DatasetVersion contents as a Pandas Dataframe"""
|
194
|
-
files = self._list_files()
|
195
|
-
if not files:
|
196
|
-
return pd.DataFrame() # Return empty DataFrame
|
197
|
-
self._fs.optimize_read(files)
|
198
|
-
pd_ds = pq.ParquetDataset(files, filesystem=self._fs)
|
199
|
-
return pd_ds.read_pandas().to_pandas()
|
105
|
+
return lineage_utils.patch_dataframe(combined_df, data_sources=self.data_sources, inplace=True)
|
@@ -273,7 +273,13 @@ def setup_feature_store(
|
|
273
273
|
assert current_role is not None # to make mypy happy
|
274
274
|
try:
|
275
275
|
session.use_role(producer_role)
|
276
|
-
fs = FeatureStore(
|
276
|
+
fs = FeatureStore(
|
277
|
+
session,
|
278
|
+
database,
|
279
|
+
schema,
|
280
|
+
default_warehouse=warehouse,
|
281
|
+
creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
|
282
|
+
)
|
277
283
|
finally:
|
278
284
|
session.use_role(current_role)
|
279
285
|
|
@@ -22,7 +22,7 @@ class Entity:
|
|
22
22
|
It can also be used for FeatureView search and lineage tracking.
|
23
23
|
"""
|
24
24
|
|
25
|
-
def __init__(self, name: str, join_keys: List[str], desc: str = "") -> None:
|
25
|
+
def __init__(self, name: str, join_keys: List[str], *, desc: str = "") -> None:
|
26
26
|
"""
|
27
27
|
Creates an Entity instance.
|
28
28
|
|
@@ -30,6 +30,23 @@ class Entity:
|
|
30
30
|
name: name of the Entity.
|
31
31
|
join_keys: join keys associated with a FeatureView, used for feature retrieval.
|
32
32
|
desc: description of the Entity.
|
33
|
+
|
34
|
+
Example::
|
35
|
+
|
36
|
+
>>> fs = FeatureStore(...)
|
37
|
+
>>> e_1 = Entity(
|
38
|
+
... name="my_entity",
|
39
|
+
... join_keys=['col_1'],
|
40
|
+
... desc='My first entity.'
|
41
|
+
... )
|
42
|
+
>>> fs.register_entity(e_1)
|
43
|
+
>>> fs.list_entities().show()
|
44
|
+
-----------------------------------------------------------
|
45
|
+
|"NAME" |"JOIN_KEYS" |"DESC" |"OWNER" |
|
46
|
+
-----------------------------------------------------------
|
47
|
+
|MY_ENTITY |["COL_1"] |My first entity. |REGTEST_RL |
|
48
|
+
-----------------------------------------------------------
|
49
|
+
|
33
50
|
"""
|
34
51
|
self._validate(name, join_keys)
|
35
52
|
|
@@ -65,7 +82,7 @@ class Entity:
|
|
65
82
|
|
66
83
|
@staticmethod
|
67
84
|
def _construct_entity(name: str, join_keys: List[str], desc: str, owner: str) -> "Entity":
|
68
|
-
e = Entity(name, join_keys, desc)
|
85
|
+
e = Entity(name, join_keys, desc=desc)
|
69
86
|
e.owner = owner
|
70
87
|
return e
|
71
88
|
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from snowflake.ml.feature_store import Entity
|
4
|
+
|
5
|
+
zipcode_entity = Entity(
|
6
|
+
name="AIRPORT_ZIP_CODE",
|
7
|
+
join_keys=["AIRPORT_ZIP_CODE"],
|
8
|
+
desc="Zip code of the airport.",
|
9
|
+
)
|
10
|
+
|
11
|
+
plane_entity = Entity(name="PLANE_MODEL", join_keys=["PLANE_MODEL"], desc="The model of an airplane.")
|
12
|
+
|
13
|
+
|
14
|
+
# This will be invoked by example_helper.py. Do not change function name.
|
15
|
+
def get_all_entities() -> List[Entity]:
|
16
|
+
return [zipcode_entity, plane_entity]
|
@@ -0,0 +1,31 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from snowflake.ml.feature_store import FeatureView
|
4
|
+
from snowflake.ml.feature_store.examples.airline_features.entities import plane_entity
|
5
|
+
from snowflake.snowpark import DataFrame, Session
|
6
|
+
|
7
|
+
|
8
|
+
# This function will be invoked by example_helper.py. Do not change the name.
|
9
|
+
def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
|
10
|
+
"""Create a feature view about airplane model."""
|
11
|
+
query = session.sql(
|
12
|
+
"""
|
13
|
+
select
|
14
|
+
PLANE_MODEL,
|
15
|
+
SEATING_CAPACITY
|
16
|
+
from
|
17
|
+
PLANE_MODEL_ATTRIBUTES
|
18
|
+
"""
|
19
|
+
)
|
20
|
+
|
21
|
+
return FeatureView(
|
22
|
+
name="f_plane", # name of feature view
|
23
|
+
entities=[plane_entity], # entities
|
24
|
+
feature_df=query, # definition query
|
25
|
+
refresh_freq=None, # refresh frequency
|
26
|
+
desc="Plane features never refresh.",
|
27
|
+
).attach_feature_desc(
|
28
|
+
{
|
29
|
+
"SEATING_CAPACITY": "The seating capacity of a plane.",
|
30
|
+
}
|
31
|
+
)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from snowflake.ml.feature_store import FeatureView
|
4
|
+
from snowflake.ml.feature_store.examples.airline_features.entities import zipcode_entity
|
5
|
+
from snowflake.snowpark import DataFrame, Session
|
6
|
+
|
7
|
+
|
8
|
+
# This function will be invoked by example_helper.py. Do not change the name.
|
9
|
+
def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
|
10
|
+
"""Create a feature view about airport weather."""
|
11
|
+
query = session.sql(
|
12
|
+
"""
|
13
|
+
select
|
14
|
+
DATETIME_UTC AS TS,
|
15
|
+
AIRPORT_ZIP_CODE,
|
16
|
+
sum(RAIN_MM_H) over (
|
17
|
+
partition by AIRPORT_ZIP_CODE
|
18
|
+
order by DATETIME_UTC
|
19
|
+
range between interval '30 minutes' preceding and current row
|
20
|
+
) RAIN_SUM_30M,
|
21
|
+
sum(RAIN_MM_H) over (
|
22
|
+
partition by AIRPORT_ZIP_CODE
|
23
|
+
order by DATETIME_UTC
|
24
|
+
range between interval '1 day' preceding and current row
|
25
|
+
) RAIN_SUM_60M
|
26
|
+
from AIRPORT_WEATHER_STATION
|
27
|
+
"""
|
28
|
+
)
|
29
|
+
|
30
|
+
return FeatureView(
|
31
|
+
name="f_weather", # name of feature view
|
32
|
+
entities=[zipcode_entity], # entities
|
33
|
+
feature_df=query, # definition query
|
34
|
+
timestamp_col="TS", # timestamp column
|
35
|
+
refresh_freq="1d", # refresh frequency
|
36
|
+
desc="Airport weather features refreshed every day.",
|
37
|
+
).attach_feature_desc(
|
38
|
+
{
|
39
|
+
"RAIN_SUM_30M": "The sum of rain fall over past 30 minutes for one zipcode.",
|
40
|
+
"RAIN_SUM_60M": "The sum of rain fall over past 1 day for one zipcode.",
|
41
|
+
}
|
42
|
+
)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from snowflake.ml.feature_store import Entity
|
4
|
+
|
5
|
+
end_station_id = Entity(
|
6
|
+
name="end_station_id",
|
7
|
+
join_keys=["end_station_id"],
|
8
|
+
desc="The id of an end station.",
|
9
|
+
)
|
10
|
+
|
11
|
+
trip_id = Entity(
|
12
|
+
name="trip_id",
|
13
|
+
join_keys=["trip_id"],
|
14
|
+
desc="The id of a trip.",
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
# This will be invoked by example_helper.py. Do not change function name.
|
19
|
+
def get_all_entities() -> List[Entity]:
|
20
|
+
return [end_station_id, trip_id]
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from snowflake.ml.feature_store import FeatureView
|
4
|
+
from snowflake.ml.feature_store.examples.citibike_trip_features.entities import (
|
5
|
+
end_station_id,
|
6
|
+
)
|
7
|
+
from snowflake.snowpark import DataFrame, Session
|
8
|
+
|
9
|
+
|
10
|
+
# This function will be invoked by example_helper.py. Do not change the name.
|
11
|
+
def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
|
12
|
+
"""Create a feature view about trip station."""
|
13
|
+
query = session.sql(
|
14
|
+
f"""
|
15
|
+
select
|
16
|
+
end_station_id,
|
17
|
+
count(end_station_id) as f_count,
|
18
|
+
avg(end_station_latitude) as f_avg_latitude,
|
19
|
+
avg(end_station_longitude) as f_avg_longtitude
|
20
|
+
from {source_tables[0]}
|
21
|
+
group by end_station_id
|
22
|
+
"""
|
23
|
+
)
|
24
|
+
|
25
|
+
return FeatureView(
|
26
|
+
name="f_station", # name of feature view
|
27
|
+
entities=[end_station_id], # entities
|
28
|
+
feature_df=query, # definition query
|
29
|
+
refresh_freq="1d", # refresh frequency. '1d' means it refreshes everyday
|
30
|
+
desc="Station features refreshed every day.",
|
31
|
+
).attach_feature_desc(
|
32
|
+
{
|
33
|
+
"f_count": "How many times this station appears in 1 day.",
|
34
|
+
"f_avg_latitude": "Averaged latitude of a station.",
|
35
|
+
"f_avg_longtitude": "Averaged longtitude of a station.",
|
36
|
+
}
|
37
|
+
)
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from snowflake.ml.feature_store import FeatureView
|
4
|
+
from snowflake.ml.feature_store.examples.citibike_trip_features.entities import trip_id
|
5
|
+
from snowflake.snowpark import DataFrame, Session, functions as F
|
6
|
+
|
7
|
+
|
8
|
+
# This function will be invoked by example_helper.py. Do not change the name.
|
9
|
+
def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
|
10
|
+
"""Create a feature view about trip."""
|
11
|
+
feature_df = source_dfs[0].select(
|
12
|
+
"trip_id",
|
13
|
+
F.col("birth_year").alias("f_birth_year"),
|
14
|
+
F.col("gender").alias("f_gender"),
|
15
|
+
F.col("bikeid").alias("f_bikeid"),
|
16
|
+
)
|
17
|
+
|
18
|
+
return FeatureView(
|
19
|
+
name="f_trip", # name of feature view
|
20
|
+
entities=[trip_id], # entities
|
21
|
+
feature_df=feature_df, # definition query
|
22
|
+
refresh_freq=None, # refresh frequency. None indicates it never refresh
|
23
|
+
desc="Static trip features",
|
24
|
+
).attach_feature_desc(
|
25
|
+
{
|
26
|
+
"f_birth_year": "The birth year of a trip passenger.",
|
27
|
+
"f_gender": "The gender of a trip passenger.",
|
28
|
+
"f_bikeid": "The bike id of a trip passenger.",
|
29
|
+
}
|
30
|
+
)
|