snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +7 -33
- snowflake/ml/_internal/env_utils.py +11 -5
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
- snowflake/ml/_internal/telemetry.py +14 -0
- snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
- snowflake/ml/data/_internal/arrow_ingestor.py +66 -10
- snowflake/ml/data/data_connector.py +59 -6
- snowflake/ml/data/data_ingestor.py +18 -1
- snowflake/ml/data/{_internal/ingestor_utils.py → ingestor_utils.py} +5 -1
- snowflake/ml/data/torch_dataset.py +33 -0
- snowflake/ml/dataset/dataset_metadata.py +3 -1
- snowflake/ml/dataset/dataset_reader.py +9 -3
- snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
- snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +10 -4
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +6 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +3 -0
- snowflake/ml/feature_store/examples/example_helper.py +69 -31
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +3 -3
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/{dropoff_features.py → location_features.py} +14 -9
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -1
- snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +1 -1
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +3 -3
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +13 -6
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +8 -5
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +3 -0
- snowflake/ml/feature_store/feature_store.py +59 -24
- snowflake/ml/feature_store/feature_view.py +148 -4
- snowflake/ml/model/_client/model/model_impl.py +11 -2
- snowflake/ml/model/_client/model/model_version_impl.py +171 -20
- snowflake/ml/model/_client/ops/model_ops.py +105 -27
- snowflake/ml/model/_client/ops/service_ops.py +121 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +95 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +31 -0
- snowflake/ml/model/_client/sql/model_version.py +13 -4
- snowflake/ml/model/_client/sql/service.py +129 -0
- snowflake/ml/model/_model_composer/model_composer.py +3 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +10 -2
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +3 -0
- snowflake/ml/model/_packager/model_env/model_env.py +7 -2
- snowflake/ml/model/_packager/model_handlers/_base.py +29 -12
- snowflake/ml/model/_packager/model_handlers/catboost.py +19 -12
- snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +9 -5
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +27 -18
- snowflake/ml/model/_packager/model_handlers/llm.py +7 -3
- snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
- snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
- snowflake/ml/model/_packager/model_handlers/sklearn.py +87 -4
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
- snowflake/ml/model/_packager/model_handlers/torchscript.py +8 -3
- snowflake/ml/model/_packager/model_handlers/xgboost.py +25 -16
- snowflake/ml/model/_packager/model_meta/model_meta.py +32 -2
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +19 -0
- snowflake/ml/model/_packager/model_packager.py +2 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -2
- snowflake/ml/model/type_hints.py +1 -3
- snowflake/ml/modeling/framework/base.py +28 -19
- snowflake/ml/modeling/pipeline/pipeline.py +3 -0
- snowflake/ml/registry/_manager/model_manager.py +16 -2
- snowflake/ml/utils/sql_client.py +22 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/METADATA +35 -2
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/RECORD +73 -62
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +0 -58
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/top_level.txt +0 -0
snowflake/ml/feature_store/examples/airline_features/features/plane_features.py (new file)

@@ -0,0 +1,31 @@
+from typing import List
+
+from snowflake.ml.feature_store import FeatureView
+from snowflake.ml.feature_store.examples.airline_features.entities import plane_entity
+from snowflake.snowpark import DataFrame, Session
+
+
+# This function will be invoked by example_helper.py. Do not change the name.
+def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+    """Create a feature view about airplane model."""
+    query = session.sql(
+        """
+        select
+            PLANE_MODEL,
+            SEATING_CAPACITY
+        from
+            PLANE_MODEL_ATTRIBUTES
+        """
+    )
+
+    return FeatureView(
+        name="f_plane",  # name of feature view
+        entities=[plane_entity],  # entities
+        feature_df=query,  # definition query
+        refresh_freq=None,  # refresh frequency
+        desc="Plane features never refresh.",
+    ).attach_feature_desc(
+        {
+            "SEATING_CAPACITY": "The seating capacity of a plane.",
+        }
+    )
snowflake/ml/feature_store/examples/airline_features/features/weather_features.py (new file)

@@ -0,0 +1,42 @@
+from typing import List
+
+from snowflake.ml.feature_store import FeatureView
+from snowflake.ml.feature_store.examples.airline_features.entities import zipcode_entity
+from snowflake.snowpark import DataFrame, Session
+
+
+# This function will be invoked by example_helper.py. Do not change the name.
+def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+    """Create a feature view about airport weather."""
+    query = session.sql(
+        """
+        select
+            DATETIME_UTC AS TS,
+            AIRPORT_ZIP_CODE,
+            sum(RAIN_MM_H) over (
+                partition by AIRPORT_ZIP_CODE
+                order by DATETIME_UTC
+                range between interval '30 minutes' preceding and current row
+            ) RAIN_SUM_30M,
+            sum(RAIN_MM_H) over (
+                partition by AIRPORT_ZIP_CODE
+                order by DATETIME_UTC
+                range between interval '1 day' preceding and current row
+            ) RAIN_SUM_60M
+        from AIRPORT_WEATHER_STATION
+        """
+    )
+
+    return FeatureView(
+        name="f_weather",  # name of feature view
+        entities=[zipcode_entity],  # entities
+        feature_df=query,  # definition query
+        timestamp_col="TS",  # timestamp column
+        refresh_freq="1d",  # refresh frequency
+        desc="Airport weather features refreshed every day.",
+    ).attach_feature_desc(
+        {
+            "RAIN_SUM_30M": "The sum of rain fall over past 30 minutes for one zipcode.",
+            "RAIN_SUM_60M": "The sum of rain fall over past 1 day for one zipcode.",
+        }
+    )
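Both new airline modules follow the existing example pattern: `create_draft_feature_view` returns an unregistered `FeatureView` draft. As a hedged sketch of how those drafts might be registered (not part of this diff; the connection parameters and the `MY_DB`/`AIRLINE_FS`/`MY_WH` names are placeholders):

```python
# Hedged sketch only; credentials and object names below are placeholders, not values from this package.
from snowflake.ml.feature_store import CreationMode, FeatureStore
from snowflake.ml.feature_store.examples.airline_features.entities import plane_entity, zipcode_entity
from snowflake.ml.feature_store.examples.airline_features.features import plane_features, weather_features
from snowflake.snowpark import Session

session = Session.builder.configs(
    {"account": "<account>", "user": "<user>", "password": "<password>", "warehouse": "MY_WH"}
).create()

fs = FeatureStore(
    session=session,
    database="MY_DB",
    name="AIRLINE_FS",  # schema that backs the feature store
    default_warehouse="MY_WH",
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)

# Entities referenced by the drafts must be registered first.
fs.register_entity(plane_entity)
fs.register_entity(zipcode_entity)

# The airline examples query fixed tables (PLANE_MODEL_ATTRIBUTES, AIRPORT_WEATHER_STATION),
# so the source_dfs/source_tables arguments are not used by these two drafts.
plane_fv = plane_features.create_draft_feature_view(session, source_dfs=[], source_tables=[])
weather_fv = weather_features.create_draft_feature_view(session, source_dfs=[], source_tables=[])
fs.register_feature_view(feature_view=plane_fv, version="V1")
fs.register_feature_view(feature_view=weather_fv, version="V1")
```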
snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py

@@ -14,18 +14,24 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
         f"""
         select
             end_station_id,
-            count(end_station_id) as
-            avg(end_station_latitude) as
-            avg(end_station_longitude) as
+            count(end_station_id) as f_count,
+            avg(end_station_latitude) as f_avg_latitude,
+            avg(end_station_longitude) as f_avg_longtitude
         from {source_tables[0]}
         group by end_station_id
         """
     )

     return FeatureView(
-        name="
+        name="f_station",  # name of feature view
         entities=[end_station_id],  # entities
         feature_df=query,  # definition query
         refresh_freq="1d",  # refresh frequency. '1d' means it refreshes everyday
         desc="Station features refreshed every day.",
+    ).attach_feature_desc(
+        {
+            "f_count": "How many times this station appears in 1 day.",
+            "f_avg_latitude": "Averaged latitude of a station.",
+            "f_avg_longtitude": "Averaged longtitude of a station.",
+        }
     )
snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py

@@ -21,4 +21,10 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
         feature_df=feature_df,  # definition query
         refresh_freq=None,  # refresh frequency. None indicates it never refresh
         desc="Static trip features",
+    ).attach_feature_desc(
+        {
+            "f_birth_year": "The birth year of a trip passenger.",
+            "f_gender": "The gender of a trip passenger.",
+            "f_bikeid": "The bike id of a trip passenger.",
+        }
     )
snowflake/ml/feature_store/examples/example_helper.py

@@ -10,7 +10,7 @@ import yaml
 from snowflake.ml._internal.utils import identifier, sql_identifier
 from snowflake.ml.feature_store import Entity, FeatureView  # type: ignore[attr-defined]
 from snowflake.snowpark import DataFrame, Session, functions as F
-from snowflake.snowpark.types import TimestampType
+from snowflake.snowpark.types import TimestampTimeZone, TimestampType

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -28,6 +28,9 @@ class ExampleHelper:
         self._session = session
         self._database_name = database_name
         self._dataset_schema = dataset_schema
+        self._clear()
+
+    def _clear(self) -> None:
         self._selected_example = None
         self._source_tables: List[str] = []
         self._source_dfs: List[DataFrame] = []
@@ -36,15 +39,18 @@ class ExampleHelper:
         self._timestamp_column: Optional[sql_identifier.SqlIdentifier] = None
         self._epoch_to_timestamp_cols: List[str] = []
         self._add_id_column: Optional[sql_identifier.SqlIdentifier] = None
+        self._training_spine_table: str = ""

-    def list_examples(self) ->
-        """Return a
+    def list_examples(self) -> Optional[DataFrame]:
+        """Return a dataframe object about descriptions of all examples."""
         root_dir = Path(__file__).parent
-
+        rows = []
         for f_name in os.listdir(root_dir):
             if os.path.isdir(os.path.join(root_dir, f_name)) and f_name[0].isalpha() and f_name != "source_data":
-
-
+                source_file_path = root_dir.joinpath(f"{f_name}/source.yaml")
+                source_dict = self._read_yaml(str(source_file_path))
+                rows.append((f_name, source_dict["model_category"], source_dict["desc"], source_dict["label_columns"]))
+        return self._session.create_dataframe(rows, schema=["NAME", "MODEL_CATEGORY", "DESC", "LABEL_COLS"])

     def load_draft_feature_views(self) -> List[FeatureView]:
         """Return all feature views in an example.
@@ -101,7 +107,7 @@ class ExampleHelper:
             """
         ).collect()

-    def _load_csv(self, schema_dict: Dict[str, str],
+    def _load_csv(self, schema_dict: Dict[str, str], temp_stage_name: str) -> List[str]:
         # create temp file format
         file_format_name = f"{self._database_name}.{self._dataset_schema}.feature_store_temp_format"
         format_str = ""
@@ -116,6 +122,8 @@ class ExampleHelper:
             cols_type_str = (
                 f"{self._add_id_column.resolved()} number autoincrement start 1 increment 1, " + cols_type_str
             )
+
+        destination_table = f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"
         self._session.sql(
             f"""
             create or replace table {destination_table} ({cols_type_str})
@@ -132,25 +140,50 @@ class ExampleHelper:
             """
         ).collect()

-
+        return [destination_table]
+
+    def _load_parquet(self, schema_dict: Dict[str, str], temp_stage_name: str) -> List[str]:
         regex_pattern = schema_dict["load_files_pattern"]
         all_files = self._session.sql(f"list @{temp_stage_name}").collect()
         filtered_files = [item["name"] for item in all_files if re.match(regex_pattern, item["name"])]
-
-
+        file_count = len(filtered_files)
+        result = []
+
+        for file in filtered_files:
+            file_name = file.rsplit("/", 1)[-1]

-
-
-
+            df = self._session.read.parquet(f"@{temp_stage_name}/{file_name}")
+            for old_col_name in df.columns:
+                df = df.with_column_renamed(old_col_name, identifier.get_unescaped_names(old_col_name))

-
-
-
-
+            # convert timestamp column to ntz
+            for name, type in dict(df.dtypes).items():
+                if type == "timestamp":
+                    df = df.with_column(name, F.to_timestamp_ntz(name))

-
+            # convert epoch column to ntz timestamp
+            for ts_col in self._epoch_to_timestamp_cols:
+                if "timestamp" != dict(df.dtypes)[ts_col]:
+                    df = df.with_column(ts_col, F.cast(df[ts_col] / 1000000, TimestampType(TimestampTimeZone.NTZ)))

-
+            if self._add_id_column:
+                df = df.withColumn(self._add_id_column, F.monotonically_increasing_id())
+
+            if file_count == 1:
+                dest_table_name = (
+                    f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"
+                )
+            else:
+                regex_pattern = schema_dict["destination_table_name"]
+                dest_table_name = re.match(regex_pattern, file_name).group("table_name")  # type: ignore[union-attr]
+                dest_table_name = f"{self._database_name}.{self._dataset_schema}.{dest_table_name}"
+
+            df.write.mode("overwrite").save_as_table(dest_table_name)
+            result.append(dest_table_name)
+
+        return result
+
+    def _load_source_data(self, schema_yaml_file: str) -> List[str]:
         """Parse a yaml schema file and load data into Snowflake.

         Args:
@@ -162,7 +195,6 @@ class ExampleHelper:
         # load schema file
         schema_dict = self._read_yaml(schema_yaml_file)
         temp_stage_name = f"{self._database_name}.{self._dataset_schema}.feature_store_temp_stage"
-        destination_table = f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"

         # create a temp stage from S3 URL
         self._session.sql(f"create or replace stage {temp_stage_name} url = '{schema_dict['s3_url']}'").collect()
@@ -170,11 +202,9 @@ class ExampleHelper:
         # load csv or parquet
         # TODO: this could be more flexible and robust.
         if "parquet" in schema_dict["load_files_pattern"]:
-            self._load_parquet(schema_dict,
+            return self._load_parquet(schema_dict, temp_stage_name)
         else:
-            self._load_csv(schema_dict,
-
-        return destination_table
+            return self._load_csv(schema_dict, temp_stage_name)

     def load_example(self, example_name: str) -> List[str]:
         """Select the active example and load its datasets to Snowflake.
@@ -186,6 +216,7 @@ class ExampleHelper:
         Returns:
             Returns a list of table names with populated datasets.
         """
+        self._clear()
         self._selected_example = example_name  # type: ignore[assignment]

         # load source yaml file
@@ -195,7 +226,7 @@ class ExampleHelper:
         self._source_tables = []
         self._source_dfs = []

-
+        source_yaml_data = source_dict["source_data"]
         if "excluded_columns" in source_dict:
             self._excluded_columns = sql_identifier.to_sql_identifiers(source_dict["excluded_columns"].split(","))
         if "label_columns" in source_dict:
@@ -206,8 +237,11 @@ class ExampleHelper:
             self._epoch_to_timestamp_cols = source_dict["epoch_to_timestamp_cols"].split(",")
         if "add_id_column" in source_dict:
             self._add_id_column = sql_identifier.SqlIdentifier(source_dict["add_id_column"])
+        self._training_spine_table = (
+            f"{self._database_name}.{self._dataset_schema}.{source_dict['training_spine_table']}"
+        )

-        return self.load_source_data(
+        return self.load_source_data(source_yaml_data)

     def load_source_data(self, source_data_name: str) -> List[str]:
         """Load source data into Snowflake.
@@ -220,11 +254,12 @@ class ExampleHelper:
         """
         root_dir = Path(__file__).parent
         schema_file = root_dir.joinpath(f"source_data/{source_data_name}.yaml")
-
-
-
-
-
+        destination_tables = self._load_source_data(str(schema_file))
+        for dest_table in destination_tables:
+            source_df = self._session.table(dest_table)
+            self._source_tables.append(dest_table)
+            self._source_dfs.append(source_df)
+            logger.info(f"{dest_table} has been created successfully.")
         return self._source_tables

     def get_current_schema(self) -> str:
@@ -238,3 +273,6 @@ class ExampleHelper:

     def get_training_data_timestamp_col(self) -> Optional[str]:
         return self._timestamp_column.resolved() if self._timestamp_column is not None else None
+
+    def get_training_spine_table(self) -> str:
+        return self._training_spine_table
snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py

@@ -2,11 +2,11 @@ from typing import List

 from snowflake.ml.feature_store import Entity

-
+trip_id = Entity(name="TRIP_ID", join_keys=["TRIP_ID"], desc="Trip id.")

-
+location_id = Entity(name="DOLOCATIONID", join_keys=["DOLOCATIONID"], desc="Drop off location id.")


 # This will be invoked by example_helper.py. Do not change function name.
 def get_all_entities() -> List[Entity]:
-    return [
+    return [trip_id, location_id]
snowflake/ml/feature_store/examples/new_york_taxi_features/features/{dropoff_features.py → location_features.py}

@@ -2,7 +2,7 @@ from typing import List

 from snowflake.ml.feature_store import FeatureView
 from snowflake.ml.feature_store.examples.new_york_taxi_features.entities import (
-
+    location_id,
 )
 from snowflake.snowpark import DataFrame, Session

@@ -15,25 +15,30 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
         select
             TPEP_DROPOFF_DATETIME as TS,
             DOLOCATIONID,
-
+            avg(FARE_AMOUNT) over (
                 partition by DOLOCATIONID
                 order by TPEP_DROPOFF_DATETIME
                 range between interval '1 hours' preceding and current row
-            )
-
+            ) AVG_FARE_1H,
+            avg(FARE_AMOUNT) over (
                 partition by DOLOCATIONID
                 order by TPEP_DROPOFF_DATETIME
-                range between interval '
-            )
+                range between interval '10 hours' preceding and current row
+            ) AVG_FARE_10h
         from {source_tables[0]}
         """
     )

     return FeatureView(
-        name="
-        entities=[
+        name="f_location",  # name of feature view
+        entities=[location_id],  # entities
         feature_df=feature_df,  # definition query
         refresh_freq="12h",  # the frequency this feature view re-compute
         timestamp_col="TS",  # timestamp column. Used when generate training data
-        desc="
+        desc="Features aggregated by location id and refreshed every 12 hours.",
+    ).attach_feature_desc(
+        {
+            "AVG_FARE_1H": "Averaged fare in past 1 hour window aggregated by location.",
+            "AVG_FARE_10H": "Averaged fare in past 10 hours aggregated by location.",
+        }
     )
snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py (new file)

@@ -0,0 +1,36 @@
+from typing import List
+
+from snowflake.ml.feature_store import FeatureView
+from snowflake.ml.feature_store.examples.new_york_taxi_features.entities import trip_id
+from snowflake.snowpark import DataFrame, Session
+
+
+# This function will be invoked by example_helper.py. Do not change the name.
+def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+    """Create a draft feature view."""
+    feature_df = session.sql(
+        f"""
+        select
+            TRIP_ID,
+            PASSENGER_COUNT,
+            TRIP_DISTANCE,
+            FARE_AMOUNT
+        from
+            {source_tables[0]}
+        """
+    )
+
+    return FeatureView(
+        name="f_trip",  # name of feature view
+        entities=[trip_id],  # entities
+        feature_df=feature_df,  # definition query
+        refresh_freq="1d",  # the frequency this feature view re-compute
+        timestamp_col=None,  # timestamp column. Used when generate training data
+        desc="Features per trip refreshed every day.",
+    ).attach_feature_desc(
+        {
+            "PASSENGER_COUNT": "The count of passenger of a trip.",
+            "TRIP_DISTANCE": "The distance of a trip.",
+            "FARE_AMOUNT": "The fare of a trip.",
+        }
+    )
snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml

@@ -1,5 +1,9 @@
 ---
 source_data: nyc_yellow_trips
-
+training_spine_table: nyc_yellow_trips
+label_columns: TOTAL_AMOUNT
+add_id_column: TRIP_ID
 timestamp_column: TPEP_PICKUP_DATETIME
 epoch_to_timestamp_cols: TPEP_PICKUP_DATETIME,TPEP_DROPOFF_DATETIME
+desc: Features using taxi trip data trying to predict the total fare of a trip.
+model_category: regression
snowflake/ml/feature_store/examples/wine_quality_features/entities.py

@@ -2,13 +2,13 @@ from typing import List

 from snowflake.ml.feature_store import Entity

-
+wine_id = Entity(
     name="WINE",
     join_keys=["WINE_ID"],
-    desc="Wine ID
+    desc="Wine ID.",
 )


 # This will be invoked by example_helper.py. Do not change function name.
 def get_all_entities() -> List[Entity]:
-    return [
+    return [wine_id]
snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py

@@ -1,9 +1,7 @@
 from typing import List

 from snowflake.ml.feature_store import FeatureView
-from snowflake.ml.feature_store.examples.wine_quality_features.entities import (
-    wine_entity,
-)
+from snowflake.ml.feature_store.examples.wine_quality_features.entities import wine_id
 from snowflake.snowpark import DataFrame, Session, functions as F


@@ -17,13 +15,22 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
         "CHLORIDES",
         "TOTAL_SULFUR_DIOXIDE",
         "PH",
-        (F.col("FIXED_ACIDITY") * F.col("CITRIC_ACID")).alias("
+        (F.col("FIXED_ACIDITY") * F.col("CITRIC_ACID")).alias("HYBRID_ACID"),
     )

     return FeatureView(
         name="WINE_FEATURES",  # name of feature view
-        entities=[
+        entities=[wine_id],  # entities
         feature_df=feature_df,  # definition dataframe
         refresh_freq="1d",  # refresh frequency. '1d' means it refreshes everyday
-        desc="Managed
+        desc="Managed features about wine quality which refreshes everyday.",
+    ).attach_feature_desc(
+        {
+            "FIXED_ACIDITY": "Fixed acidity.",
+            "CITRIC_ACID": "Citric acid.",
+            "CHLORIDES": "Chlorides",
+            "TOTAL_SULFUR_DIOXIDE": "Total sulfur dioxide.",
+            "PH": "PH.",
+            "HYBRID_ACID": "Hybrid acid generated by a production of fixed and citric acid.",
+        }
     )
snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py

@@ -1,9 +1,7 @@
 from typing import List

 from snowflake.ml.feature_store import FeatureView
-from snowflake.ml.feature_store.examples.wine_quality_features.entities import (
-    wine_entity,
-)
+from snowflake.ml.feature_store.examples.wine_quality_features.entities import wine_id
 from snowflake.snowpark import DataFrame, Session


@@ -14,8 +12,13 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou

     return FeatureView(
         name="EXTRA_WINE_FEATURES",  # name of feature view
-        entities=[
+        entities=[wine_id],  # entities
         feature_df=feature_df,  # feature dataframe
         refresh_freq=None,  # refresh frequency. None means it never refresh
-        desc="Static
+        desc="Static features about wine quality which never refresh.",
+    ).attach_feature_desc(
+        {
+            "SULPHATES": "Sulphates.",
+            "ALCOHOL": "Alcohol.",
+        }
     )