snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (74)
  1. snowflake/cortex/_complete.py +7 -33
  2. snowflake/ml/_internal/env_utils.py +11 -5
  3. snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
  4. snowflake/ml/_internal/telemetry.py +14 -0
  5. snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
  6. snowflake/ml/data/_internal/arrow_ingestor.py +66 -10
  7. snowflake/ml/data/data_connector.py +59 -6
  8. snowflake/ml/data/data_ingestor.py +18 -1
  9. snowflake/ml/data/{_internal/ingestor_utils.py → ingestor_utils.py} +5 -1
  10. snowflake/ml/data/torch_dataset.py +33 -0
  11. snowflake/ml/dataset/dataset_metadata.py +3 -1
  12. snowflake/ml/dataset/dataset_reader.py +9 -3
  13. snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
  14. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
  15. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
  16. snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
  17. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +10 -4
  18. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +6 -0
  19. snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +3 -0
  20. snowflake/ml/feature_store/examples/example_helper.py +69 -31
  21. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +3 -3
  22. snowflake/ml/feature_store/examples/new_york_taxi_features/features/{dropoff_features.py → location_features.py} +14 -9
  23. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
  24. snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -1
  25. snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
  26. snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +1 -1
  27. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +3 -3
  28. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +13 -6
  29. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +8 -5
  30. snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +3 -0
  31. snowflake/ml/feature_store/feature_store.py +59 -24
  32. snowflake/ml/feature_store/feature_view.py +148 -4
  33. snowflake/ml/model/_client/model/model_impl.py +11 -2
  34. snowflake/ml/model/_client/model/model_version_impl.py +171 -20
  35. snowflake/ml/model/_client/ops/model_ops.py +105 -27
  36. snowflake/ml/model/_client/ops/service_ops.py +121 -0
  37. snowflake/ml/model/_client/service/model_deployment_spec.py +95 -0
  38. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +31 -0
  39. snowflake/ml/model/_client/sql/model_version.py +13 -4
  40. snowflake/ml/model/_client/sql/service.py +129 -0
  41. snowflake/ml/model/_model_composer/model_composer.py +3 -0
  42. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +10 -2
  43. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +3 -0
  44. snowflake/ml/model/_packager/model_env/model_env.py +7 -2
  45. snowflake/ml/model/_packager/model_handlers/_base.py +29 -12
  46. snowflake/ml/model/_packager/model_handlers/catboost.py +19 -12
  47. snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
  48. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +9 -5
  49. snowflake/ml/model/_packager/model_handlers/lightgbm.py +27 -18
  50. snowflake/ml/model/_packager/model_handlers/llm.py +7 -3
  51. snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
  52. snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
  53. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
  54. snowflake/ml/model/_packager/model_handlers/sklearn.py +87 -4
  55. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -2
  56. snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
  57. snowflake/ml/model/_packager/model_handlers/torchscript.py +8 -3
  58. snowflake/ml/model/_packager/model_handlers/xgboost.py +25 -16
  59. snowflake/ml/model/_packager/model_meta/model_meta.py +32 -2
  60. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +19 -0
  61. snowflake/ml/model/_packager/model_packager.py +2 -1
  62. snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -2
  63. snowflake/ml/model/type_hints.py +1 -3
  64. snowflake/ml/modeling/framework/base.py +28 -19
  65. snowflake/ml/modeling/pipeline/pipeline.py +3 -0
  66. snowflake/ml/registry/_manager/model_manager.py +16 -2
  67. snowflake/ml/utils/sql_client.py +22 -0
  68. snowflake/ml/version.py +1 -1
  69. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/METADATA +35 -2
  70. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/RECORD +73 -62
  71. snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +0 -58
  72. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/LICENSE.txt +0 -0
  73. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/WHEEL +0 -0
  74. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,31 @@
1
+ from typing import List
2
+
3
+ from snowflake.ml.feature_store import FeatureView
4
+ from snowflake.ml.feature_store.examples.airline_features.entities import plane_entity
5
+ from snowflake.snowpark import DataFrame, Session
6
+
7
+
8
+ # This function will be invoked by example_helper.py. Do not change the name.
9
+ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
10
+ """Create a feature view about airplane model."""
11
+ query = session.sql(
12
+ """
13
+ select
14
+ PLANE_MODEL,
15
+ SEATING_CAPACITY
16
+ from
17
+ PLANE_MODEL_ATTRIBUTES
18
+ """
19
+ )
20
+
21
+ return FeatureView(
22
+ name="f_plane", # name of feature view
23
+ entities=[plane_entity], # entities
24
+ feature_df=query, # definition query
25
+ refresh_freq=None, # refresh frequency
26
+ desc="Plane features never refresh.",
27
+ ).attach_feature_desc(
28
+ {
29
+ "SEATING_CAPACITY": "The seating capacity of a plane.",
30
+ }
31
+ )
@@ -0,0 +1,42 @@
1
+ from typing import List
2
+
3
+ from snowflake.ml.feature_store import FeatureView
4
+ from snowflake.ml.feature_store.examples.airline_features.entities import zipcode_entity
5
+ from snowflake.snowpark import DataFrame, Session
6
+
7
+
8
+ # This function will be invoked by example_helper.py. Do not change the name.
9
+ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
10
+ """Create a feature view about airport weather."""
11
+ query = session.sql(
12
+ """
13
+ select
14
+ DATETIME_UTC AS TS,
15
+ AIRPORT_ZIP_CODE,
16
+ sum(RAIN_MM_H) over (
17
+ partition by AIRPORT_ZIP_CODE
18
+ order by DATETIME_UTC
19
+ range between interval '30 minutes' preceding and current row
20
+ ) RAIN_SUM_30M,
21
+ sum(RAIN_MM_H) over (
22
+ partition by AIRPORT_ZIP_CODE
23
+ order by DATETIME_UTC
24
+ range between interval '1 day' preceding and current row
25
+ ) RAIN_SUM_60M
26
+ from AIRPORT_WEATHER_STATION
27
+ """
28
+ )
29
+
30
+ return FeatureView(
31
+ name="f_weather", # name of feature view
32
+ entities=[zipcode_entity], # entities
33
+ feature_df=query, # definition query
34
+ timestamp_col="TS", # timestamp column
35
+ refresh_freq="1d", # refresh frequency
36
+ desc="Airport weather features refreshed every day.",
37
+ ).attach_feature_desc(
38
+ {
39
+ "RAIN_SUM_30M": "The sum of rain fall over past 30 minutes for one zipcode.",
40
+ "RAIN_SUM_60M": "The sum of rain fall over past 1 day for one zipcode.",
41
+ }
42
+ )
@@ -0,0 +1,7 @@
1
+ ---
2
+ source_data: airline
3
+ label_columns: DEPARTING_DELAY
4
+ timestamp_column: SCHEDULED_DEPARTURE_UTC
5
+ desc: Features using synthetic airline data to predict the departing delay.
6
+ model_category: classification
7
+ training_spine_table: US_FLIGHT_SCHEDULES
@@ -14,18 +14,24 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
14
14
  f"""
15
15
  select
16
16
  end_station_id,
17
- count(end_station_id) as f_count_1d,
18
- avg(end_station_latitude) as f_avg_latitude_1d,
19
- avg(end_station_longitude) as f_avg_longtitude_1d
17
+ count(end_station_id) as f_count,
18
+ avg(end_station_latitude) as f_avg_latitude,
19
+ avg(end_station_longitude) as f_avg_longtitude
20
20
  from {source_tables[0]}
21
21
  group by end_station_id
22
22
  """
23
23
  )
24
24
 
25
25
  return FeatureView(
26
- name="f_station_1d", # name of feature view
26
+ name="f_station", # name of feature view
27
27
  entities=[end_station_id], # entities
28
28
  feature_df=query, # definition query
29
29
  refresh_freq="1d", # refresh frequency. '1d' means it refreshes everyday
30
30
  desc="Station features refreshed every day.",
31
+ ).attach_feature_desc(
32
+ {
33
+ "f_count": "How many times this station appears in 1 day.",
34
+ "f_avg_latitude": "Averaged latitude of a station.",
35
+ "f_avg_longtitude": "Averaged longtitude of a station.",
36
+ }
31
37
  )
@@ -21,4 +21,10 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
21
21
  feature_df=feature_df, # definition query
22
22
  refresh_freq=None, # refresh frequency. None indicates it never refresh
23
23
  desc="Static trip features",
24
+ ).attach_feature_desc(
25
+ {
26
+ "f_birth_year": "The birth year of a trip passenger.",
27
+ "f_gender": "The gender of a trip passenger.",
28
+ "f_bikeid": "The bike id of a trip passenger.",
29
+ }
24
30
  )
@@ -1,4 +1,7 @@
1
1
  ---
2
2
  source_data: citibike_trips
3
+ training_spine_table: citibike_trips
3
4
  label_columns: tripduration
4
5
  add_id_column: trip_id
6
+ desc: Features using citibike trip data trying to predict the duration of a trip.
7
+ model_category: regression
@@ -10,7 +10,7 @@ import yaml
10
10
  from snowflake.ml._internal.utils import identifier, sql_identifier
11
11
  from snowflake.ml.feature_store import Entity, FeatureView # type: ignore[attr-defined]
12
12
  from snowflake.snowpark import DataFrame, Session, functions as F
13
- from snowflake.snowpark.types import TimestampType
13
+ from snowflake.snowpark.types import TimestampTimeZone, TimestampType
14
14
 
15
15
  logger = logging.getLogger(__name__)
16
16
  logger.setLevel(logging.INFO)
@@ -28,6 +28,9 @@ class ExampleHelper:
28
28
  self._session = session
29
29
  self._database_name = database_name
30
30
  self._dataset_schema = dataset_schema
31
+ self._clear()
32
+
33
+ def _clear(self) -> None:
31
34
  self._selected_example = None
32
35
  self._source_tables: List[str] = []
33
36
  self._source_dfs: List[DataFrame] = []
@@ -36,15 +39,18 @@ class ExampleHelper:
36
39
  self._timestamp_column: Optional[sql_identifier.SqlIdentifier] = None
37
40
  self._epoch_to_timestamp_cols: List[str] = []
38
41
  self._add_id_column: Optional[sql_identifier.SqlIdentifier] = None
42
+ self._training_spine_table: str = ""
39
43
 
40
- def list_examples(self) -> List[str]:
41
- """Return a list of examples."""
44
+ def list_examples(self) -> Optional[DataFrame]:
45
+ """Return a dataframe object about descriptions of all examples."""
42
46
  root_dir = Path(__file__).parent
43
- result = []
47
+ rows = []
44
48
  for f_name in os.listdir(root_dir):
45
49
  if os.path.isdir(os.path.join(root_dir, f_name)) and f_name[0].isalpha() and f_name != "source_data":
46
- result.append(f_name)
47
- return result
50
+ source_file_path = root_dir.joinpath(f"{f_name}/source.yaml")
51
+ source_dict = self._read_yaml(str(source_file_path))
52
+ rows.append((f_name, source_dict["model_category"], source_dict["desc"], source_dict["label_columns"]))
53
+ return self._session.create_dataframe(rows, schema=["NAME", "MODEL_CATEGORY", "DESC", "LABEL_COLS"])
48
54
 
49
55
  def load_draft_feature_views(self) -> List[FeatureView]:
50
56
  """Return all feature views in an example.
@@ -101,7 +107,7 @@ class ExampleHelper:
101
107
  """
102
108
  ).collect()
103
109
 
104
- def _load_csv(self, schema_dict: Dict[str, str], destination_table: str, temp_stage_name: str) -> None:
110
+ def _load_csv(self, schema_dict: Dict[str, str], temp_stage_name: str) -> List[str]:
105
111
  # create temp file format
106
112
  file_format_name = f"{self._database_name}.{self._dataset_schema}.feature_store_temp_format"
107
113
  format_str = ""
@@ -116,6 +122,8 @@ class ExampleHelper:
116
122
  cols_type_str = (
117
123
  f"{self._add_id_column.resolved()} number autoincrement start 1 increment 1, " + cols_type_str
118
124
  )
125
+
126
+ destination_table = f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"
119
127
  self._session.sql(
120
128
  f"""
121
129
  create or replace table {destination_table} ({cols_type_str})
@@ -132,25 +140,50 @@ class ExampleHelper:
132
140
  """
133
141
  ).collect()
134
142
 
135
- def _load_parquet(self, schema_dict: Dict[str, str], destination_table: str, temp_stage_name: str) -> None:
143
+ return [destination_table]
144
+
145
+ def _load_parquet(self, schema_dict: Dict[str, str], temp_stage_name: str) -> List[str]:
136
146
  regex_pattern = schema_dict["load_files_pattern"]
137
147
  all_files = self._session.sql(f"list @{temp_stage_name}").collect()
138
148
  filtered_files = [item["name"] for item in all_files if re.match(regex_pattern, item["name"])]
139
- assert len(filtered_files) == 1, "Current code only works for one file"
140
- file_name = filtered_files[0].rsplit("/", 1)[-1]
149
+ file_count = len(filtered_files)
150
+ result = []
151
+
152
+ for file in filtered_files:
153
+ file_name = file.rsplit("/", 1)[-1]
141
154
 
142
- df = self._session.read.parquet(f"@{temp_stage_name}/{file_name}")
143
- for old_col_name in df.columns:
144
- df = df.with_column_renamed(old_col_name, identifier.get_unescaped_names(old_col_name))
155
+ df = self._session.read.parquet(f"@{temp_stage_name}/{file_name}")
156
+ for old_col_name in df.columns:
157
+ df = df.with_column_renamed(old_col_name, identifier.get_unescaped_names(old_col_name))
145
158
 
146
- for ts_col in self._epoch_to_timestamp_cols:
147
- if "timestamp" != dict(df.dtypes)[ts_col]:
148
- df = df.with_column(f"{ts_col}_NEW", F.cast(df[ts_col] / 1000000, TimestampType()))
149
- df = df.drop(ts_col).rename(f"{ts_col}_NEW", ts_col)
159
+ # convert timestamp column to ntz
160
+ for name, type in dict(df.dtypes).items():
161
+ if type == "timestamp":
162
+ df = df.with_column(name, F.to_timestamp_ntz(name))
150
163
 
151
- df.write.mode("overwrite").save_as_table(destination_table)
164
+ # convert epoch column to ntz timestamp
165
+ for ts_col in self._epoch_to_timestamp_cols:
166
+ if "timestamp" != dict(df.dtypes)[ts_col]:
167
+ df = df.with_column(ts_col, F.cast(df[ts_col] / 1000000, TimestampType(TimestampTimeZone.NTZ)))
152
168
 
153
- def _load_source_data(self, schema_yaml_file: str) -> str:
169
+ if self._add_id_column:
170
+ df = df.withColumn(self._add_id_column, F.monotonically_increasing_id())
171
+
172
+ if file_count == 1:
173
+ dest_table_name = (
174
+ f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"
175
+ )
176
+ else:
177
+ regex_pattern = schema_dict["destination_table_name"]
178
+ dest_table_name = re.match(regex_pattern, file_name).group("table_name") # type: ignore[union-attr]
179
+ dest_table_name = f"{self._database_name}.{self._dataset_schema}.{dest_table_name}"
180
+
181
+ df.write.mode("overwrite").save_as_table(dest_table_name)
182
+ result.append(dest_table_name)
183
+
184
+ return result
185
+
186
+ def _load_source_data(self, schema_yaml_file: str) -> List[str]:
154
187
  """Parse a yaml schema file and load data into Snowflake.
155
188
 
156
189
  Args:
@@ -162,7 +195,6 @@ class ExampleHelper:
162
195
  # load schema file
163
196
  schema_dict = self._read_yaml(schema_yaml_file)
164
197
  temp_stage_name = f"{self._database_name}.{self._dataset_schema}.feature_store_temp_stage"
165
- destination_table = f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"
166
198
 
167
199
  # create a temp stage from S3 URL
168
200
  self._session.sql(f"create or replace stage {temp_stage_name} url = '{schema_dict['s3_url']}'").collect()
@@ -170,11 +202,9 @@ class ExampleHelper:
170
202
  # load csv or parquet
171
203
  # TODO: this could be more flexible and robust.
172
204
  if "parquet" in schema_dict["load_files_pattern"]:
173
- self._load_parquet(schema_dict, destination_table, temp_stage_name)
205
+ return self._load_parquet(schema_dict, temp_stage_name)
174
206
  else:
175
- self._load_csv(schema_dict, destination_table, temp_stage_name)
176
-
177
- return destination_table
207
+ return self._load_csv(schema_dict, temp_stage_name)
178
208
 
179
209
  def load_example(self, example_name: str) -> List[str]:
180
210
  """Select the active example and load its datasets to Snowflake.
@@ -186,6 +216,7 @@ class ExampleHelper:
186
216
  Returns:
187
217
  Returns a list of table names with populated datasets.
188
218
  """
219
+ self._clear()
189
220
  self._selected_example = example_name # type: ignore[assignment]
190
221
 
191
222
  # load source yaml file
@@ -195,7 +226,7 @@ class ExampleHelper:
195
226
  self._source_tables = []
196
227
  self._source_dfs = []
197
228
 
198
- source_ymal_data = source_dict["source_data"]
229
+ source_yaml_data = source_dict["source_data"]
199
230
  if "excluded_columns" in source_dict:
200
231
  self._excluded_columns = sql_identifier.to_sql_identifiers(source_dict["excluded_columns"].split(","))
201
232
  if "label_columns" in source_dict:
@@ -206,8 +237,11 @@ class ExampleHelper:
206
237
  self._epoch_to_timestamp_cols = source_dict["epoch_to_timestamp_cols"].split(",")
207
238
  if "add_id_column" in source_dict:
208
239
  self._add_id_column = sql_identifier.SqlIdentifier(source_dict["add_id_column"])
240
+ self._training_spine_table = (
241
+ f"{self._database_name}.{self._dataset_schema}.{source_dict['training_spine_table']}"
242
+ )
209
243
 
210
- return self.load_source_data(source_ymal_data)
244
+ return self.load_source_data(source_yaml_data)
211
245
 
212
246
  def load_source_data(self, source_data_name: str) -> List[str]:
213
247
  """Load source data into Snowflake.
@@ -220,11 +254,12 @@ class ExampleHelper:
220
254
  """
221
255
  root_dir = Path(__file__).parent
222
256
  schema_file = root_dir.joinpath(f"source_data/{source_data_name}.yaml")
223
- destination_table = self._load_source_data(str(schema_file))
224
- source_df = self._session.table(destination_table)
225
- self._source_tables.append(destination_table)
226
- self._source_dfs.append(source_df)
227
- logger.info(f"source data {source_data_name} has been successfully loaded into table {destination_table}.")
257
+ destination_tables = self._load_source_data(str(schema_file))
258
+ for dest_table in destination_tables:
259
+ source_df = self._session.table(dest_table)
260
+ self._source_tables.append(dest_table)
261
+ self._source_dfs.append(source_df)
262
+ logger.info(f"{dest_table} has been created successfully.")
228
263
  return self._source_tables
229
264
 
230
265
  def get_current_schema(self) -> str:
@@ -238,3 +273,6 @@ class ExampleHelper:
238
273
 
239
274
  def get_training_data_timestamp_col(self) -> Optional[str]:
240
275
  return self._timestamp_column.resolved() if self._timestamp_column is not None else None
276
+
277
+ def get_training_spine_table(self) -> str:
278
+ return self._training_spine_table
@@ -2,11 +2,11 @@ from typing import List
2
2
 
3
3
  from snowflake.ml.feature_store import Entity
4
4
 
5
- trip_pickup = Entity(name="TRIP_PICKUP", join_keys=["PULOCATIONID"], desc="Trip pickup entity.")
5
+ trip_id = Entity(name="TRIP_ID", join_keys=["TRIP_ID"], desc="Trip id.")
6
6
 
7
- trip_dropoff = Entity(name="TRIP_DROPOFF", join_keys=["DOLOCATIONID"], desc="Trip dropoff entity.")
7
+ location_id = Entity(name="DOLOCATIONID", join_keys=["DOLOCATIONID"], desc="Drop off location id.")
8
8
 
9
9
 
10
10
  # This will be invoked by example_helper.py. Do not change function name.
11
11
  def get_all_entities() -> List[Entity]:
12
- return [trip_pickup, trip_dropoff]
12
+ return [trip_id, location_id]
@@ -2,7 +2,7 @@ from typing import List
2
2
 
3
3
  from snowflake.ml.feature_store import FeatureView
4
4
  from snowflake.ml.feature_store.examples.new_york_taxi_features.entities import (
5
- trip_dropoff,
5
+ location_id,
6
6
  )
7
7
  from snowflake.snowpark import DataFrame, Session
8
8
 
@@ -15,25 +15,30 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
15
15
  select
16
16
  TPEP_DROPOFF_DATETIME as TS,
17
17
  DOLOCATIONID,
18
- count(FARE_AMOUNT) over (
18
+ avg(FARE_AMOUNT) over (
19
19
  partition by DOLOCATIONID
20
20
  order by TPEP_DROPOFF_DATETIME
21
21
  range between interval '1 hours' preceding and current row
22
- ) TRIP_COUNT_1H,
23
- count(FARE_AMOUNT) over (
22
+ ) AVG_FARE_1H,
23
+ avg(FARE_AMOUNT) over (
24
24
  partition by DOLOCATIONID
25
25
  order by TPEP_DROPOFF_DATETIME
26
- range between interval '5 hours' preceding and current row
27
- ) TRIP_COUNT_5H
26
+ range between interval '10 hours' preceding and current row
27
+ ) AVG_FARE_10h
28
28
  from {source_tables[0]}
29
29
  """
30
30
  )
31
31
 
32
32
  return FeatureView(
33
- name="f_trip_dropoff", # name of feature view
34
- entities=[trip_dropoff], # entities
33
+ name="f_location", # name of feature view
34
+ entities=[location_id], # entities
35
35
  feature_df=feature_df, # definition query
36
36
  refresh_freq="12h", # the frequency this feature view re-compute
37
37
  timestamp_col="TS", # timestamp column. Used when generate training data
38
- desc="Managed feature view trip dropoff refreshed every 12 hours.",
38
+ desc="Features aggregated by location id and refreshed every 12 hours.",
39
+ ).attach_feature_desc(
40
+ {
41
+ "AVG_FARE_1H": "Averaged fare in past 1 hour window aggregated by location.",
42
+ "AVG_FARE_10H": "Averaged fare in past 10 hours aggregated by location.",
43
+ }
39
44
  )
@@ -0,0 +1,36 @@
1
+ from typing import List
2
+
3
+ from snowflake.ml.feature_store import FeatureView
4
+ from snowflake.ml.feature_store.examples.new_york_taxi_features.entities import trip_id
5
+ from snowflake.snowpark import DataFrame, Session
6
+
7
+
8
+ # This function will be invoked by example_helper.py. Do not change the name.
9
+ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
10
+ """Create a draft feature view."""
11
+ feature_df = session.sql(
12
+ f"""
13
+ select
14
+ TRIP_ID,
15
+ PASSENGER_COUNT,
16
+ TRIP_DISTANCE,
17
+ FARE_AMOUNT
18
+ from
19
+ {source_tables[0]}
20
+ """
21
+ )
22
+
23
+ return FeatureView(
24
+ name="f_trip", # name of feature view
25
+ entities=[trip_id], # entities
26
+ feature_df=feature_df, # definition query
27
+ refresh_freq="1d", # the frequency this feature view re-compute
28
+ timestamp_col=None, # timestamp column. Used when generate training data
29
+ desc="Features per trip refreshed every day.",
30
+ ).attach_feature_desc(
31
+ {
32
+ "PASSENGER_COUNT": "The count of passenger of a trip.",
33
+ "TRIP_DISTANCE": "The distance of a trip.",
34
+ "FARE_AMOUNT": "The fare of a trip.",
35
+ }
36
+ )
@@ -1,5 +1,9 @@
1
1
  ---
2
2
  source_data: nyc_yellow_trips
3
- label_columns: FARE_AMOUNT
3
+ training_spine_table: nyc_yellow_trips
4
+ label_columns: TOTAL_AMOUNT
5
+ add_id_column: TRIP_ID
4
6
  timestamp_column: TPEP_PICKUP_DATETIME
5
7
  epoch_to_timestamp_cols: TPEP_PICKUP_DATETIME,TPEP_DROPOFF_DATETIME
8
+ desc: Features using taxi trip data trying to predict the total fare of a trip.
9
+ model_category: regression
@@ -0,0 +1,4 @@
1
+ ---
2
+ s3_url: s3://sfquickstarts/misc/demos/airline/
3
+ load_files_pattern: .*[.]parquet
4
+ destination_table_name: (?P<table_name>.*)_0_0_0[.]snappy[.]parquet
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  s3_url: s3://snowflake-workshop-lab/citibike-trips-csv/
3
3
  destination_table_name: citibike_trips
4
- load_files_pattern: trips_2013_6_4_0.csv.gz
4
+ load_files_pattern: .*trips_2013_6_.*[.]csv[.]gz
5
5
  format:
6
6
  type: csv
7
7
  compression: auto
@@ -2,13 +2,13 @@ from typing import List
2
2
 
3
3
  from snowflake.ml.feature_store import Entity
4
4
 
5
- wine_entity = Entity(
5
+ wine_id = Entity(
6
6
  name="WINE",
7
7
  join_keys=["WINE_ID"],
8
- desc="Wine ID column.",
8
+ desc="Wine ID.",
9
9
  )
10
10
 
11
11
 
12
12
  # This will be invoked by example_helper.py. Do not change function name.
13
13
  def get_all_entities() -> List[Entity]:
14
- return [wine_entity]
14
+ return [wine_id]
@@ -1,9 +1,7 @@
1
1
  from typing import List
2
2
 
3
3
  from snowflake.ml.feature_store import FeatureView
4
- from snowflake.ml.feature_store.examples.wine_quality_features.entities import (
5
- wine_entity,
6
- )
4
+ from snowflake.ml.feature_store.examples.wine_quality_features.entities import wine_id
7
5
  from snowflake.snowpark import DataFrame, Session, functions as F
8
6
 
9
7
 
@@ -17,13 +15,22 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
17
15
  "CHLORIDES",
18
16
  "TOTAL_SULFUR_DIOXIDE",
19
17
  "PH",
20
- (F.col("FIXED_ACIDITY") * F.col("CITRIC_ACID")).alias("MY_NEW_FEATURE"),
18
+ (F.col("FIXED_ACIDITY") * F.col("CITRIC_ACID")).alias("HYBRID_ACID"),
21
19
  )
22
20
 
23
21
  return FeatureView(
24
22
  name="WINE_FEATURES", # name of feature view
25
- entities=[wine_entity], # entities
23
+ entities=[wine_id], # entities
26
24
  feature_df=feature_df, # definition dataframe
27
25
  refresh_freq="1d", # refresh frequency. '1d' means it refreshes everyday
28
- desc="Managed feature view about wine quality which refreshes everyday.",
26
+ desc="Managed features about wine quality which refreshes everyday.",
27
+ ).attach_feature_desc(
28
+ {
29
+ "FIXED_ACIDITY": "Fixed acidity.",
30
+ "CITRIC_ACID": "Citric acid.",
31
+ "CHLORIDES": "Chlorides",
32
+ "TOTAL_SULFUR_DIOXIDE": "Total sulfur dioxide.",
33
+ "PH": "PH.",
34
+ "HYBRID_ACID": "Hybrid acid generated by a production of fixed and citric acid.",
35
+ }
29
36
  )
@@ -1,9 +1,7 @@
1
1
  from typing import List
2
2
 
3
3
  from snowflake.ml.feature_store import FeatureView
4
- from snowflake.ml.feature_store.examples.wine_quality_features.entities import (
5
- wine_entity,
6
- )
4
+ from snowflake.ml.feature_store.examples.wine_quality_features.entities import wine_id
7
5
  from snowflake.snowpark import DataFrame, Session
8
6
 
9
7
 
@@ -14,8 +12,13 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
14
12
 
15
13
  return FeatureView(
16
14
  name="EXTRA_WINE_FEATURES", # name of feature view
17
- entities=[wine_entity], # entities
15
+ entities=[wine_id], # entities
18
16
  feature_df=feature_df, # feature dataframe
19
17
  refresh_freq=None, # refresh frequency. None means it never refresh
20
- desc="Static feature view about wine quality which never refresh.",
18
+ desc="Static features about wine quality which never refresh.",
19
+ ).attach_feature_desc(
20
+ {
21
+ "SULPHATES": "Sulphates.",
22
+ "ALCOHOL": "Alcohol.",
23
+ }
21
24
  )
@@ -1,5 +1,8 @@
1
1
  ---
2
2
  source_data: winequality_red
3
+ training_spine_table: winedata
3
4
  add_id_column: wine_id
4
5
  label_columns: quality
5
6
  excluded_columns: wine_id
7
+ desc: Features using wine quality data trying to predict the quality of wine.
8
+ model_category: regression