snowflake-ml-python 1.5.4__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries, and is provided for informational purposes only.
Files changed (100)
  1. snowflake/cortex/__init__.py +2 -0
  2. snowflake/cortex/_classify_text.py +36 -0
  3. snowflake/cortex/_complete.py +66 -35
  4. snowflake/cortex/_util.py +4 -4
  5. snowflake/ml/_internal/env_utils.py +11 -5
  6. snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
  7. snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
  8. snowflake/ml/_internal/telemetry.py +26 -2
  9. snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
  10. snowflake/ml/data/_internal/arrow_ingestor.py +284 -0
  11. snowflake/ml/data/data_connector.py +186 -0
  12. snowflake/ml/data/data_ingestor.py +45 -0
  13. snowflake/ml/data/data_source.py +23 -0
  14. snowflake/ml/data/ingestor_utils.py +62 -0
  15. snowflake/ml/data/torch_dataset.py +33 -0
  16. snowflake/ml/dataset/dataset.py +1 -13
  17. snowflake/ml/dataset/dataset_metadata.py +3 -1
  18. snowflake/ml/dataset/dataset_reader.py +23 -117
  19. snowflake/ml/feature_store/access_manager.py +7 -1
  20. snowflake/ml/feature_store/entity.py +19 -2
  21. snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
  22. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
  23. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
  24. snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
  25. snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
  26. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +37 -0
  27. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +30 -0
  28. snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +7 -0
  29. snowflake/ml/feature_store/examples/example_helper.py +278 -0
  30. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
  31. snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +44 -0
  32. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
  33. snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +9 -0
  34. snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
  35. snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
  36. snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
  37. snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
  38. snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
  39. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
  40. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +36 -0
  41. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +24 -0
  42. snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +8 -0
  43. snowflake/ml/feature_store/feature_store.py +637 -76
  44. snowflake/ml/feature_store/feature_view.py +316 -9
  45. snowflake/ml/fileset/stage_fs.py +18 -10
  46. snowflake/ml/lineage/lineage_node.py +1 -1
  47. snowflake/ml/model/_client/model/model_impl.py +11 -2
  48. snowflake/ml/model/_client/model/model_version_impl.py +171 -20
  49. snowflake/ml/model/_client/ops/model_ops.py +105 -27
  50. snowflake/ml/model/_client/ops/service_ops.py +121 -0
  51. snowflake/ml/model/_client/service/model_deployment_spec.py +95 -0
  52. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +31 -0
  53. snowflake/ml/model/_client/sql/model_version.py +13 -4
  54. snowflake/ml/model/_client/sql/service.py +129 -0
  55. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
  56. snowflake/ml/model/_model_composer/model_composer.py +14 -14
  57. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +33 -17
  58. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -1
  59. snowflake/ml/model/_model_composer/model_method/function_generator.py +3 -3
  60. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
  61. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +3 -27
  62. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -32
  63. snowflake/ml/model/_model_composer/model_method/model_method.py +5 -2
  64. snowflake/ml/model/_packager/model_env/model_env.py +7 -2
  65. snowflake/ml/model/_packager/model_handlers/_base.py +30 -3
  66. snowflake/ml/model/_packager/model_handlers/_utils.py +58 -1
  67. snowflake/ml/model/_packager/model_handlers/catboost.py +52 -3
  68. snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
  69. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +9 -5
  70. snowflake/ml/model/_packager/model_handlers/lightgbm.py +80 -3
  71. snowflake/ml/model/_packager/model_handlers/llm.py +7 -3
  72. snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
  73. snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
  74. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
  75. snowflake/ml/model/_packager/model_handlers/sklearn.py +87 -4
  76. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -2
  77. snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
  78. snowflake/ml/model/_packager/model_handlers/torchscript.py +8 -3
  79. snowflake/ml/model/_packager/model_handlers/xgboost.py +71 -3
  80. snowflake/ml/model/_packager/model_meta/model_meta.py +32 -2
  81. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +19 -0
  82. snowflake/ml/model/_packager/model_packager.py +2 -1
  83. snowflake/ml/model/_packager/model_runtime/model_runtime.py +7 -7
  84. snowflake/ml/model/model_signature.py +4 -4
  85. snowflake/ml/model/type_hints.py +2 -0
  86. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +1 -1
  87. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
  88. snowflake/ml/modeling/framework/base.py +28 -19
  89. snowflake/ml/modeling/impute/simple_imputer.py +26 -0
  90. snowflake/ml/modeling/pipeline/pipeline.py +7 -4
  91. snowflake/ml/registry/_manager/model_manager.py +16 -2
  92. snowflake/ml/registry/registry.py +100 -13
  93. snowflake/ml/utils/sql_client.py +22 -0
  94. snowflake/ml/version.py +1 -1
  95. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/METADATA +81 -2
  96. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/RECORD +99 -66
  97. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/WHEEL +1 -1
  98. snowflake/ml/_internal/lineage/data_source.py +0 -10
  99. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/LICENSE.txt +0 -0
  100. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/top_level.txt +0 -0
snowflake/ml/dataset/dataset_reader.py
@@ -1,48 +1,37 @@
- from typing import Any, List
-
- import pandas as pd
- from pyarrow import parquet as pq
+ from typing import Any, List, Optional, Type

  from snowflake import snowpark
  from snowflake.ml._internal import telemetry
- from snowflake.ml._internal.lineage import data_source, lineage_utils
- from snowflake.ml._internal.utils import import_utils
+ from snowflake.ml._internal.lineage import lineage_utils
+ from snowflake.ml.data import data_connector, data_ingestor, data_source, ingestor_utils
  from snowflake.ml.fileset import snowfs

  _PROJECT = "Dataset"
  _SUBPROJECT = "DatasetReader"
- TARGET_FILE_SIZE = 32 * 2**20  # The max file size for data loading.


- class DatasetReader:
+ class DatasetReader(data_connector.DataConnector):
      """Snowflake Dataset abstraction which provides application integration connectors"""

      @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
      def __init__(
          self,
-         session: snowpark.Session,
-         sources: List[data_source.DataSource],
+         ingestor: data_ingestor.DataIngestor,
+         *,
+         snowpark_session: snowpark.Session,
      ) -> None:
-         """Initialize a DatasetVersion object.
+         super().__init__(ingestor)

-         Args:
-             session: Snowpark Session to interact with Snowflake backend.
-             sources: Data sources to read from.
+         self._session: snowpark.Session = snowpark_session
+         self._fs: snowfs.SnowFileSystem = ingestor_utils.get_dataset_filesystem(self._session)
+         self._files: Optional[List[str]] = None

-         Raises:
-             ValueError: `sources` arg was empty or null
-         """
-         if not sources:
-             raise ValueError("Invalid input: empty `sources` list not allowed")
-         self._session = session
-         self._sources = sources
-         self._fs: snowfs.SnowFileSystem = snowfs.SnowFileSystem(
-             snowpark_session=self._session,
-             cache_type="bytes",
-             block_size=2 * TARGET_FILE_SIZE,
-         )
-
-         self._files: List[str] = []
+     @classmethod
+     def from_dataframe(
+         cls, df: snowpark.DataFrame, ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None, **kwargs: Any
+     ) -> "DatasetReader":
+         # Block superclass constructor from Snowpark DataFrames
+         raise RuntimeError("Creating DatasetReader from DataFrames not supported")

      def _list_files(self) -> List[str]:
          """Private helper function that lists all files in this DatasetVersion and caches the results."""
@@ -50,18 +39,14 @@ class DatasetReader:
              return self._files

          files: List[str] = []
-         for source in self._sources:
-             # Sort within each source for consistent ordering
-             files.extend(sorted(self._fs.ls(source.url)))  # type: ignore[arg-type]
+         for source in self.data_sources:
+             assert isinstance(source, data_source.DatasetInfo)
+             files.extend(ingestor_utils.get_dataset_files(self._session, source, filesystem=self._fs))
          files.sort()

          self._files = files
          return self._files

-     @property
-     def data_sources(self) -> List[data_source.DataSource]:
-         return self._sources
-
      @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
      def files(self) -> List[str]:
          """Get the list of remote file paths for the current DatasetVersion.
@@ -85,76 +70,6 @@ class DatasetReader:
          """Return an fsspec FileSystem which can be used to load the DatasetVersion's `files()`"""
          return self._fs

-     @telemetry.send_api_usage_telemetry(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
-     )
-     def to_torch_datapipe(self, *, batch_size: int, shuffle: bool = False, drop_last_batch: bool = True) -> Any:
-         """Transform the Snowflake data into a ready-to-use Pytorch datapipe.
-
-         Return a Pytorch datapipe which iterates on rows of data.
-
-         Args:
-             batch_size: It specifies the size of each data batch which will be
-                 yield in the result datapipe
-             shuffle: It specifies whether the data will be shuffled. If True, files will be shuffled, and
-                 rows in each file will also be shuffled.
-             drop_last_batch: Whether the last batch of data should be dropped. If set to be true,
-                 then the last batch will get dropped if its size is smaller than the given batch_size.
-
-         Returns:
-             A Pytorch iterable datapipe that yield data.
-
-         Examples:
-             >>> dp = dataset.to_torch_datapipe(batch_size=1)
-             >>> for data in dp:
-             >>>     print(data)
-             ----
-             {'_COL_1':[10]}
-         """
-         IterableWrapper, _ = import_utils.import_or_get_dummy("torchdata.datapipes.iter.IterableWrapper")
-         torch_datapipe_module, _ = import_utils.import_or_get_dummy("snowflake.ml.fileset.torch_datapipe")
-
-         self._fs.optimize_read(self._list_files())
-
-         input_dp = IterableWrapper(self._list_files())
-         return torch_datapipe_module.ReadAndParseParquet(input_dp, self._fs, batch_size, shuffle, drop_last_batch)
-
-     @telemetry.send_api_usage_telemetry(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
-     )
-     def to_tf_dataset(self, *, batch_size: int, shuffle: bool = False, drop_last_batch: bool = True) -> Any:
-         """Transform the Snowflake data into a ready-to-use TensorFlow tf.data.Dataset.
-
-         Args:
-             batch_size: It specifies the size of each data batch which will be
-                 yield in the result datapipe
-             shuffle: It specifies whether the data will be shuffled. If True, files will be shuffled, and
-                 rows in each file will also be shuffled.
-             drop_last_batch: Whether the last batch of data should be dropped. If set to be true,
-                 then the last batch will get dropped if its size is smaller than the given batch_size.
-
-         Returns:
-             A tf.data.Dataset that yields batched tf.Tensors.
-
-         Examples:
-             >>> dp = dataset.to_tf_dataset(batch_size=1)
-             >>> for data in dp:
-             >>>     print(data)
-             ----
-             {'_COL_1': <tf.Tensor: shape=(1,), dtype=int64, numpy=[10]>}
-         """
-         tf_dataset_module, _ = import_utils.import_or_get_dummy("snowflake.ml.fileset.tf_dataset")
-
-         self._fs.optimize_read(self._list_files())
-
-         return tf_dataset_module.read_and_parse_parquet(
-             self._list_files(), self._fs, batch_size, shuffle, drop_last_batch
-         )
-
      @telemetry.send_api_usage_telemetry(
          project=_PROJECT,
          subproject=_SUBPROJECT,
@@ -177,7 +92,8 @@ class DatasetReader:
          """
          file_path_pattern = ".*data_.*[.]parquet"
          dfs: List[snowpark.DataFrame] = []
-         for source in self._sources:
+         for source in self.data_sources:
+             assert isinstance(source, data_source.DatasetInfo) and source.url is not None
              df = self._session.read.option("pattern", file_path_pattern).parquet(source.url)
              if only_feature_cols and source.exclude_cols:
                  df = df.drop(source.exclude_cols)
@@ -186,14 +102,4 @@ class DatasetReader:
          combined_df = dfs[0]
          for df in dfs[1:]:
              combined_df = combined_df.union_all_by_name(df)
-         return lineage_utils.patch_dataframe(combined_df, data_sources=self._sources, inplace=True)
-
-     @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
-     def to_pandas(self) -> pd.DataFrame:
-         """Retrieve the DatasetVersion contents as a Pandas Dataframe"""
-         files = self._list_files()
-         if not files:
-             return pd.DataFrame()  # Return empty DataFrame
-         self._fs.optimize_read(files)
-         pd_ds = pq.ParquetDataset(files, filesystem=self._fs)
-         return pd_ds.read_pandas().to_pandas()
+         return lineage_utils.patch_dataframe(combined_df, data_sources=self.data_sources, inplace=True)
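With the materialization helpers (to_pandas, to_torch_datapipe, to_tf_dataset) deleted here, DatasetReader now inherits them from the new data_connector.DataConnector base class (snowflake/ml/data/data_connector.py, +186 above). A minimal caller-side sketch, assuming the load_dataset entry point and the Dataset.read property keep their pre-1.6 shape and that the connector exposes the new to_torch_dataset suggested by torch_dataset.py; none of this is verified against the 1.6.1 wheel:

# Sketch only: the entry points below are assumptions, not shown in this diff.
from snowflake.ml import dataset

ds = dataset.load_dataset(session, name="MY_DATASET", version="v1")  # assumed API
reader = ds.read  # DatasetReader, now a DataConnector subclass

pdf = reader.to_pandas()  # now inherited from DataConnector
torch_ds = reader.to_torch_dataset(batch_size=32, shuffle=True)  # assumed name; cf. new torch_dataset.py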
snowflake/ml/feature_store/access_manager.py
@@ -273,7 +273,13 @@ def setup_feature_store(
      assert current_role is not None  # to make mypy happy
      try:
          session.use_role(producer_role)
-         fs = FeatureStore(session, database, schema, warehouse, creation_mode=CreationMode.CREATE_IF_NOT_EXIST)
+         fs = FeatureStore(
+             session,
+             database,
+             schema,
+             default_warehouse=warehouse,
+             creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
+         )
      finally:
          session.use_role(current_role)

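The helper's behavior is unchanged here; the warehouse argument simply moves from positional to keyword form. A before/after sketch, assuming FeatureStore's fourth parameter was already named default_warehouse in 1.5.4 (that naming is an assumption, not shown in this diff):

# 1.5.4 call style: warehouse bound positionally.
fs = FeatureStore(session, database, schema, warehouse, creation_mode=CreationMode.CREATE_IF_NOT_EXIST)

# 1.6.1 call style: same behavior, but explicit about which parameter it binds,
# which is safer given the large signature churn in feature_store.py (+637 -76).
fs = FeatureStore(session, database, schema, default_warehouse=warehouse, creation_mode=CreationMode.CREATE_IF_NOT_EXIST)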
snowflake/ml/feature_store/entity.py
@@ -22,7 +22,7 @@ class Entity:
      It can also be used for FeatureView search and lineage tracking.
      """

-     def __init__(self, name: str, join_keys: List[str], desc: str = "") -> None:
+     def __init__(self, name: str, join_keys: List[str], *, desc: str = "") -> None:
          """
          Creates an Entity instance.

@@ -30,6 +30,23 @@ class Entity:
              name: name of the Entity.
              join_keys: join keys associated with a FeatureView, used for feature retrieval.
              desc: description of the Entity.
+
+         Example::
+
+             >>> fs = FeatureStore(...)
+             >>> e_1 = Entity(
+             ...     name="my_entity",
+             ...     join_keys=['col_1'],
+             ...     desc='My first entity.'
+             ... )
+             >>> fs.register_entity(e_1)
+             >>> fs.list_entities().show()
+             -----------------------------------------------------------
+             |"NAME"     |"JOIN_KEYS"  |"DESC"            |"OWNER"     |
+             -----------------------------------------------------------
+             |MY_ENTITY  |["COL_1"]    |My first entity.  |REGTEST_RL  |
+             -----------------------------------------------------------
+
          """
          self._validate(name, join_keys)

@@ -65,7 +82,7 @@ class Entity:

      @staticmethod
      def _construct_entity(name: str, join_keys: List[str], desc: str, owner: str) -> "Entity":
-         e = Entity(name, join_keys, desc)
+         e = Entity(name, join_keys, desc=desc)
          e.owner = owner
          return e

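Note the bare * added to __init__: desc is now keyword-only, which is exactly why the internal _construct_entity call is updated in the same patch. Any external caller passing the description positionally needs the same one-line fix:

from snowflake.ml.feature_store import Entity

e = Entity("my_entity", ["col_1"], desc="My first entity.")  # works in 1.6.1
# e = Entity("my_entity", ["col_1"], "My first entity.")     # TypeError in 1.6.1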
snowflake/ml/feature_store/examples/airline_features/entities.py (new file)
@@ -0,0 +1,16 @@
+ from typing import List
+
+ from snowflake.ml.feature_store import Entity
+
+ zipcode_entity = Entity(
+     name="AIRPORT_ZIP_CODE",
+     join_keys=["AIRPORT_ZIP_CODE"],
+     desc="Zip code of the airport.",
+ )
+
+ plane_entity = Entity(name="PLANE_MODEL", join_keys=["PLANE_MODEL"], desc="The model of an airplane.")
+
+
+ # This will be invoked by example_helper.py. Do not change function name.
+ def get_all_entities() -> List[Entity]:
+     return [zipcode_entity, plane_entity]
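The get_all_entities() hook here, like the create_draft_feature_view() hook in the feature modules below, is the contract that the new example_helper.py (+278 lines, not shown in this diff) programs against. A plausible sketch of that discovery mechanism, offered as an assumption rather than the actual implementation:

import importlib
from typing import List

from snowflake.ml.feature_store import Entity


def load_example_entities(example_name: str) -> List[Entity]:
    # Hypothetical helper: import the example's entities module and call the
    # required hook. The real example_helper.py may well work differently.
    module = importlib.import_module(
        f"snowflake.ml.feature_store.examples.{example_name}.entities"
    )
    return module.get_all_entities()


# e.g. load_example_entities("airline_features") -> [zipcode_entity, plane_entity]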
snowflake/ml/feature_store/examples/airline_features/features/plane_features.py (new file)
@@ -0,0 +1,31 @@
+ from typing import List
+
+ from snowflake.ml.feature_store import FeatureView
+ from snowflake.ml.feature_store.examples.airline_features.entities import plane_entity
+ from snowflake.snowpark import DataFrame, Session
+
+
+ # This function will be invoked by example_helper.py. Do not change the name.
+ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+     """Create a feature view about airplane model."""
+     query = session.sql(
+         """
+         select
+             PLANE_MODEL,
+             SEATING_CAPACITY
+         from
+             PLANE_MODEL_ATTRIBUTES
+         """
+     )
+
+     return FeatureView(
+         name="f_plane",  # name of feature view
+         entities=[plane_entity],  # entities
+         feature_df=query,  # definition query
+         refresh_freq=None,  # refresh frequency
+         desc="Plane features never refresh.",
+     ).attach_feature_desc(
+         {
+             "SEATING_CAPACITY": "The seating capacity of a plane.",
+         }
+     )
snowflake/ml/feature_store/examples/airline_features/features/weather_features.py (new file)
@@ -0,0 +1,42 @@
+ from typing import List
+
+ from snowflake.ml.feature_store import FeatureView
+ from snowflake.ml.feature_store.examples.airline_features.entities import zipcode_entity
+ from snowflake.snowpark import DataFrame, Session
+
+
+ # This function will be invoked by example_helper.py. Do not change the name.
+ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+     """Create a feature view about airport weather."""
+     query = session.sql(
+         """
+         select
+             DATETIME_UTC AS TS,
+             AIRPORT_ZIP_CODE,
+             sum(RAIN_MM_H) over (
+                 partition by AIRPORT_ZIP_CODE
+                 order by DATETIME_UTC
+                 range between interval '30 minutes' preceding and current row
+             ) RAIN_SUM_30M,
+             sum(RAIN_MM_H) over (
+                 partition by AIRPORT_ZIP_CODE
+                 order by DATETIME_UTC
+                 range between interval '1 day' preceding and current row
+             ) RAIN_SUM_60M
+         from AIRPORT_WEATHER_STATION
+         """
+     )
+
+     return FeatureView(
+         name="f_weather",  # name of feature view
+         entities=[zipcode_entity],  # entities
+         feature_df=query,  # definition query
+         timestamp_col="TS",  # timestamp column
+         refresh_freq="1d",  # refresh frequency
+         desc="Airport weather features refreshed every day.",
+     ).attach_feature_desc(
+         {
+             "RAIN_SUM_30M": "The sum of rain fall over past 30 minutes for one zipcode.",
+             "RAIN_SUM_60M": "The sum of rain fall over past 1 day for one zipcode.",
+         }
+     )
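Because this view declares timestamp_col="TS", it supports point-in-time joins against a spine table. A hedged retrieval sketch; fs, spine_df, and registered_weather_fv are illustrative assumptions, and the spine column name is taken from the timestamp_column declared in the source.yaml that follows:

# Hedged sketch: point-in-time feature retrieval against f_weather.
# spine_df must carry AIRPORT_ZIP_CODE plus a timestamp column.
training_df = fs.retrieve_feature_values(
    spine_df=spine_df,
    features=[registered_weather_fv],  # the registered f_weather view
    spine_timestamp_col="SCHEDULED_DEPARTURE_UTC",
)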
snowflake/ml/feature_store/examples/airline_features/source.yaml (new file)
@@ -0,0 +1,7 @@
+ ---
+ source_data: airline
+ label_columns: DEPARTING_DELAY
+ timestamp_column: SCHEDULED_DEPARTURE_UTC
+ desc: Features using synthetic airline data to predict the departing delay.
+ model_category: classification
+ training_spine_table: US_FLIGHT_SCHEDULES
snowflake/ml/feature_store/examples/citibike_trip_features/entities.py (new file)
@@ -0,0 +1,20 @@
+ from typing import List
+
+ from snowflake.ml.feature_store import Entity
+
+ end_station_id = Entity(
+     name="end_station_id",
+     join_keys=["end_station_id"],
+     desc="The id of an end station.",
+ )
+
+ trip_id = Entity(
+     name="trip_id",
+     join_keys=["trip_id"],
+     desc="The id of a trip.",
+ )
+
+
+ # This will be invoked by example_helper.py. Do not change function name.
+ def get_all_entities() -> List[Entity]:
+     return [end_station_id, trip_id]
snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py (new file)
@@ -0,0 +1,37 @@
+ from typing import List
+
+ from snowflake.ml.feature_store import FeatureView
+ from snowflake.ml.feature_store.examples.citibike_trip_features.entities import (
+     end_station_id,
+ )
+ from snowflake.snowpark import DataFrame, Session
+
+
+ # This function will be invoked by example_helper.py. Do not change the name.
+ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+     """Create a feature view about trip station."""
+     query = session.sql(
+         f"""
+         select
+             end_station_id,
+             count(end_station_id) as f_count,
+             avg(end_station_latitude) as f_avg_latitude,
+             avg(end_station_longitude) as f_avg_longtitude
+         from {source_tables[0]}
+         group by end_station_id
+         """
+     )
+
+     return FeatureView(
+         name="f_station",  # name of feature view
+         entities=[end_station_id],  # entities
+         feature_df=query,  # definition query
+         refresh_freq="1d",  # refresh frequency. '1d' means it refreshes everyday
+         desc="Station features refreshed every day.",
+     ).attach_feature_desc(
+         {
+             "f_count": "How many times this station appears in 1 day.",
+             "f_avg_latitude": "Averaged latitude of a station.",
+             "f_avg_longtitude": "Averaged longtitude of a station.",
+         }
+     )
snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py (new file)
@@ -0,0 +1,30 @@
+ from typing import List
+
+ from snowflake.ml.feature_store import FeatureView
+ from snowflake.ml.feature_store.examples.citibike_trip_features.entities import trip_id
+ from snowflake.snowpark import DataFrame, Session, functions as F
+
+
+ # This function will be invoked by example_helper.py. Do not change the name.
+ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+     """Create a feature view about trip."""
+     feature_df = source_dfs[0].select(
+         "trip_id",
+         F.col("birth_year").alias("f_birth_year"),
+         F.col("gender").alias("f_gender"),
+         F.col("bikeid").alias("f_bikeid"),
+     )
+
+     return FeatureView(
+         name="f_trip",  # name of feature view
+         entities=[trip_id],  # entities
+         feature_df=feature_df,  # definition query
+         refresh_freq=None,  # refresh frequency. None indicates it never refresh
+         desc="Static trip features",
+     ).attach_feature_desc(
+         {
+             "f_birth_year": "The birth year of a trip passenger.",
+             "f_gender": "The gender of a trip passenger.",
+             "f_bikeid": "The bike id of a trip passenger.",
+         }
+     )
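The two citibike views differ only in refresh_freq: f_station ("1d") is rebuilt on a daily schedule once registered, while f_trip (None) stays a static view. A minimal registration sketch, assuming the standard register_feature_view flow; station_fv and trip_fv stand for the drafts returned by the two create_draft_feature_view functions above:

# Hedged sketch: registering the two draft views above.
registered_station = fs.register_feature_view(feature_view=station_fv, version="1.0")  # refreshes daily
registered_trip = fs.register_feature_view(feature_view=trip_fv, version="1.0")  # static, never refreshes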
snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml (new file)
@@ -0,0 +1,7 @@
+ ---
+ source_data: citibike_trips
+ training_spine_table: citibike_trips
+ label_columns: tripduration
+ add_id_column: trip_id
+ desc: Features using citibike trip data trying to predict the duration of a trip.
+ model_category: regression