snowflake-ml-python 1.23.0__py3-none-any.whl → 1.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/platform_capabilities.py +0 -4
- snowflake/ml/feature_store/__init__.py +2 -0
- snowflake/ml/feature_store/aggregation.py +367 -0
- snowflake/ml/feature_store/feature.py +366 -0
- snowflake/ml/feature_store/feature_store.py +234 -20
- snowflake/ml/feature_store/feature_view.py +189 -4
- snowflake/ml/feature_store/metadata_manager.py +425 -0
- snowflake/ml/feature_store/tile_sql_generator.py +1079 -0
- snowflake/ml/model/__init__.py +4 -0
- snowflake/ml/model/_client/model/batch_inference_specs.py +38 -2
- snowflake/ml/model/_client/model/model_version_impl.py +31 -14
- snowflake/ml/model/_client/ops/model_ops.py +2 -8
- snowflake/ml/model/_client/ops/service_ops.py +0 -5
- snowflake/ml/model/_client/sql/service.py +21 -29
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -1
- snowflake/ml/model/_packager/model_handlers/huggingface.py +20 -0
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +70 -14
- snowflake/ml/model/_signatures/utils.py +76 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/METADATA +39 -1
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/RECORD +24 -20
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""User-facing Feature builder for defining feature view features.
|
|
2
|
+
|
|
3
|
+
This module provides the Feature class for defining features in a FeatureView,
|
|
4
|
+
including both aggregated features (for tiled feature views) and non-aggregated
|
|
5
|
+
fields (for standard feature views).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
|
|
12
|
+
from snowflake.ml.feature_store.aggregation import (
|
|
13
|
+
AggregationSpec,
|
|
14
|
+
AggregationType,
|
|
15
|
+
is_lifetime_window,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Feature:
    """Fluent builder for defining features in a FeatureView.

    A ``Feature`` records an aggregation function, the source column, a
    lookback window, an optional offset and any extra aggregation parameters.
    Use the factory classmethods (``Feature.sum``, ``Feature.last_n``, ...)
    to create instances, optionally chain :meth:`alias` to choose the output
    column name, and call :meth:`to_spec` to obtain the internal
    ``AggregationSpec`` representation.

    Example::

        >>> from snowflake.ml.feature_store import Feature
        >>>
        >>> # Define features with factory methods
        >>> amount_sum = Feature.sum("amount", "24h").alias("total_amount_24h")
        >>> recent_pages = Feature.last_n("page_id", "1h", n=10).alias("recent_pages")
        >>> txn_count = Feature.count("transaction_id", "7d")
    """

    def __init__(
        self,
        function: AggregationType,
        column: str,
        window: str,
        offset: str = "0",
        **params: Any,
    ) -> None:
        """Create a Feature with the specified aggregation.

        Args:
            function: The aggregation function type.
            column: The source column to aggregate.
            window: The lookback window (e.g., "24h", "7d").
            offset: Offset to shift window into past (e.g., "1d" = [t-window-1d, t-1d]).
                Must be a multiple of feature_granularity. Default is "0" (no offset).
            **params: Additional parameters for the aggregation
                (the factory methods use ``n`` and ``percentile``).
        """
        self._function = function
        self._column = column
        self._window = window
        self._offset = offset
        # ``params`` is the fresh dict built from **kwargs, so it is owned by this instance.
        self._params = params
        self._alias: Optional[str] = None

    def alias(self, name: str, case_sensitive: bool = False) -> Feature:
        """Set the output column name for this feature.

        Args:
            name: The output column name.
            case_sensitive: If True, preserve the exact case of the name; it is
                stored wrapped in double quotes, with any embedded double quote
                doubled so the result is a well-formed quoted identifier.
                If False (default), the name is stored uppercased.

        Returns:
            Self for method chaining.
        """
        if case_sensitive:
            # A literal '"' inside a quoted SQL identifier must be written as '""';
            # without this, a name such as a"b would yield the malformed "a"b".
            escaped = name.replace('"', '""')
            self._alias = f'"{escaped}"'
        else:
            self._alias = name.upper()
        return self

    def to_spec(self) -> AggregationSpec:
        """Convert to an AggregationSpec for internal use.

        The output column is the alias when one is set (and non-empty),
        otherwise the generated default name.

        Returns:
            The AggregationSpec representation.
        """
        output_column = self._alias or self._default_output_name()
        return AggregationSpec(
            function=self._function,
            source_column=self._column,
            window=self._window,
            output_column=output_column,
            offset=self._offset,
            # Hand over a copy so later mutation of the spec's params cannot
            # alter this builder (and vice versa).
            params=dict(self._params),
        )

    def _default_output_name(self) -> str:
        """Generate a default output column name.

        The name is ``<column>_<function value>_<window suffix>`` uppercased,
        where the window suffix is ``lifetime`` for lifetime windows and the
        window string with spaces removed otherwise.
        """
        # NOTE: offset and extra params are not part of the default name, so two
        # features differing only in those share the same default; use alias()
        # to disambiguate them.
        if is_lifetime_window(self._window):
            window_suffix = "lifetime"
        else:
            window_suffix = self._window.replace(" ", "").lower()
        base_name = f"{self._column}_{self._function.value}_{window_suffix}"
        return base_name.upper()

    # Factory methods for creating features

    @classmethod
    def sum(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create a SUM aggregation feature.

        Args:
            column: The column to sum.
            window: The lookback window (e.g., "24h").
            offset: Offset to shift window into past (e.g., "7d" = previous week).
                Default is "0" (no offset).

        Returns:
            A Feature configured for SUM aggregation.

        Example::

            >>> amount_sum = Feature.sum("amount", "24h").alias("total_amount")
            >>> prev_week_sum = Feature.sum("amount", "7d", offset="7d").alias("prev_week")
        """
        return cls(AggregationType.SUM, column, window, offset)

    @classmethod
    def count(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create a COUNT aggregation feature.

        Args:
            column: The column to count.
            window: The lookback window (e.g., "7d").
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for COUNT aggregation.

        Example::

            >>> txn_count = Feature.count("transaction_id", "7d")
        """
        return cls(AggregationType.COUNT, column, window, offset)

    @classmethod
    def avg(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create an AVG aggregation feature.

        Args:
            column: The column to average.
            window: The lookback window.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for AVG aggregation.

        Example::

            >>> avg_amount = Feature.avg("amount", "24h")
        """
        return cls(AggregationType.AVG, column, window, offset)

    @classmethod
    def min(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create a MIN aggregation feature.

        Args:
            column: The column to find minimum of.
            window: The lookback window (e.g., "24h").
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for MIN aggregation.

        Example::

            >>> min_price = Feature.min("price", "24h")
        """
        return cls(AggregationType.MIN, column, window, offset)

    @classmethod
    def max(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create a MAX aggregation feature.

        Args:
            column: The column to find maximum of.
            window: The lookback window (e.g., "24h").
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for MAX aggregation.

        Example::

            >>> max_price = Feature.max("price", "24h")
        """
        return cls(AggregationType.MAX, column, window, offset)

    @classmethod
    def std(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create a STD (standard deviation) aggregation feature.

        Args:
            column: The column to compute standard deviation for.
            window: The lookback window.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for STD aggregation.

        Example::

            >>> price_std = Feature.std("price", "24h")
        """
        return cls(AggregationType.STD, column, window, offset)

    @classmethod
    def var(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create a VAR (variance) aggregation feature.

        Args:
            column: The column to compute variance for.
            window: The lookback window.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for VAR aggregation.

        Example::

            >>> price_var = Feature.var("price", "24h")
        """
        return cls(AggregationType.VAR, column, window, offset)

    @classmethod
    def last_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
        """Create a LAST_N aggregation feature.

        Collects the N most recent values within the window.

        Args:
            column: The column to collect values from.
            window: The lookback window.
            n: Number of values to collect.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for LAST_N aggregation.

        Example::

            >>> recent_pages = Feature.last_n("page_id", "1h", n=10)
        """
        return cls(AggregationType.LAST_N, column, window, offset, n=n)

    @classmethod
    def last_distinct_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
        """Create a LAST_DISTINCT_N aggregation feature.

        Collects the N most recent distinct values within the window.

        Args:
            column: The column to collect values from.
            window: The lookback window.
            n: Number of distinct values to collect.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for LAST_DISTINCT_N aggregation.

        Example::

            >>> recent_categories = Feature.last_distinct_n("category", "24h", n=5)
        """
        return cls(AggregationType.LAST_DISTINCT_N, column, window, offset, n=n)

    @classmethod
    def first_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
        """Create a FIRST_N aggregation feature.

        Collects the N oldest values within the window.

        Args:
            column: The column to collect values from.
            window: The lookback window.
            n: Number of values to collect.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for FIRST_N aggregation.

        Example::

            >>> first_pages = Feature.first_n("page_id", "1h", n=10)
        """
        return cls(AggregationType.FIRST_N, column, window, offset, n=n)

    @classmethod
    def first_distinct_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
        """Create a FIRST_DISTINCT_N aggregation feature.

        Collects the N oldest distinct values within the window.

        Args:
            column: The column to collect values from.
            window: The lookback window.
            n: Number of distinct values to collect.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for FIRST_DISTINCT_N aggregation.

        Example::

            >>> first_categories = Feature.first_distinct_n("category", "24h", n=5)
        """
        return cls(AggregationType.FIRST_DISTINCT_N, column, window, offset, n=n)

    @classmethod
    def approx_count_distinct(cls, column: str, window: str, offset: str = "0") -> Feature:
        """Create an APPROX_COUNT_DISTINCT aggregation feature.

        Estimates the number of distinct values using HyperLogLog algorithm.
        This is approximate but highly efficient for large datasets.

        Args:
            column: The column to count distinct values.
            window: The lookback window.
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for APPROX_COUNT_DISTINCT aggregation.

        Example::

            >>> unique_users = Feature.approx_count_distinct("user_id", "24h")
        """
        return cls(AggregationType.APPROX_COUNT_DISTINCT, column, window, offset)

    @classmethod
    def approx_percentile(cls, column: str, window: str, *, percentile: float = 0.5, offset: str = "0") -> Feature:
        """Create an APPROX_PERCENTILE aggregation feature.

        Estimates the specified percentile using T-Digest algorithm.
        This is approximate but highly efficient for large datasets.

        Args:
            column: The column to compute percentile for.
            window: The lookback window.
            percentile: The percentile to estimate (0.0 to 1.0). Default is 0.5 (median).
            offset: Offset to shift window into past. Default is "0" (no offset).

        Returns:
            A Feature configured for APPROX_PERCENTILE aggregation.

        Example::

            >>> median_amount = Feature.approx_percentile("amount", "24h", percentile=0.5)
            >>> p95_latency = Feature.approx_percentile("latency", "1h", percentile=0.95)
        """
        return cls(AggregationType.APPROX_PERCENTILE, column, window, offset, percentile=percentile)

    def __repr__(self) -> str:
        """Return a debug representation; offset, params and alias appear only when set."""
        # !r quotes strings safely even when they contain quote characters.
        alias_str = f", alias={self._alias!r}" if self._alias else ""
        params_str = f", params={self._params}" if self._params else ""
        offset_str = f", offset={self._offset!r}" if self._offset != "0" else ""
        return (
            f"{type(self).__name__}({self._function.value}, column={self._column!r}, "
            f"window={self._window!r}{offset_str}{params_str}{alias_str})"
        )
|