snowflake-ml-python 1.22.0__py3-none-any.whl → 1.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. snowflake/ml/_internal/platform_capabilities.py +0 -4
  2. snowflake/ml/feature_store/__init__.py +2 -0
  3. snowflake/ml/feature_store/aggregation.py +367 -0
  4. snowflake/ml/feature_store/feature.py +366 -0
  5. snowflake/ml/feature_store/feature_store.py +234 -20
  6. snowflake/ml/feature_store/feature_view.py +189 -4
  7. snowflake/ml/feature_store/metadata_manager.py +425 -0
  8. snowflake/ml/feature_store/tile_sql_generator.py +1079 -0
  9. snowflake/ml/jobs/__init__.py +2 -0
  10. snowflake/ml/jobs/_utils/constants.py +1 -0
  11. snowflake/ml/jobs/_utils/payload_utils.py +38 -18
  12. snowflake/ml/jobs/_utils/query_helper.py +8 -1
  13. snowflake/ml/jobs/_utils/runtime_env_utils.py +117 -0
  14. snowflake/ml/jobs/_utils/stage_utils.py +2 -2
  15. snowflake/ml/jobs/_utils/types.py +22 -2
  16. snowflake/ml/jobs/job_definition.py +232 -0
  17. snowflake/ml/jobs/manager.py +16 -177
  18. snowflake/ml/model/__init__.py +4 -0
  19. snowflake/ml/model/_client/model/batch_inference_specs.py +38 -2
  20. snowflake/ml/model/_client/model/model_version_impl.py +120 -89
  21. snowflake/ml/model/_client/ops/model_ops.py +4 -26
  22. snowflake/ml/model/_client/ops/param_utils.py +124 -0
  23. snowflake/ml/model/_client/ops/service_ops.py +63 -23
  24. snowflake/ml/model/_client/service/model_deployment_spec.py +12 -5
  25. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
  26. snowflake/ml/model/_client/sql/service.py +25 -54
  27. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +21 -3
  28. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +21 -3
  29. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +21 -3
  30. snowflake/ml/model/_model_composer/model_method/model_method.py +3 -1
  31. snowflake/ml/model/_packager/model_handlers/huggingface.py +74 -10
  32. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +121 -29
  33. snowflake/ml/model/_signatures/utils.py +130 -0
  34. snowflake/ml/model/openai_signatures.py +97 -0
  35. snowflake/ml/registry/_manager/model_parameter_reconciler.py +1 -1
  36. snowflake/ml/version.py +1 -1
  37. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/METADATA +105 -1
  38. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/RECORD +41 -35
  39. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/WHEEL +1 -1
  40. snowflake/ml/experiment/callback/__init__.py +0 -0
  41. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/licenses/LICENSE.txt +0 -0
  42. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/top_level.txt +0 -0
snowflake/ml/feature_store/feature.py (new file)
@@ -0,0 +1,366 @@
+ """User-facing Feature builder for defining feature view features.
+
+ This module provides the Feature class for defining features in a FeatureView,
+ including both aggregated features (for tiled feature views) and non-aggregated
+ fields (for standard feature views).
+ """
+
+ from __future__ import annotations
+
+ from typing import Any, Optional
+
+ from snowflake.ml.feature_store.aggregation import (
+     AggregationSpec,
+     AggregationType,
+     is_lifetime_window,
+ )
+
+
+ class Feature:
+     """Fluent builder for defining features in a FeatureView.
+
+     This class provides a user-friendly API for defining features with
+     time-series aggregations. It supports method chaining for setting
+     options like aliases.
+
+     Example::
+
+         >>> from snowflake.ml.feature_store import Feature
+         >>>
+         >>> # Define features with factory methods
+         >>> amount_sum = Feature.sum("amount", "24h").alias("total_amount_24h")
+         >>> recent_pages = Feature.last_n("page_id", "1h", n=10).alias("recent_pages")
+         >>> txn_count = Feature.count("transaction_id", "7d")
+     """
+
+     def __init__(
+         self,
+         function: AggregationType,
+         column: str,
+         window: str,
+         offset: str = "0",
+         **params: Any,
+     ) -> None:
+         """Create a Feature with the specified aggregation.
+
+         Args:
+             function: The aggregation function type.
+             column: The source column to aggregate.
+             window: The lookback window (e.g., "24h", "7d").
+             offset: Offset to shift window into past (e.g., "1d" = [t-window-1d, t-1d]).
+                 Must be a multiple of feature_granularity. Default is "0" (no offset).
+             **params: Additional parameters for the aggregation.
+         """
+         self._function = function
+         self._column = column
+         self._window = window
+         self._offset = offset
+         self._params = params
+         self._alias: Optional[str] = None
+
+     def alias(self, name: str, case_sensitive: bool = False) -> Feature:
+         """Set the output column name for this feature.
+
+         Args:
+             name: The output column name.
+             case_sensitive: If True, preserve the exact case of the name (will be quoted in SQL).
+                 If False (default), the name will be converted to uppercase (Snowflake default).
+
+         Returns:
+             Self for method chaining.
+         """
+         # Store with quotes if case-sensitive, uppercase if case-insensitive
+         self._alias = f'"{name}"' if case_sensitive else name.upper()
+         return self
+
+     def to_spec(self) -> AggregationSpec:
+         """Convert to an AggregationSpec for internal use.
+
+         Returns:
+             The AggregationSpec representation.
+         """
+         output_column = self._alias if self._alias else self._default_output_name()
+         return AggregationSpec(
+             function=self._function,
+             source_column=self._column,
+             window=self._window,
+             output_column=output_column,
+             offset=self._offset,
+             params=self._params,
+         )
+
+     def _default_output_name(self) -> str:
+         """Generate a default output column name."""
+         if is_lifetime_window(self._window):
+             window_suffix = "lifetime"
+         else:
+             window_suffix = self._window.replace(" ", "").lower()
+         base_name = f"{self._column}_{self._function.value}_{window_suffix}"
+         return base_name.upper()
+
+     # Factory methods for creating features
+
+     @classmethod
+     def sum(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create a SUM aggregation feature.
+
+         Args:
+             column: The column to sum.
+             window: The lookback window (e.g., "24h").
+             offset: Offset to shift window into past (e.g., "7d" = previous week).
+                 Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for SUM aggregation.
+
+         Example::
+
+             >>> amount_sum = Feature.sum("amount", "24h").alias("total_amount")
+             >>> prev_week_sum = Feature.sum("amount", "7d", offset="7d").alias("prev_week")
+         """
+         return cls(AggregationType.SUM, column, window, offset)
+
+     @classmethod
+     def count(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create a COUNT aggregation feature.
+
+         Args:
+             column: The column to count.
+             window: The lookback window (e.g., "7d").
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for COUNT aggregation.
+
+         Example::
+
+             >>> txn_count = Feature.count("transaction_id", "7d")
+         """
+         return cls(AggregationType.COUNT, column, window, offset)
+
+     @classmethod
+     def avg(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create an AVG aggregation feature.
+
+         Args:
+             column: The column to average.
+             window: The lookback window.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for AVG aggregation.
+
+         Example::
+
+             >>> avg_amount = Feature.avg("amount", "24h")
+         """
+         return cls(AggregationType.AVG, column, window, offset)
+
+     @classmethod
+     def min(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create a MIN aggregation feature.
+
+         Args:
+             column: The column to find minimum of.
+             window: The lookback window (e.g., "24h").
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for MIN aggregation.
+
+         Example::
+
+             >>> min_price = Feature.min("price", "24h")
+         """
+         return cls(AggregationType.MIN, column, window, offset)
+
+     @classmethod
+     def max(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create a MAX aggregation feature.
+
+         Args:
+             column: The column to find maximum of.
+             window: The lookback window (e.g., "24h").
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for MAX aggregation.
+
+         Example::
+
+             >>> max_price = Feature.max("price", "24h")
+         """
+         return cls(AggregationType.MAX, column, window, offset)
+
+     @classmethod
+     def std(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create a STD (standard deviation) aggregation feature.
+
+         Args:
+             column: The column to compute standard deviation for.
+             window: The lookback window.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for STD aggregation.
+
+         Example::
+
+             >>> price_std = Feature.std("price", "24h")
+         """
+         return cls(AggregationType.STD, column, window, offset)
+
+     @classmethod
+     def var(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create a VAR (variance) aggregation feature.
+
+         Args:
+             column: The column to compute variance for.
+             window: The lookback window.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for VAR aggregation.
+
+         Example::
+
+             >>> price_var = Feature.var("price", "24h")
+         """
+         return cls(AggregationType.VAR, column, window, offset)
+
+     @classmethod
+     def last_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
+         """Create a LAST_N aggregation feature.
+
+         Collects the N most recent values within the window.
+
+         Args:
+             column: The column to collect values from.
+             window: The lookback window.
+             n: Number of values to collect.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for LAST_N aggregation.
+
+         Example::
+
+             >>> recent_pages = Feature.last_n("page_id", "1h", n=10)
+         """
+         return cls(AggregationType.LAST_N, column, window, offset, n=n)
+
+     @classmethod
+     def last_distinct_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
+         """Create a LAST_DISTINCT_N aggregation feature.
+
+         Collects the N most recent distinct values within the window.
+
+         Args:
+             column: The column to collect values from.
+             window: The lookback window.
+             n: Number of distinct values to collect.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for LAST_DISTINCT_N aggregation.
+
+         Example::
+
+             >>> recent_categories = Feature.last_distinct_n("category", "24h", n=5)
+         """
+         return cls(AggregationType.LAST_DISTINCT_N, column, window, offset, n=n)
+
+     @classmethod
+     def first_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
+         """Create a FIRST_N aggregation feature.
+
+         Collects the N oldest values within the window.
+
+         Args:
+             column: The column to collect values from.
+             window: The lookback window.
+             n: Number of values to collect.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for FIRST_N aggregation.
+
+         Example::
+
+             >>> first_pages = Feature.first_n("page_id", "1h", n=10)
+         """
+         return cls(AggregationType.FIRST_N, column, window, offset, n=n)
+
+     @classmethod
+     def first_distinct_n(cls, column: str, window: str, *, n: int, offset: str = "0") -> Feature:
+         """Create a FIRST_DISTINCT_N aggregation feature.
+
+         Collects the N oldest distinct values within the window.
+
+         Args:
+             column: The column to collect values from.
+             window: The lookback window.
+             n: Number of distinct values to collect.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for FIRST_DISTINCT_N aggregation.
+
+         Example::
+
+             >>> first_categories = Feature.first_distinct_n("category", "24h", n=5)
+         """
+         return cls(AggregationType.FIRST_DISTINCT_N, column, window, offset, n=n)
+
+     @classmethod
+     def approx_count_distinct(cls, column: str, window: str, offset: str = "0") -> Feature:
+         """Create an APPROX_COUNT_DISTINCT aggregation feature.
+
+         Estimates the number of distinct values using HyperLogLog algorithm.
+         This is approximate but highly efficient for large datasets.
+
+         Args:
+             column: The column to count distinct values.
+             window: The lookback window.
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for APPROX_COUNT_DISTINCT aggregation.
+
+         Example::
+
+             >>> unique_users = Feature.approx_count_distinct("user_id", "24h")
+         """
+         return cls(AggregationType.APPROX_COUNT_DISTINCT, column, window, offset)
+
+     @classmethod
+     def approx_percentile(cls, column: str, window: str, *, percentile: float = 0.5, offset: str = "0") -> Feature:
+         """Create an APPROX_PERCENTILE aggregation feature.
+
+         Estimates the specified percentile using T-Digest algorithm.
+         This is approximate but highly efficient for large datasets.
+
+         Args:
+             column: The column to compute percentile for.
+             window: The lookback window.
+             percentile: The percentile to estimate (0.0 to 1.0). Default is 0.5 (median).
+             offset: Offset to shift window into past. Default is "0" (no offset).
+
+         Returns:
+             A Feature configured for APPROX_PERCENTILE aggregation.
+
+         Example::
+
+             >>> median_amount = Feature.approx_percentile("amount", "24h", percentile=0.5)
+             >>> p95_latency = Feature.approx_percentile("latency", "1h", percentile=0.95)
+         """
+         return cls(AggregationType.APPROX_PERCENTILE, column, window, offset, percentile=percentile)
+
+     def __repr__(self) -> str:
+         alias_str = f", alias='{self._alias}'" if self._alias else ""
+         params_str = f", params={self._params}" if self._params else ""
+         offset_str = f", offset='{self._offset}'" if self._offset != "0" else ""
+         return (
+             f"Feature({self._function.value}, column='{self._column}', "
+             f"window='{self._window}'{offset_str}{params_str}{alias_str})"
+         )
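For reference, a minimal usage sketch based only on the Feature API added in this file. The import path follows the class docstring above; treating AggregationSpec.output_column as a readable attribute is an assumption about the companion aggregation.py module added in the same release.

    from snowflake.ml.feature_store import Feature

    # Factory methods return Feature builders; alias() chains and uppercases
    # the name unless case_sensitive=True.
    amount_sum = Feature.sum("amount", "24h").alias("total_amount_24h")
    prev_week = Feature.sum("amount", "7d", offset="7d").alias("prev_week_total")
    recent_pages = Feature.last_n("page_id", "1h", n=10)

    # Without an alias, to_spec() falls back to a generated name built from the
    # column, the aggregation function value, and the window, uppercased.
    spec = recent_pages.to_spec()
    print(spec.output_column)  # assumed attribute on AggregationSpec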