snowflake-ml-python 1.22.0__py3-none-any.whl → 1.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. snowflake/ml/_internal/platform_capabilities.py +0 -4
  2. snowflake/ml/feature_store/__init__.py +2 -0
  3. snowflake/ml/feature_store/aggregation.py +367 -0
  4. snowflake/ml/feature_store/feature.py +366 -0
  5. snowflake/ml/feature_store/feature_store.py +234 -20
  6. snowflake/ml/feature_store/feature_view.py +189 -4
  7. snowflake/ml/feature_store/metadata_manager.py +425 -0
  8. snowflake/ml/feature_store/tile_sql_generator.py +1079 -0
  9. snowflake/ml/jobs/__init__.py +2 -0
  10. snowflake/ml/jobs/_utils/constants.py +1 -0
  11. snowflake/ml/jobs/_utils/payload_utils.py +38 -18
  12. snowflake/ml/jobs/_utils/query_helper.py +8 -1
  13. snowflake/ml/jobs/_utils/runtime_env_utils.py +117 -0
  14. snowflake/ml/jobs/_utils/stage_utils.py +2 -2
  15. snowflake/ml/jobs/_utils/types.py +22 -2
  16. snowflake/ml/jobs/job_definition.py +232 -0
  17. snowflake/ml/jobs/manager.py +16 -177
  18. snowflake/ml/model/__init__.py +4 -0
  19. snowflake/ml/model/_client/model/batch_inference_specs.py +38 -2
  20. snowflake/ml/model/_client/model/model_version_impl.py +120 -89
  21. snowflake/ml/model/_client/ops/model_ops.py +4 -26
  22. snowflake/ml/model/_client/ops/param_utils.py +124 -0
  23. snowflake/ml/model/_client/ops/service_ops.py +63 -23
  24. snowflake/ml/model/_client/service/model_deployment_spec.py +12 -5
  25. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
  26. snowflake/ml/model/_client/sql/service.py +25 -54
  27. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +21 -3
  28. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +21 -3
  29. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +21 -3
  30. snowflake/ml/model/_model_composer/model_method/model_method.py +3 -1
  31. snowflake/ml/model/_packager/model_handlers/huggingface.py +74 -10
  32. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +121 -29
  33. snowflake/ml/model/_signatures/utils.py +130 -0
  34. snowflake/ml/model/openai_signatures.py +97 -0
  35. snowflake/ml/registry/_manager/model_parameter_reconciler.py +1 -1
  36. snowflake/ml/version.py +1 -1
  37. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/METADATA +105 -1
  38. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/RECORD +41 -35
  39. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/WHEEL +1 -1
  40. snowflake/ml/experiment/callback/__init__.py +0 -0
  41. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/licenses/LICENSE.txt +0 -0
  42. {snowflake_ml_python-1.22.0.dist-info → snowflake_ml_python-1.24.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,425 @@
1
+ """Metadata manager for Feature Store internal metadata table.
2
+
3
+ This module provides a centralized class for managing the internal metadata table
4
+ used by Feature Store to store configuration that doesn't fit in Snowflake object
5
+ properties (tags, comments, etc.).
6
+
7
+ Currently used for:
8
+ - Feature specifications for tiled feature views
9
+ - Feature descriptions for tiled feature views (since tile columns differ from output columns)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ from dataclasses import dataclass
16
+ from enum import Enum
17
+ from typing import TYPE_CHECKING, Any, Optional
18
+
19
+ from snowflake.ml.feature_store.aggregation import AggregationSpec
20
+
21
+ if TYPE_CHECKING:
22
+ from snowflake.snowpark import Session
23
+
24
+
25
# Name of the internal metadata table and the COMMENT attached to it at creation.
_METADATA_TABLE_NAME = "_FEATURE_STORE_METADATA"
_METADATA_TABLE_COMMENT = (
    "Internal metadata table for Feature Store. "
    "DO NOT modify directly - used for Feature Store internal operations."
)
30
+
31
+
32
class MetadataObjectType(str, Enum):
    """Kinds of objects that can own entries in the metadata table.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    # The only object kind defined here.
    FEATURE_VIEW = "FEATURE_VIEW"
36
+
37
+
38
class MetadataType(str, Enum):
    """Categories of metadata payload that can be stored for an object.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    # Feature specifications payload.
    FEATURE_SPECS = "FEATURE_SPECS"
    # Feature descriptions payload.
    FEATURE_DESCS = "FEATURE_DESCS"
43
+
44
+
45
@dataclass
class AggregationMetadata:
    """Aggregation settings of a tiled feature view.

    Holds the feature granularity string together with the list of
    aggregation specs, and converts to and from a plain dictionary.
    """

    feature_granularity: str
    features: list[AggregationSpec]

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dictionary describing this metadata.

        Returns:
            A dict with the keys ``feature_granularity`` and ``features``; each
            entry of ``features`` is the ``to_dict()`` result of one spec.
        """
        payload: dict[str, Any] = {"feature_granularity": self.feature_granularity}
        payload["features"] = [spec.to_dict() for spec in self.features]
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> AggregationMetadata:
        """Build an instance from a dictionary shaped like ``to_dict()`` output.

        Args:
            data: Dictionary with the keys ``feature_granularity`` and ``features``.

        Returns:
            The reconstructed AggregationMetadata.
        """
        # Read the granularity first so a missing key surfaces in the same order as before.
        granularity = data["feature_granularity"]
        specs = [AggregationSpec.from_dict(raw_spec) for raw_spec in data["features"]]
        return cls(feature_granularity=granularity, features=specs)
66
+
67
+
68
class FeatureStoreMetadataManager:
    """Manages the internal metadata table for Feature Store objects.

    This class encapsulates all operations on the _FEATURE_STORE_METADATA table,
    providing typed APIs for reading and writing different types of metadata.

    The metadata table schema:
        - OBJECT_TYPE: Type of object (e.g., 'FEATURE_VIEW')
        - OBJECT_NAME: Name of the object (stored with surrounding double quotes
          stripped and uppercased)
        - VERSION: Version of the object (declared NOT NULL; part of the primary key)
        - METADATA_TYPE: Type of metadata (e.g., 'FEATURE_SPECS', 'FEATURE_DESCS')
        - METADATA: VARIANT column containing the actual metadata as JSON
        - CREATED_AT: Timestamp when the entry was created
        - UPDATED_AT: Timestamp when the entry was last updated

    Every value embedded into SQL text goes through ``_sql_literal`` so that
    quotes and backslashes in names, versions or JSON payloads cannot terminate
    the literal early or change the statement.
    """

    def __init__(
        self,
        session: Session,
        schema_path: str,
        fs_object_tag_path: str,
        telemetry_stmp: dict[str, Any],
    ) -> None:
        """Initialize the metadata manager.

        Args:
            session: Snowpark session.
            schema_path: Fully qualified schema path (e.g., "DB.SCHEMA").
            fs_object_tag_path: Fully qualified path to the feature store object tag.
            telemetry_stmp: Telemetry statement parameters.
        """
        self._session = session
        self._schema_path = schema_path
        self._fs_object_tag_path = fs_object_tag_path
        self._table_path = f"{schema_path}.{_METADATA_TABLE_NAME}"
        self._telemetry_stmp = telemetry_stmp
        # None = not checked yet; True/False = cached result of the last check/creation.
        self._table_exists: Optional[bool] = None

    def ensure_table_exists(self) -> None:
        """Create the metadata table if it doesn't exist.

        This method is idempotent and safe to call multiple times.
        The table is tagged as a feature store object and has a description
        indicating it's for internal use only.
        """
        if self._table_exists:
            return

        # Create the table
        self._session.sql(
            f"""
            CREATE TABLE IF NOT EXISTS {self._table_path} (
                OBJECT_TYPE VARCHAR(50) NOT NULL,
                OBJECT_NAME VARCHAR(256) NOT NULL,
                VERSION VARCHAR(128) NOT NULL,
                METADATA_TYPE VARCHAR(50) NOT NULL,
                METADATA VARIANT NOT NULL,
                CREATED_AT TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
                UPDATED_AT TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
                PRIMARY KEY (OBJECT_TYPE, OBJECT_NAME, VERSION, METADATA_TYPE)
            )
            COMMENT = {self._sql_literal(_METADATA_TABLE_COMMENT)}
            """
        ).collect(statement_params=self._telemetry_stmp)

        # Add feature store object tag to identify this as an internal FS object
        # Import here to avoid circular dependency
        import snowflake.ml.version as snowml_version
        from snowflake.ml.feature_store.feature_store import (
            _FeatureStoreObjInfo,
            _FeatureStoreObjTypes,
        )

        obj_info = _FeatureStoreObjInfo(_FeatureStoreObjTypes.INTERNAL_METADATA_TABLE, snowml_version.VERSION)
        self._session.sql(
            f"""
            ALTER TABLE {self._table_path}
            SET TAG {self._fs_object_tag_path} = {self._sql_literal(obj_info.to_json())}
            """
        ).collect(statement_params=self._telemetry_stmp)

        self._table_exists = True

    # =========================================================================
    # Feature Specs
    # =========================================================================

    def save_feature_specs(
        self,
        fv_name: str,
        version: str,
        metadata: AggregationMetadata,
    ) -> None:
        """Save feature specifications for a tiled feature view.

        Creates the metadata table if needed, then inserts or updates the
        FEATURE_SPECS entry for the given feature view and version.

        Args:
            fv_name: Feature view name.
            version: Feature view version.
            metadata: Aggregation metadata to save.
        """
        self.ensure_table_exists()
        self._upsert_metadata(
            object_type=MetadataObjectType.FEATURE_VIEW,
            object_name=fv_name,
            version=version,
            metadata_type=MetadataType.FEATURE_SPECS,
            metadata=metadata.to_dict(),
        )

    def get_feature_specs(
        self,
        fv_name: str,
        version: str,
    ) -> Optional[AggregationMetadata]:
        """Get feature specifications for a tiled feature view.

        Args:
            fv_name: Feature view name.
            version: Feature view version.

        Returns:
            AggregationMetadata if found, None otherwise.
        """
        data = self._get_metadata(
            object_type=MetadataObjectType.FEATURE_VIEW,
            object_name=fv_name,
            version=version,
            metadata_type=MetadataType.FEATURE_SPECS,
        )
        if data is None:
            return None
        return AggregationMetadata.from_dict(data)

    # =========================================================================
    # Feature Descriptions
    # =========================================================================

    def save_feature_descs(
        self,
        fv_name: str,
        version: str,
        descs: dict[str, str],
    ) -> None:
        """Save feature descriptions for a tiled feature view.

        Does nothing when ``descs`` is empty; otherwise creates the metadata
        table if needed and inserts or updates the FEATURE_DESCS entry.

        Args:
            fv_name: Feature view name.
            version: Feature view version.
            descs: Dictionary mapping output column names to descriptions.
        """
        if not descs:
            return  # Don't save empty descriptions

        self.ensure_table_exists()
        self._upsert_metadata(
            object_type=MetadataObjectType.FEATURE_VIEW,
            object_name=fv_name,
            version=version,
            metadata_type=MetadataType.FEATURE_DESCS,
            metadata=descs,
        )

    def save_feature_view_metadata(
        self,
        fv_name: str,
        version: str,
        specs: AggregationMetadata,
        descs: Optional[dict[str, str]] = None,
    ) -> None:
        """Save all metadata for a tiled feature view atomically.

        This method saves both feature specs and descriptions in a single
        INSERT statement for atomicity during creation.

        Args:
            fv_name: Feature view name.
            version: Feature view version.
            specs: Aggregation metadata (required).
            descs: Optional dictionary of feature descriptions.
        """
        self.ensure_table_exists()

        # One (metadata type, payload) pair per row to insert; descriptions are
        # skipped when missing or empty.
        rows: list[tuple[MetadataType, dict[str, Any]]] = [(MetadataType.FEATURE_SPECS, specs.to_dict())]
        if descs:
            rows.append((MetadataType.FEATURE_DESCS, descs))

        object_type_sql = self._sql_literal(MetadataObjectType.FEATURE_VIEW.value)
        name_sql = self._sql_literal(self._normalize_name(fv_name))
        version_sql = self._sql_literal(version)

        # Build SELECT statements for atomic insert (PARSE_JSON can't be used in VALUES)
        selects = [
            f"SELECT {object_type_sql}, {name_sql}, {version_sql}, "
            f"{self._sql_literal(metadata_type.value)}, "
            f"PARSE_JSON({self._sql_literal(json.dumps(payload))})"
            for metadata_type, payload in rows
        ]
        union_query = " UNION ALL ".join(selects)

        # NOTE(review): this is a plain INSERT, so calling it twice for the same
        # feature view/version adds a second set of rows - confirm callers only
        # use it for first-time creation.
        self._session.sql(
            f"""
            INSERT INTO {self._table_path}
            (OBJECT_TYPE, OBJECT_NAME, VERSION, METADATA_TYPE, METADATA)
            {union_query}
            """
        ).collect(statement_params=self._telemetry_stmp)

    def get_feature_descs(
        self,
        fv_name: str,
        version: str,
    ) -> Optional[dict[str, str]]:
        """Get feature descriptions for a tiled feature view.

        Args:
            fv_name: Feature view name.
            version: Feature view version.

        Returns:
            Dictionary of feature descriptions if found, None otherwise.
        """
        return self._get_metadata(
            object_type=MetadataObjectType.FEATURE_VIEW,
            object_name=fv_name,
            version=version,
            metadata_type=MetadataType.FEATURE_DESCS,
        )

    # =========================================================================
    # Cleanup
    # =========================================================================

    def delete_feature_view_metadata(
        self,
        fv_name: str,
        version: str,
    ) -> None:
        """Delete all metadata entries for a feature view.

        Does nothing when the metadata table does not exist.

        Args:
            fv_name: Feature view name.
            version: Feature view version.
        """
        # Check if table exists before trying to delete
        if not self._check_table_exists():
            return

        self._session.sql(
            f"""
            DELETE FROM {self._table_path}
            WHERE OBJECT_TYPE = {self._sql_literal(MetadataObjectType.FEATURE_VIEW.value)}
              AND OBJECT_NAME = {self._sql_literal(self._normalize_name(fv_name))}
              AND VERSION = {self._sql_literal(version)}
            """
        ).collect(statement_params=self._telemetry_stmp)

    # =========================================================================
    # Private helpers
    # =========================================================================

    @staticmethod
    def _sql_literal(value: str) -> str:
        """Render a value as a single-quoted SQL string constant.

        Backslashes are doubled first (they start escape sequences inside
        single-quoted constants) and single quotes are doubled afterwards, so
        the constant decodes back to exactly ``str(value)``.

        Args:
            value: Text to embed in a SQL statement.

        Returns:
            The escaped text wrapped in single quotes.
        """
        escaped = str(value).replace("\\", "\\\\").replace("'", "''")
        return f"'{escaped}'"

    @staticmethod
    def _normalize_name(name: str) -> str:
        """Strip surrounding double quotes and uppercase for consistent storage and lookup."""
        return name.strip('"').upper()

    @staticmethod
    def _split_identifier_path(path: str) -> list[str]:
        """Split a dotted identifier path on the dots that lie outside double quotes.

        Args:
            path: Dotted path such as ``DB.SCHEMA`` or ``"my.db"."my schema"``.

        Returns:
            The path components, with any surrounding double quotes preserved.
        """
        parts: list[str] = []
        current: list[str] = []
        in_quotes = False
        for char in path:
            if char == '"':
                # An escaped quote ("") toggles twice and leaves the state unchanged.
                in_quotes = not in_quotes
                current.append(char)
            elif char == "." and not in_quotes:
                parts.append("".join(current))
                current = []
            else:
                current.append(char)
        parts.append("".join(current))
        return parts

    @staticmethod
    def _resolve_identifier(identifier: str) -> str:
        """Return the name an identifier resolves to.

        Double-quoted identifiers keep their case (with ``""`` unescaped to ``"``);
        unquoted identifiers are uppercased.
        """
        if len(identifier) >= 2 and identifier.startswith('"') and identifier.endswith('"'):
            return identifier[1:-1].replace('""', '"')
        return identifier.upper()

    def _check_table_exists(self) -> bool:
        """Check if the metadata table exists.

        The result is cached on the instance, so the lookup query runs at most
        once per manager.

        Returns:
            True if the metadata table was found (or is already known to exist).
        """
        # NOTE(review): a negative result is cached too; a table created later by
        # another manager instance is not noticed until ensure_table_exists() runs here.
        if self._table_exists is not None:
            return self._table_exists

        path_parts = self._split_identifier_path(self._schema_path)
        schema_name = self._resolve_identifier(path_parts[-1])
        # Qualify INFORMATION_SCHEMA with the database from schema_path; unqualified,
        # it would be resolved against the session's current database instead.
        if len(path_parts) >= 2:
            info_schema = f"{path_parts[-2]}.INFORMATION_SCHEMA"
        else:
            info_schema = "INFORMATION_SCHEMA"

        result = self._session.sql(
            f"""
            SELECT COUNT(*) as cnt
            FROM {info_schema}.TABLES
            WHERE TABLE_SCHEMA = {self._sql_literal(schema_name)}
              AND TABLE_NAME = {self._sql_literal(_METADATA_TABLE_NAME)}
            """
        ).collect(statement_params=self._telemetry_stmp)

        self._table_exists = result[0]["CNT"] > 0
        return self._table_exists

    def _upsert_metadata(
        self,
        object_type: MetadataObjectType,
        object_name: str,
        version: str,
        metadata_type: MetadataType,
        metadata: dict[str, Any],
    ) -> None:
        """Insert or update a metadata entry.

        Runs a single MERGE keyed on (OBJECT_TYPE, OBJECT_NAME, VERSION,
        METADATA_TYPE): a matching row gets its METADATA and UPDATED_AT
        replaced, otherwise a new row is inserted.

        Args:
            object_type: Type of the object owning the metadata.
            object_name: Object name; normalized before storage.
            version: Object version.
            metadata_type: Type of the metadata payload.
            metadata: JSON-serializable payload.
        """
        metadata_json = json.dumps(metadata)

        self._session.sql(
            f"""
            MERGE INTO {self._table_path} AS target
            USING (
                SELECT
                    {self._sql_literal(object_type.value)} AS OBJECT_TYPE,
                    {self._sql_literal(self._normalize_name(object_name))} AS OBJECT_NAME,
                    {self._sql_literal(version)} AS VERSION,
                    {self._sql_literal(metadata_type.value)} AS METADATA_TYPE,
                    PARSE_JSON({self._sql_literal(metadata_json)}) AS METADATA
            ) AS source
            ON target.OBJECT_TYPE = source.OBJECT_TYPE
               AND target.OBJECT_NAME = source.OBJECT_NAME
               AND target.VERSION = source.VERSION
               AND target.METADATA_TYPE = source.METADATA_TYPE
            WHEN MATCHED THEN UPDATE SET
                METADATA = source.METADATA,
                UPDATED_AT = CURRENT_TIMESTAMP()
            WHEN NOT MATCHED THEN INSERT (
                OBJECT_TYPE, OBJECT_NAME, VERSION, METADATA_TYPE, METADATA
            ) VALUES (
                source.OBJECT_TYPE, source.OBJECT_NAME,
                source.VERSION, source.METADATA_TYPE, source.METADATA
            )
            """
        ).collect(statement_params=self._telemetry_stmp)

    def _get_metadata(
        self,
        object_type: MetadataObjectType,
        object_name: str,
        version: str,
        metadata_type: MetadataType,
    ) -> Optional[dict[str, Any]]:
        """Get a metadata entry.

        Args:
            object_type: Type of the object owning the metadata.
            object_name: Object name; normalized before lookup.
            version: Object version.
            metadata_type: Type of the metadata payload.

        Returns:
            The stored payload as a dictionary, or None when the table or the
            entry does not exist.
        """
        # Check if table exists before querying
        if not self._check_table_exists():
            return None

        result = self._session.sql(
            f"""
            SELECT METADATA
            FROM {self._table_path}
            WHERE OBJECT_TYPE = {self._sql_literal(object_type.value)}
              AND OBJECT_NAME = {self._sql_literal(self._normalize_name(object_name))}
              AND VERSION = {self._sql_literal(version)}
              AND METADATA_TYPE = {self._sql_literal(metadata_type.value)}
            """
        ).collect(statement_params=self._telemetry_stmp)

        if not result:
            return None

        metadata_value = result[0]["METADATA"]
        # Handle both string and dict responses from Snowflake
        if isinstance(metadata_value, str):
            result_dict: dict[str, Any] = json.loads(metadata_value)
            return result_dict
        return dict(metadata_value)