snowflake-ml-python 1.7.4__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (73)
  1. snowflake/cortex/_complete.py +58 -3
  2. snowflake/ml/_internal/env_utils.py +64 -21
  3. snowflake/ml/_internal/file_utils.py +18 -4
  4. snowflake/ml/_internal/platform_capabilities.py +3 -0
  5. snowflake/ml/_internal/relax_version_strategy.py +16 -0
  6. snowflake/ml/_internal/telemetry.py +25 -0
  7. snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +18 -0
  9. snowflake/ml/feature_store/feature_view.py +46 -1
  10. snowflake/ml/fileset/fileset.py +0 -1
  11. snowflake/ml/jobs/_utils/constants.py +31 -1
  12. snowflake/ml/jobs/_utils/payload_utils.py +232 -72
  13. snowflake/ml/jobs/_utils/spec_utils.py +78 -38
  14. snowflake/ml/jobs/decorators.py +8 -25
  15. snowflake/ml/jobs/job.py +4 -4
  16. snowflake/ml/jobs/manager.py +5 -0
  17. snowflake/ml/model/_client/model/model_version_impl.py +1 -1
  18. snowflake/ml/model/_client/ops/model_ops.py +107 -14
  19. snowflake/ml/model/_client/ops/service_ops.py +1 -1
  20. snowflake/ml/model/_client/service/model_deployment_spec.py +7 -3
  21. snowflake/ml/model/_client/sql/model_version.py +58 -0
  22. snowflake/ml/model/_client/sql/service.py +8 -2
  23. snowflake/ml/model/_model_composer/model_composer.py +50 -3
  24. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
  25. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
  26. snowflake/ml/model/_model_composer/model_method/model_method.py +0 -1
  27. snowflake/ml/model/_packager/model_env/model_env.py +49 -29
  28. snowflake/ml/model/_packager/model_handlers/_utils.py +8 -4
  29. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +44 -24
  30. snowflake/ml/model/_packager/model_handlers/keras.py +226 -0
  31. snowflake/ml/model/_packager/model_handlers/pytorch.py +51 -20
  32. snowflake/ml/model/_packager/model_handlers/sklearn.py +25 -3
  33. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +73 -21
  34. snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -72
  35. snowflake/ml/model/_packager/model_handlers/torchscript.py +49 -20
  36. snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
  37. snowflake/ml/model/_packager/model_handlers_migrator/pytorch_migrator_2023_12_01.py +20 -0
  38. snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
  39. snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2025_01_01.py +19 -0
  40. snowflake/ml/model/_packager/model_handlers_migrator/torchscript_migrator_2023_12_01.py +20 -0
  41. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +0 -1
  42. snowflake/ml/model/_packager/model_meta/model_meta.py +6 -2
  43. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +16 -0
  44. snowflake/ml/model/_packager/model_packager.py +3 -5
  45. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -2
  46. snowflake/ml/model/_packager/model_runtime/model_runtime.py +8 -1
  47. snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
  48. snowflake/ml/model/_signatures/builtins_handler.py +20 -9
  49. snowflake/ml/model/_signatures/core.py +54 -33
  50. snowflake/ml/model/_signatures/dmatrix_handler.py +98 -0
  51. snowflake/ml/model/_signatures/numpy_handler.py +12 -20
  52. snowflake/ml/model/_signatures/pandas_handler.py +28 -37
  53. snowflake/ml/model/_signatures/pytorch_handler.py +57 -41
  54. snowflake/ml/model/_signatures/snowpark_handler.py +0 -12
  55. snowflake/ml/model/_signatures/tensorflow_handler.py +61 -67
  56. snowflake/ml/model/_signatures/utils.py +120 -8
  57. snowflake/ml/model/custom_model.py +13 -4
  58. snowflake/ml/model/model_signature.py +39 -13
  59. snowflake/ml/model/type_hints.py +28 -2
  60. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
  61. snowflake/ml/modeling/metrics/ranking.py +3 -0
  62. snowflake/ml/modeling/metrics/regression.py +3 -0
  63. snowflake/ml/modeling/pipeline/pipeline.py +18 -1
  64. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -1
  65. snowflake/ml/modeling/preprocessing/polynomial_features.py +2 -2
  66. snowflake/ml/registry/_manager/model_manager.py +55 -7
  67. snowflake/ml/registry/registry.py +52 -4
  68. snowflake/ml/version.py +1 -1
  69. {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/METADATA +336 -27
  70. {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/RECORD +73 -66
  71. {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/WHEEL +1 -1
  72. {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info/licenses}/LICENSE.txt +0 -0
  73. {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_signatures/tensorflow_handler.py

@@ -1,5 +1,5 @@
  from collections import abc
- from typing import TYPE_CHECKING, List, Literal, Optional, Sequence, Union
+ from typing import TYPE_CHECKING, Literal, Optional, Sequence, Union

  import numpy as np
  import pandas as pd
@@ -11,12 +11,62 @@ from snowflake.ml._internal.exceptions import (
      exceptions as snowml_exceptions,
  )
  from snowflake.ml.model import type_hints as model_types
- from snowflake.ml.model._signatures import base_handler, core
+ from snowflake.ml.model._signatures import base_handler, core, numpy_handler

  if TYPE_CHECKING:
      import tensorflow


+ class TensorflowTensorHandler(base_handler.BaseDataHandler[Union["tensorflow.Tensor", "tensorflow.Variable"]]):
+     @staticmethod
+     def can_handle(
+         data: model_types.SupportedDataType,
+     ) -> TypeGuard[Union["tensorflow.Tensor", "tensorflow.Variable"]]:
+         return type_utils.LazyType("tensorflow.Tensor").isinstance(data) or type_utils.LazyType(
+             "tensorflow.Variable"
+         ).isinstance(data)
+
+     @staticmethod
+     def count(data: Union["tensorflow.Tensor", "tensorflow.Variable"]) -> int:
+         return numpy_handler.NumpyArrayHandler.count(data.numpy())
+
+     @staticmethod
+     def truncate(
+         data: Union["tensorflow.Tensor", "tensorflow.Variable"], length: int
+     ) -> Union["tensorflow.Tensor", "tensorflow.Variable"]:
+         return data[: min(TensorflowTensorHandler.count(data), length)]
+
+     @staticmethod
+     def validate(data: Union["tensorflow.Tensor", "tensorflow.Variable"]) -> None:
+         numpy_handler.NumpyArrayHandler.validate(data.numpy())
+
+     @staticmethod
+     def infer_signature(
+         data: Union["tensorflow.Tensor", "tensorflow.Variable"], role: Literal["input", "output"]
+     ) -> Sequence[core.BaseFeatureSpec]:
+         return numpy_handler.NumpyArrayHandler.infer_signature(data.numpy(), role=role)
+
+     @staticmethod
+     def convert_to_df(
+         data: Union["tensorflow.Tensor", "tensorflow.Variable"], ensure_serializable: bool = True
+     ) -> pd.DataFrame:
+         return numpy_handler.NumpyArrayHandler.convert_to_df(data.numpy(), ensure_serializable=ensure_serializable)
+
+     @staticmethod
+     def convert_from_df(
+         df: pd.DataFrame, features: Optional[Sequence[core.BaseFeatureSpec]] = None
+     ) -> Union["tensorflow.Tensor", "tensorflow.Variable"]:
+         import tensorflow as tf
+
+         if features is None:
+             if any(dtype == np.dtype("O") for dtype in df.dtypes):
+                 return tf.convert_to_tensor(np.array(df.to_numpy().tolist()))
+             return tf.convert_to_tensor(df.to_numpy())
+
+         assert isinstance(features[0], core.FeatureSpec)
+         return tf.convert_to_tensor(np.array(df.to_numpy().tolist()), dtype=features[0]._dtype._numpy_type)
+
+
  class SeqOfTensorflowTensorHandler(
      base_handler.BaseDataHandler[Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]]
  ):
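
Note: the new `TensorflowTensorHandler` delegates to the numpy handlers via `.numpy()`, so a single `tf.Tensor` (not just a sequence of tensors) can flow through signature inference once the handler is registered in `_LOCAL_DATA_HANDLERS` (see the model_signature.py hunks below). A minimal sketch, assuming the public `infer_signature` entry point dispatches through these handlers:

    import tensorflow as tf
    from snowflake.ml.model import model_signature

    # A single 2-D input tensor and a 1-D output tensor; previously only
    # sequences of tensors were handled by SeqOfTensorflowTensorHandler.
    x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    y = tf.constant([0.0, 1.0])

    sig = model_signature.infer_signature(input_data=x, output_data=y)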
@@ -28,35 +78,12 @@ class SeqOfTensorflowTensorHandler(
              return False
          if len(data) == 0:
              return False
-         if type_utils.LazyType("tensorflow.Tensor").isinstance(data[0]) or type_utils.LazyType(
-             "tensorflow.Variable"
-         ).isinstance(data[0]):
-             return all(
-                 type_utils.LazyType("tensorflow.Tensor").isinstance(data_col)
-                 or type_utils.LazyType("tensorflow.Variable").isinstance(data_col)
-                 for data_col in data
-             )
-         return False
+
+         return all(TensorflowTensorHandler.can_handle(data_col) for data_col in data)

      @staticmethod
      def count(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> int:
-         import tensorflow as tf
-
-         rows = []
-         for data_col in data:
-             shapes = data_col.shape.as_list()
-             if data_col.shape == tf.TensorShape(None) or (not shapes) or (shapes[0] is None):
-                 # Unknown shape array
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Unknown shape data is found."),
-                 )
-             # Make mypy happy
-             assert isinstance(shapes[0], int)
-
-             rows.append(shapes[0])
-
-         return min(rows)
+         return min(TensorflowTensorHandler.count(data_col) for data_col in data)

      @staticmethod
      def truncate(
@@ -66,49 +93,14 @@ class SeqOfTensorflowTensorHandler(

      @staticmethod
      def validate(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> None:
-         import tensorflow as tf
-
          for data_col in data:
-             if data_col.shape == tf.TensorShape(None) or any(dim is None for dim in data_col.shape.as_list()):
-                 # Unknown shape array
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Unknown shape data is found."),
-                 )
-
-             if data_col.shape == tf.TensorShape([0]):
-                 # Empty array
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Empty data is found."),
-                 )
-
-             if data_col.shape == tf.TensorShape([1]) or data_col.shape == tf.TensorShape([]):
-                 # scalar
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Scalar data is found."),
-                 )
+             TensorflowTensorHandler.validate(data_col)

      @staticmethod
      def infer_signature(
          data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]], role: Literal["input", "output"]
      ) -> Sequence[core.BaseFeatureSpec]:
-         feature_prefix = f"{SeqOfTensorflowTensorHandler.FEATURE_PREFIX}_"
-         features: List[core.BaseFeatureSpec] = []
-         role_prefix = (
-             SeqOfTensorflowTensorHandler.INPUT_PREFIX if role == "input" else SeqOfTensorflowTensorHandler.OUTPUT_PREFIX
-         ) + "_"
-
-         for i, data_col in enumerate(data):
-             dtype = core.DataType.from_numpy_type(data_col.dtype.as_numpy_dtype)
-             ft_name = f"{role_prefix}{feature_prefix}{i}"
-             if len(data_col.shape) == 1:
-                 features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
-             else:
-                 ft_shape = tuple(data_col.shape[1:])
-                 features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
-         return features
+         return numpy_handler.SeqOfNumpyArrayHandler.infer_signature([data_col.numpy() for data_col in data], role=role)

      @staticmethod
      def convert_to_df(
@@ -129,8 +121,10 @@
          for feature in features:
              if isinstance(feature, core.FeatureGroupSpec):
                  raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.NOT_IMPLEMENTED,
-                     original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
+                     error_code=error_codes.INVALID_DATA_TYPE,
+                     original_exception=NotImplementedError(
+                         "FeatureGroupSpec is not supported when converting to Tensorflow tensor."
+                     ),
                  )
              assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
              res.append(
snowflake/ml/model/_signatures/utils.py

@@ -135,7 +135,16 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
              core.FeatureSpec(name="inputs", dtype=core.DataType.STRING),
          ],
          outputs=[
-             core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+             core.FeatureGroupSpec(
+                 name="outputs",
+                 specs=[
+                     core.FeatureSpec(name="sequence", dtype=core.DataType.STRING),
+                     core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                     core.FeatureSpec(name="token", dtype=core.DataType.INT64),
+                     core.FeatureSpec(name="token_str", dtype=core.DataType.STRING),
+                 ],
+                 shape=(-1,),
+             ),
          ],
      )

@@ -144,7 +153,18 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
      return core.ModelSignature(
          inputs=[core.FeatureSpec(name="inputs", dtype=core.DataType.STRING)],
          outputs=[
-             core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+             core.FeatureGroupSpec(
+                 name="outputs",
+                 specs=[
+                     core.FeatureSpec(name="word", dtype=core.DataType.STRING),
+                     core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                     core.FeatureSpec(name="entity", dtype=core.DataType.STRING),
+                     core.FeatureSpec(name="index", dtype=core.DataType.INT64),
+                     core.FeatureSpec(name="start", dtype=core.DataType.INT64),
+                     core.FeatureSpec(name="end", dtype=core.DataType.INT64),
+                 ],
+                 shape=(-1,),
+             ),
          ],
      )

@@ -171,7 +191,16 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
              core.FeatureSpec(name="context", dtype=core.DataType.STRING),
          ],
          outputs=[
-             core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+             core.FeatureGroupSpec(
+                 name="answers",
+                 specs=[
+                     core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                     core.FeatureSpec(name="start", dtype=core.DataType.INT64),
+                     core.FeatureSpec(name="end", dtype=core.DataType.INT64),
+                     core.FeatureSpec(name="answer", dtype=core.DataType.STRING),
+                 ],
+                 shape=(-1,),
+             ),
          ],
      )

@@ -216,17 +245,22 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
          return core.ModelSignature(
              inputs=[
                  core.FeatureSpec(name="text", dtype=core.DataType.STRING),
-                 core.FeatureSpec(name="text_pair", dtype=core.DataType.STRING),
              ],
              outputs=[
-                 core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+                 core.FeatureGroupSpec(
+                     name="labels",
+                     specs=[
+                         core.FeatureSpec(name="label", dtype=core.DataType.STRING),
+                         core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                     ],
+                     shape=(-1,),
+                 ),
              ],
          )
      # Else, return a dict per input
      return core.ModelSignature(
          inputs=[
              core.FeatureSpec(name="text", dtype=core.DataType.STRING),
-             core.FeatureSpec(name="text_pair", dtype=core.DataType.STRING),
          ],
          outputs=[
              core.FeatureSpec(name="label", dtype=core.DataType.STRING),
@@ -243,9 +277,24 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
          )
      # Always generate a list of dict per input
      return core.ModelSignature(
-         inputs=[core.FeatureSpec(name="inputs", dtype=core.DataType.STRING)],
+         inputs=[
+             core.FeatureGroupSpec(
+                 name="inputs",
+                 specs=[
+                     core.FeatureSpec(name="role", dtype=core.DataType.STRING),
+                     core.FeatureSpec(name="content", dtype=core.DataType.STRING),
+                 ],
+                 shape=(-1,),
+             ),
+         ],
          outputs=[
-             core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+             core.FeatureGroupSpec(
+                 name="outputs",
+                 specs=[
+                     core.FeatureSpec(name="generated_text", dtype=core.DataType.STRING),
+                 ],
+                 shape=(-1,),
+             )
          ],
      )

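Note: with this change, the text-generation signature models chat-style conversations as feature groups rather than plain strings. An illustrative payload matching the new signature (column names follow the `FeatureGroupSpec` names above; the message content is hypothetical):

    import pandas as pd

    # One conversation per row: a list of {role, content} dicts under "inputs".
    input_df = pd.DataFrame(
        {
            "inputs": [
                [
                    {"role": "system", "content": "You complete sentences."},
                    {"role": "user", "content": "The weather today is"},
                ]
            ]
        }
    )
    # The corresponding output column "outputs" holds a list of
    # {"generated_text": ...} dicts per row.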
@@ -300,3 +349,66 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])

  def series_dropna(series: pd.Series) -> pd.Series:
      return series.dropna(inplace=False).reset_index(drop=True).convert_dtypes()
+
+
+ def infer_list(name: str, data: List[Any]) -> core.BaseFeatureSpec:
+     """Infer the feature specification from a list.
+
+     Args:
+         name: Feature name.
+         data: A list.
+
+     Raises:
+         SnowflakeMLException: ValueError: Raised when empty list is provided.
+
+     Returns:
+         A feature specification.
+     """
+     if not data:
+         raise snowml_exceptions.SnowflakeMLException(
+             error_code=error_codes.INVALID_DATA,
+             original_exception=ValueError("Data Validation Error: Empty list is found."),
+         )
+
+     if all(isinstance(value, dict) for value in data):
+         ft = infer_dict(name, data[0])
+         ft._name = name
+         ft._shape = (-1,)
+         return ft
+
+     arr = convert_list_to_ndarray(data)
+     arr_dtype = core.DataType.from_numpy_type(arr.dtype)
+
+     return core.FeatureSpec(name=name, dtype=arr_dtype, shape=arr.shape)
+
+
+ def infer_dict(name: str, data: Dict[str, Any]) -> core.FeatureGroupSpec:
+     """Infer the feature specification from a dictionary.
+
+     Args:
+         name: Feature name.
+         data: A dictionary.
+
+     Raises:
+         SnowflakeMLException: ValueError: Raised when empty dictionary is provided.
+         SnowflakeMLException: ValueError: Raised when empty list is found in the dictionary.
+
+     Returns:
+         A feature group specification.
+     """
+     if not data:
+         raise snowml_exceptions.SnowflakeMLException(
+             error_code=error_codes.INVALID_DATA,
+             original_exception=ValueError("Data Validation Error: Empty dictionary is found."),
+         )
+
+     specs = []
+     for key, value in data.items():
+         if isinstance(value, list):
+             specs.append(infer_list(key, value))
+         elif isinstance(value, dict):
+             specs.append(infer_dict(key, value))
+         else:
+             specs.append(core.FeatureSpec(name=key, dtype=core.DataType.from_numpy_type(np.array(value).dtype)))
+
+     return core.FeatureGroupSpec(name=name, specs=specs)
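
Note: these helpers map nested builtins onto the signature spec types: a list of dicts becomes a `FeatureGroupSpec` with `shape=(-1,)`, other lists become array-shaped `FeatureSpec`s, and dict values recurse. A quick sketch of the behavior (this is an internal API, imported directly here only for illustration):

    from snowflake.ml.model._signatures import utils

    group = utils.infer_dict("outputs", {"score": 0.99, "token": 7, "ids": [1, 2, 3]})
    # -> FeatureGroupSpec "outputs" with a DOUBLE "score", an INT64 "token",
    #    and an INT64 "ids" spec with shape (3,)

    labels = utils.infer_list("labels", [{"score": 0.1, "rank": 1}])
    # -> FeatureGroupSpec "labels" with shape (-1,), inferred from the first element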
snowflake/ml/model/custom_model.py

@@ -76,7 +76,7 @@ class ModelRef:
      def __getattr__(self, method_name: str) -> Any:
          if hasattr(self._model, method_name):
              return MethodRef(self, method_name)
-         raise TypeError(f"Model is does not have {method_name}.")
+         raise AttributeError(f"Method {method_name} not found in model {self._name}.")

      def __getstate__(self) -> Dict[str, Any]:
          state = self.__dict__.copy()
@@ -94,7 +94,16 @@ class ModelRef:

  class ModelContext:
      """
-     Context for a custom model showing paths to artifacts and mapping between model name and object reference.
+     Context for a custom model storing paths to file artifacts and model object references.
+
+     Keyword argument values can be string file paths or supported in-memory models. Paths and model references
+     can be accessed with dictionary access methods in the custom model.
+
+     For example, in a custom model with `context=ModelContext(my_file='my_file.pkl', my_model=my_model)`,
+     the filepath and model reference can be accessed with `self.context['my_file']` and `self.context['my_model']`
+     in the inference and init methods.
+
+     The use of `artifacts` and `model_refs` arguments is deprecated. Set keyword arguments directly instead.

      Attributes:
          artifacts: A dictionary mapping the name of the artifact to its path.
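
Note: a short sketch of the keyword-argument style the docstring now describes. Here `my_model` stands for any supported in-memory model (for example an sklearn estimator); the names are hypothetical:

    import pandas as pd
    from snowflake.ml.model import custom_model

    class MyModel(custom_model.CustomModel):
        @custom_model.inference_api
        def predict(self, input: pd.DataFrame) -> pd.DataFrame:
            # File paths and model references both use dictionary access.
            inner = self.context["my_model"]
            _path = self.context["my_file"]
            return pd.DataFrame({"output": inner.predict(input)})

    mc = custom_model.ModelContext(my_file="my_file.pkl", my_model=my_model)
    m = MyModel(mc)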
@@ -267,14 +276,14 @@ def _validate_predict_function(func: Callable[[model_types.CustomModelType, pd.D


  def inference_api(
-     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]
+     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
  ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
      func.__dict__["_is_inference_api"] = True
      return func


  def partitioned_inference_api(
-     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]
+     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
  ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
      func.__dict__["_is_inference_api"] = True
      func.__dict__["_is_partitioned_inference_api"] = True
snowflake/ml/model/model_signature.py

@@ -21,6 +21,7 @@ from typing_extensions import Never
  import snowflake.snowpark
  import snowflake.snowpark.functions as F
  import snowflake.snowpark.types as spt
+ from snowflake.ml._internal import telemetry
  from snowflake.ml._internal.exceptions import (
      error_codes,
      exceptions as snowml_exceptions,
@@ -31,6 +32,7 @@ from snowflake.ml.model._signatures import (
      base_handler,
      builtins_handler as builtins_handler,
      core,
+     dmatrix_handler,
      numpy_handler,
      pandas_handler,
      pytorch_handler,
@@ -51,11 +53,17 @@ _LOCAL_DATA_HANDLERS: List[Type[base_handler.BaseDataHandler[Any]]] = [
      numpy_handler.NumpyArrayHandler,
      builtins_handler.ListOfBuiltinHandler,
      numpy_handler.SeqOfNumpyArrayHandler,
+     pytorch_handler.PyTorchTensorHandler,
      pytorch_handler.SeqOfPyTorchTensorHandler,
+     tensorflow_handler.TensorflowTensorHandler,
      tensorflow_handler.SeqOfTensorflowTensorHandler,
+     dmatrix_handler.XGBoostDMatrixHandler,
  ]
  _ALL_DATA_HANDLERS = _LOCAL_DATA_HANDLERS + [snowpark_handler.SnowparkDataFrameHandler]

+ _TELEMETRY_PROJECT = "MLOps"
+ _MODEL_TELEMETRY_SUBPROJECT = "ModelSignature"
+

  def _truncate_data(
      data: model_types.SupportedDataType,
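
Note: with `XGBoostDMatrixHandler` registered, an `xgboost.DMatrix` can presumably be passed directly wherever local data is accepted. A minimal sketch:

    import numpy as np
    import xgboost as xgb
    from snowflake.ml.model import model_signature

    X = xgb.DMatrix(np.random.rand(10, 3))
    y = np.random.rand(10)

    # DMatrix input now dispatches through dmatrix_handler.XGBoostDMatrixHandler.
    sig = model_signature.infer_signature(input_data=X, output_data=y)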
@@ -214,7 +222,6 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
          strict: Enable strict validation, this includes value range based validation

      Raises:
-         SnowflakeMLException: NotImplementedError: FeatureGroupSpec is not supported.
          SnowflakeMLException: ValueError: Raised when a feature cannot be found.
          SnowflakeMLException: ValueError: Raised when feature is scalar but confront list element.
          SnowflakeMLException: ValueError: Raised when feature type is not aligned in list element.
@@ -232,7 +239,10 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
          except KeyError:
              raise snowml_exceptions.SnowflakeMLException(
                  error_code=error_codes.INVALID_DATA,
-                 original_exception=ValueError(f"Data Validation Error: feature {ft_name} does not exist in data."),
+                 original_exception=ValueError(
+                     f"Data Validation Error: feature {ft_name} does not exist in data. "
+                     f"Available columns are {data.columns}."
+                 ),
              )

          if data_col.isnull().any():
@@ -240,10 +250,15 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
          df_col_dtype = data_col.dtype

          if isinstance(feature, core.FeatureGroupSpec):
-             raise snowml_exceptions.SnowflakeMLException(
-                 error_code=error_codes.NOT_IMPLEMENTED,
-                 original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
-             )
+             if df_col_dtype != np.dtype("O"):
+                 raise snowml_exceptions.SnowflakeMLException(
+                     error_code=error_codes.INVALID_DATA,
+                     original_exception=ValueError(
+                         f"Data Validation Error in feature group {ft_name}: "
+                         + f"It needs to be a dictionary or list of dictionary, but get {df_col_dtype}."
+                     ),
+                 )
+             continue

          assert isinstance(feature, core.FeatureSpec)  # assert for mypy.
          ft_type = feature._dtype
@@ -433,7 +448,6 @@ def _validate_snowpark_data(
          strict: Enable strict validation, this includes value range based validation.

      Raises:
-         SnowflakeMLException: NotImplementedError: FeatureGroupSpec is not supported.
          SnowflakeMLException: ValueError: Raised when confronting invalid feature.
          SnowflakeMLException: ValueError: Raised when a feature cannot be found.

@@ -463,10 +477,15 @@ def _validate_snowpark_data(
              if field.name == ft_name:
                  found = True
                  if isinstance(feature, core.FeatureGroupSpec):
-                     raise snowml_exceptions.SnowflakeMLException(
-                         error_code=error_codes.NOT_IMPLEMENTED,
-                         original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
-                     )
+                     if not isinstance(field.datatype, (spt.ArrayType, spt.StructType, spt.VariantType)):
+                         errors[identifier_rule].append(
+                             ValueError(
+                                 f"Data Validation Error in feature group {feature.name}: "
+                                 + f"Feature expects {feature.as_snowpark_type()},"
+                                 + f" while {field.name} has type {field.datatype}."
+                             ),
+                         )
+                     continue
                  assert isinstance(feature, core.FeatureSpec)  # mypy
                  ft_type = feature._dtype
                  field_data_type = field.datatype
@@ -640,11 +659,14 @@ def _validate_snowpark_type_feature(
      )


- def _convert_local_data_to_df(data: model_types.SupportedLocalDataType) -> pd.DataFrame:
+ def _convert_local_data_to_df(
+     data: model_types.SupportedLocalDataType, ensure_serializable: bool = False
+ ) -> pd.DataFrame:
      """Convert local data to pandas DataFrame or Snowpark DataFrame

      Args:
          data: The provided data.
+         ensure_serializable: Ensure the data is serializable. Defaults to False.

      Raises:
          SnowflakeMLException: NotImplementedError: Raised when data cannot be handled by any data handler.
@@ -656,7 +678,7 @@ def _convert_local_data_to_df(data: model_types.SupportedLocalDataType) -> pd.Da
      for handler in _LOCAL_DATA_HANDLERS:
          if handler.can_handle(data):
              handler.validate(data)
-             df = handler.convert_to_df(data, ensure_serializable=False)
+             df = handler.convert_to_df(data, ensure_serializable=ensure_serializable)
              break
      if df is None:
          raise snowml_exceptions.SnowflakeMLException(
@@ -687,6 +709,10 @@ def _convert_and_validate_local_data(
      return df


+ @telemetry.send_api_usage_telemetry(
+     project=_TELEMETRY_PROJECT,
+     subproject=_MODEL_TELEMETRY_SUBPROJECT,
+ )
  def infer_signature(
      input_data: model_types.SupportedLocalDataType,
      output_data: model_types.SupportedLocalDataType,
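
Note: `infer_signature` itself is unchanged apart from the decorator; calls are now recorded under project "MLOps", subproject "ModelSignature". For reference, a plain invocation:

    import pandas as pd
    from snowflake.ml.model import model_signature

    sig = model_signature.infer_signature(
        input_data=pd.DataFrame({"feature": [1.0, 2.0]}),
        output_data=pd.DataFrame({"prediction": [0, 1]}),
    )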
snowflake/ml/model/type_hints.py

@@ -7,6 +7,7 @@ from typing_extensions import NotRequired

  if TYPE_CHECKING:
      import catboost
+     import keras
      import lightgbm
      import mlflow
      import numpy as np
@@ -25,7 +26,15 @@ if TYPE_CHECKING:
      from snowflake.ml.modeling.framework import base  # noqa: F401


- _SupportedBuiltins = Union[int, float, bool, str, bytes, "_SupportedBuiltinsList"]
+ _SupportedBuiltins = Union[
+     int,
+     float,
+     bool,
+     str,
+     bytes,
+     Dict[str, Union["_SupportedBuiltins", "_SupportedBuiltinsList"]],
+     "_SupportedBuiltinsList",
+ ]
  _SupportedNumpyDtype = Union[
      "np.int8",
      "np.int16",
@@ -47,7 +56,7 @@ _SupportedBuiltinsList = Sequence[_SupportedBuiltins]
  _SupportedArrayLike = Union[_SupportedNumpyArray, "torch.Tensor", "tensorflow.Tensor", "tensorflow.Variable"]

  SupportedLocalDataType = Union[
-     "pd.DataFrame", _SupportedNumpyArray, Sequence[_SupportedArrayLike], _SupportedBuiltinsList
+     "pd.DataFrame", _SupportedArrayLike, Sequence[_SupportedArrayLike], _SupportedBuiltinsList
  ]

  SupportedDataType = Union[SupportedLocalDataType, "snowflake.snowpark.DataFrame"]
@@ -68,6 +77,7 @@ SupportedRequireSignatureModelType = Union[
      "torch.nn.Module",
      "torch.jit.ScriptModule",
      "tensorflow.Module",
+     "keras.Model",
  ]

  SupportedNoSignatureRequirementsModelType = Union[
@@ -103,6 +113,7 @@ Here is all acceptable types of Snowflake native model packaging and its handler
  | transformers.Pipeline | huggingface_pipeline.py | _HuggingFacePipelineHandler |
  | huggingface_pipeline.HuggingFacePipelineModel | huggingface_pipeline.py | _HuggingFacePipelineHandler |
  | sentence_transformers.SentenceTransformer | sentence_transformers.py | _SentenceTransformerHandler |
+ | keras.Model | keras.py | _KerasHandler |
  """

  SupportedModelHandlerType = Literal[
@@ -118,6 +129,7 @@ SupportedModelHandlerType = Literal[
      "tensorflow",
      "torchscript",
      "xgboost",
+     "keras",
  ]

  _ModelType = TypeVar("_ModelType", bound=SupportedModelType)
@@ -173,16 +185,19 @@ class SNOWModelSaveOptions(BaseModelSaveOption):
  class PyTorchSaveOptions(BaseModelSaveOption):
      target_methods: NotRequired[Sequence[str]]
      cuda_version: NotRequired[str]
+     multiple_inputs: NotRequired[bool]


  class TorchScriptSaveOptions(BaseModelSaveOption):
      target_methods: NotRequired[Sequence[str]]
      cuda_version: NotRequired[str]
+     multiple_inputs: NotRequired[bool]


  class TensorflowSaveOptions(BaseModelSaveOption):
      target_methods: NotRequired[Sequence[str]]
      cuda_version: NotRequired[str]
+     multiple_inputs: NotRequired[bool]


  class MLFlowSaveOptions(BaseModelSaveOption):
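
Note: the new `multiple_inputs` flag on the PyTorch, TorchScript, and Tensorflow save options lines up with the single-tensor handlers added in this release; presumably it selects between the old sequence-of-tensors call convention and the new single-tensor one. A hedged sketch of passing it through `log_model` options (`reg` is an existing `Registry`, `module` a `torch.nn.Module`, `sample_tensor` a hypothetical sample input):

    mv = reg.log_model(
        module,
        model_name="my_torch_model",
        version_name="v1",
        sample_input_data=sample_tensor,
        options={"multiple_inputs": False},  # flag name from the TypedDicts above
    )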
@@ -202,6 +217,11 @@ class SentenceTransformersSaveOptions(BaseModelSaveOption):
      batch_size: NotRequired[int]


+ class KerasSaveOptions(BaseModelSaveOption):
+     target_methods: NotRequired[Sequence[str]]
+     cuda_version: NotRequired[str]
+
+
  ModelSaveOption = Union[
      BaseModelSaveOption,
      CatBoostModelSaveOptions,
@@ -216,6 +236,7 @@ ModelSaveOption = Union[
      MLFlowSaveOptions,
      HuggingFaceSaveOptions,
      SentenceTransformersSaveOptions,
+     KerasSaveOptions,
  ]

@@ -276,6 +297,10 @@ class SentenceTransformersLoadOptions(BaseModelLoadOption):
      device: NotRequired[str]


+ class KerasLoadOptions(BaseModelLoadOption):
+     use_gpu: NotRequired[bool]
+
+
  ModelLoadOption = Union[
      BaseModelLoadOption,
      CatBoostModelLoadOptions,
@@ -290,6 +315,7 @@ ModelLoadOption = Union[
      MLFlowLoadOptions,
      HuggingFaceLoadOptions,
      SentenceTransformersLoadOptions,
+     KerasLoadOptions,
  ]

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py

@@ -199,8 +199,21 @@ class SnowparkTransformHandlers:
          if expected_output_cols_type == "":
              expected_output_cols_type = "string"
          assert expected_output_cols_type is not None
+
+         # If there is only one output column, the UDF might have generated complex objects (lists, dicts).
+         # In such cases, we attempt to not do explicit cast. (Example: PolynomialFeatures.transform)
+         try_parse_object = len(expected_output_cols) == 1 and expected_output_cols_type != "string"
          for output_feature in expected_output_cols:
-             output_cols.append(F.col(INTERMEDIATE_OBJ_NAME)[output_feature].astype(expected_output_cols_type))
+             column_expr = F.col(INTERMEDIATE_OBJ_NAME)[output_feature]
+
+             if try_parse_object and df_res.count() > 0:
+                 # Only do type casting if it's not an array
+                 if not df_res.select(F.is_array(column_expr)).first()[0]:
+                     column_expr = column_expr.astype(expected_output_cols_type)
+             else:
+                 column_expr = column_expr.astype(expected_output_cols_type)
+
+             output_cols.append(column_expr)
              output_col_names.append(identifier.get_inferred_name(output_feature))

          # Extract output from INTERMEDIATE_OBJ_NAME and drop that column
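
Note: the guard avoids casting ARRAY-valued results (for example, `PolynomialFeatures.transform` emitting one vector column) to a scalar type. A standalone sketch of the same probe against a Snowpark session (`session` is assumed to exist):

    from snowflake.snowpark import functions as F

    # One row whose single column holds an array value.
    df = session.create_dataframe([([1, 2, 3],)], schema=["OUT"])
    col = F.col("OUT")
    # Cast only when the column does not hold arrays.
    if not df.select(F.is_array(col)).first()[0]:
        col = col.astype("float")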