snowflake-ml-python 1.7.5__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (62)
  1. snowflake/cortex/_complete.py +58 -3
  2. snowflake/ml/_internal/file_utils.py +18 -4
  3. snowflake/ml/_internal/platform_capabilities.py +3 -0
  4. snowflake/ml/_internal/telemetry.py +4 -0
  5. snowflake/ml/fileset/fileset.py +0 -1
  6. snowflake/ml/jobs/_utils/constants.py +25 -1
  7. snowflake/ml/jobs/_utils/payload_utils.py +94 -20
  8. snowflake/ml/jobs/_utils/spec_utils.py +95 -31
  9. snowflake/ml/jobs/decorators.py +7 -0
  10. snowflake/ml/jobs/manager.py +20 -0
  11. snowflake/ml/model/_client/model/model_version_impl.py +1 -1
  12. snowflake/ml/model/_client/ops/model_ops.py +113 -17
  13. snowflake/ml/model/_client/ops/service_ops.py +16 -5
  14. snowflake/ml/model/_client/service/model_deployment_spec.py +7 -3
  15. snowflake/ml/model/_client/sql/model_version.py +58 -0
  16. snowflake/ml/model/_client/sql/service.py +10 -2
  17. snowflake/ml/model/_model_composer/model_composer.py +50 -3
  18. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +5 -2
  19. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
  20. snowflake/ml/model/_model_composer/model_method/model_method.py +0 -1
  21. snowflake/ml/model/_packager/model_env/model_env.py +4 -1
  22. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +28 -24
  23. snowflake/ml/model/_packager/model_handlers/keras.py +1 -5
  24. snowflake/ml/model/_packager/model_handlers/pytorch.py +50 -20
  25. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -8
  26. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +1 -2
  27. snowflake/ml/model/_packager/model_handlers/tensorflow.py +46 -26
  28. snowflake/ml/model/_packager/model_handlers/torchscript.py +49 -20
  29. snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
  30. snowflake/ml/model/_packager/model_handlers_migrator/pytorch_migrator_2023_12_01.py +20 -0
  31. snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2025_01_01.py +19 -0
  32. snowflake/ml/model/_packager/model_handlers_migrator/torchscript_migrator_2023_12_01.py +20 -0
  33. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +1 -2
  34. snowflake/ml/model/_packager/model_meta/model_meta.py +5 -1
  35. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +14 -0
  36. snowflake/ml/model/_packager/model_packager.py +3 -5
  37. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -2
  38. snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -0
  39. snowflake/ml/model/_signatures/builtins_handler.py +20 -9
  40. snowflake/ml/model/_signatures/core.py +52 -31
  41. snowflake/ml/model/_signatures/dmatrix_handler.py +98 -0
  42. snowflake/ml/model/_signatures/numpy_handler.py +9 -17
  43. snowflake/ml/model/_signatures/pandas_handler.py +19 -30
  44. snowflake/ml/model/_signatures/pytorch_handler.py +57 -41
  45. snowflake/ml/model/_signatures/snowpark_handler.py +0 -12
  46. snowflake/ml/model/_signatures/tensorflow_handler.py +61 -67
  47. snowflake/ml/model/_signatures/utils.py +120 -8
  48. snowflake/ml/model/custom_model.py +13 -4
  49. snowflake/ml/model/model_signature.py +31 -13
  50. snowflake/ml/model/type_hints.py +13 -2
  51. snowflake/ml/modeling/_internal/estimator_utils.py +5 -1
  52. snowflake/ml/modeling/metrics/ranking.py +3 -0
  53. snowflake/ml/modeling/metrics/regression.py +3 -0
  54. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -1
  55. snowflake/ml/registry/_manager/model_manager.py +55 -7
  56. snowflake/ml/registry/registry.py +59 -1
  57. snowflake/ml/version.py +1 -1
  58. {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.1.dist-info}/METADATA +308 -12
  59. {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.1.dist-info}/RECORD +62 -58
  60. {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.1.dist-info}/WHEEL +1 -1
  61. {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.1.dist-info/licenses}/LICENSE.txt +0 -0
  62. {snowflake_ml_python-1.7.5.dist-info → snowflake_ml_python-1.8.1.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_signatures/pytorch_handler.py
@@ -1,5 +1,5 @@
  from collections import abc
- from typing import TYPE_CHECKING, List, Literal, Optional, Sequence
+ from typing import TYPE_CHECKING, Literal, Optional, Sequence

  import numpy as np
  import pandas as pd
@@ -11,12 +11,54 @@ from snowflake.ml._internal.exceptions import (
      exceptions as snowml_exceptions,
  )
  from snowflake.ml.model import type_hints as model_types
- from snowflake.ml.model._signatures import base_handler, core
+ from snowflake.ml.model._signatures import base_handler, core, numpy_handler

  if TYPE_CHECKING:
      import torch


+ class PyTorchTensorHandler(base_handler.BaseDataHandler["torch.Tensor"]):
+     @staticmethod
+     def can_handle(data: model_types.SupportedDataType) -> TypeGuard["torch.Tensor"]:
+         return type_utils.LazyType("torch.Tensor").isinstance(data)
+
+     @staticmethod
+     def count(data: "torch.Tensor") -> int:
+         return data.shape[0]
+
+     @staticmethod
+     def truncate(data: "torch.Tensor", length: int) -> "torch.Tensor":
+         return data[: min(PyTorchTensorHandler.count(data), length)]
+
+     @staticmethod
+     def validate(data: "torch.Tensor") -> None:
+         return numpy_handler.NumpyArrayHandler.validate(data.detach().cpu().numpy())
+
+     @staticmethod
+     def infer_signature(data: "torch.Tensor", role: Literal["input", "output"]) -> Sequence[core.BaseFeatureSpec]:
+         return numpy_handler.NumpyArrayHandler.infer_signature(data.detach().cpu().numpy(), role=role)
+
+     @staticmethod
+     def convert_to_df(data: "torch.Tensor", ensure_serializable: bool = True) -> pd.DataFrame:
+         return numpy_handler.NumpyArrayHandler.convert_to_df(
+             data.detach().cpu().numpy(), ensure_serializable=ensure_serializable
+         )
+
+     @staticmethod
+     def convert_from_df(df: pd.DataFrame, features: Optional[Sequence[core.BaseFeatureSpec]] = None) -> "torch.Tensor":
+         import torch
+
+         if features is None:
+             if any(dtype == np.dtype("O") for dtype in df.dtypes):
+                 return torch.from_numpy(np.array(df.to_numpy().tolist()))
+             return torch.from_numpy(df.to_numpy())
+
+         assert isinstance(features[0], core.FeatureSpec)
+         return torch.from_numpy(
+             np.array(df.to_numpy().tolist(), dtype=features[0]._dtype._numpy_type),
+         )
+
+
  class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Tensor"]]):
      @staticmethod
      def can_handle(data: model_types.SupportedDataType) -> TypeGuard[Sequence["torch.Tensor"]]:
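The new PyTorchTensorHandler makes a single torch.Tensor a supported signature input; validation, signature inference, and DataFrame conversion all delegate to NumpyArrayHandler via data.detach().cpu().numpy(). A minimal usage sketch, assuming torch is installed (these _signatures handlers are internal, so treat this as illustrative rather than a supported API):

    import torch
    from snowflake.ml.model._signatures import pytorch_handler

    t = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    # A bare tensor is now handled directly; 1.7.5 only handled sequences of tensors.
    assert pytorch_handler.PyTorchTensorHandler.can_handle(t)
    # Inference round-trips through numpy, so dtype/shape rules match the numpy handler.
    features = pytorch_handler.PyTorchTensorHandler.infer_signature(t, role="input")
    df = pytorch_handler.PyTorchTensorHandler.convert_to_df(t)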
@@ -24,56 +66,28 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
              return False
          if len(data) == 0:
              return False
-         if type_utils.LazyType("torch.Tensor").isinstance(data[0]):
-             return all(type_utils.LazyType("torch.Tensor").isinstance(data_col) for data_col in data)
-         return False
+         return all(PyTorchTensorHandler.can_handle(data_col) for data_col in data)

      @staticmethod
      def count(data: Sequence["torch.Tensor"]) -> int:
-         return min(data_col.shape[0] for data_col in data)
+         return min(PyTorchTensorHandler.count(data_col) for data_col in data)

      @staticmethod
      def truncate(data: Sequence["torch.Tensor"], length: int) -> Sequence["torch.Tensor"]:
-         return [data_col[: min(SeqOfPyTorchTensorHandler.count(data), 10)] for data_col in data]
+         return [data_col[: min(SeqOfPyTorchTensorHandler.count(data), length)] for data_col in data]

      @staticmethod
      def validate(data: Sequence["torch.Tensor"]) -> None:
-         import torch
-
          for data_col in data:
-             if data_col.shape == torch.Size([0]):
-                 # Empty array
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Empty data is found."),
-                 )
-
-             if data_col.shape == torch.Size([1]):
-                 # scalar
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Scalar data is found."),
-                 )
+             PyTorchTensorHandler.validate(data_col)

      @staticmethod
      def infer_signature(
          data: Sequence["torch.Tensor"], role: Literal["input", "output"]
      ) -> Sequence[core.BaseFeatureSpec]:
-         feature_prefix = f"{SeqOfPyTorchTensorHandler.FEATURE_PREFIX}_"
-         features: List[core.BaseFeatureSpec] = []
-         role_prefix = (
-             SeqOfPyTorchTensorHandler.INPUT_PREFIX if role == "input" else SeqOfPyTorchTensorHandler.OUTPUT_PREFIX
-         ) + "_"
-
-         for i, data_col in enumerate(data):
-             dtype = core.DataType.from_torch_type(data_col.dtype)
-             ft_name = f"{role_prefix}{feature_prefix}{i}"
-             if len(data_col.shape) == 1:
-                 features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
-             else:
-                 ft_shape = tuple(data_col.shape[1:])
-                 features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
-         return features
+         return numpy_handler.SeqOfNumpyArrayHandler.infer_signature(
+             [data_col.detach().cpu().numpy() for data_col in data], role=role
+         )

      @staticmethod
      def convert_to_df(data: Sequence["torch.Tensor"], ensure_serializable: bool = True) -> pd.DataFrame:
@@ -81,8 +95,8 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
          # the content is still numpy array so that the type could be preserved.
          # But that would not serializable and cannot use as UDF input and output.
          if ensure_serializable:
-             return pd.DataFrame({i: data_col.detach().to("cpu").numpy().tolist() for i, data_col in enumerate(data)})
-         return pd.DataFrame({i: list(data_col.detach().to("cpu").numpy()) for i, data_col in enumerate(data)})
+             return pd.DataFrame({i: data_col.detach().cpu().numpy().tolist() for i, data_col in enumerate(data)})
+         return pd.DataFrame({i: list(data_col.detach().cpu().numpy()) for i, data_col in enumerate(data)})

      @staticmethod
      def convert_from_df(
@@ -95,8 +109,10 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
          for feature in features:
              if isinstance(feature, core.FeatureGroupSpec):
                  raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.NOT_IMPLEMENTED,
-                     original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
+                     error_code=error_codes.INVALID_DATA_TYPE,
+                     original_exception=NotImplementedError(
+                         "FeatureGroupSpec is not supported when converting to Tensorflow tensor."
+                     ),
                  )
              assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
              res.append(torch.from_numpy(np.stack(df[feature.name].to_numpy()).astype(feature._dtype._numpy_type)))
snowflake/ml/model/_signatures/snowpark_handler.py
@@ -65,12 +65,6 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
          dtype_map = {}
          if features:
              for feature in features:
-                 if isinstance(feature, core.FeatureGroupSpec):
-                     raise snowml_exceptions.SnowflakeMLException(
-                         error_code=error_codes.NOT_IMPLEMENTED,
-                         original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
-                     )
-                 assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
                  dtype_map[feature.name] = feature.as_dtype()
          df_local = data.to_pandas()

@@ -122,12 +116,6 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
          column_names = []
          columns = []
          for feature in features:
-             if isinstance(feature, core.FeatureGroupSpec):
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.NOT_IMPLEMENTED,
-                     original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
-                 )
-             assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
              column_names.append(identifier.get_inferred_name(feature.name))
              columns.append(F.col(identifier.get_inferred_name(feature.name)).cast(feature.as_snowpark_type()))

snowflake/ml/model/_signatures/tensorflow_handler.py
@@ -1,5 +1,5 @@
  from collections import abc
- from typing import TYPE_CHECKING, List, Literal, Optional, Sequence, Union
+ from typing import TYPE_CHECKING, Literal, Optional, Sequence, Union

  import numpy as np
  import pandas as pd
@@ -11,12 +11,62 @@ from snowflake.ml._internal.exceptions import (
      exceptions as snowml_exceptions,
  )
  from snowflake.ml.model import type_hints as model_types
- from snowflake.ml.model._signatures import base_handler, core
+ from snowflake.ml.model._signatures import base_handler, core, numpy_handler

  if TYPE_CHECKING:
      import tensorflow


+ class TensorflowTensorHandler(base_handler.BaseDataHandler[Union["tensorflow.Tensor", "tensorflow.Variable"]]):
+     @staticmethod
+     def can_handle(
+         data: model_types.SupportedDataType,
+     ) -> TypeGuard[Union["tensorflow.Tensor", "tensorflow.Variable"]]:
+         return type_utils.LazyType("tensorflow.Tensor").isinstance(data) or type_utils.LazyType(
+             "tensorflow.Variable"
+         ).isinstance(data)
+
+     @staticmethod
+     def count(data: Union["tensorflow.Tensor", "tensorflow.Variable"]) -> int:
+         return numpy_handler.NumpyArrayHandler.count(data.numpy())
+
+     @staticmethod
+     def truncate(
+         data: Union["tensorflow.Tensor", "tensorflow.Variable"], length: int
+     ) -> Union["tensorflow.Tensor", "tensorflow.Variable"]:
+         return data[: min(TensorflowTensorHandler.count(data), length)]
+
+     @staticmethod
+     def validate(data: Union["tensorflow.Tensor", "tensorflow.Variable"]) -> None:
+         numpy_handler.NumpyArrayHandler.validate(data.numpy())
+
+     @staticmethod
+     def infer_signature(
+         data: Union["tensorflow.Tensor", "tensorflow.Variable"], role: Literal["input", "output"]
+     ) -> Sequence[core.BaseFeatureSpec]:
+         return numpy_handler.NumpyArrayHandler.infer_signature(data.numpy(), role=role)
+
+     @staticmethod
+     def convert_to_df(
+         data: Union["tensorflow.Tensor", "tensorflow.Variable"], ensure_serializable: bool = True
+     ) -> pd.DataFrame:
+         return numpy_handler.NumpyArrayHandler.convert_to_df(data.numpy(), ensure_serializable=ensure_serializable)
+
+     @staticmethod
+     def convert_from_df(
+         df: pd.DataFrame, features: Optional[Sequence[core.BaseFeatureSpec]] = None
+     ) -> Union["tensorflow.Tensor", "tensorflow.Variable"]:
+         import tensorflow as tf
+
+         if features is None:
+             if any(dtype == np.dtype("O") for dtype in df.dtypes):
+                 return tf.convert_to_tensor(np.array(df.to_numpy().tolist()))
+             return tf.convert_to_tensor(df.to_numpy())
+
+         assert isinstance(features[0], core.FeatureSpec)
+         return tf.convert_to_tensor(np.array(df.to_numpy().tolist()), dtype=features[0]._dtype._numpy_type)
+
+
  class SeqOfTensorflowTensorHandler(
      base_handler.BaseDataHandler[Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]]
  ):
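TensorflowTensorHandler mirrors the PyTorch change for single tensors and variables, delegating to NumpyArrayHandler through data.numpy(). A minimal sketch under the same caveat (internal API, illustrative only; requires an eager tensor so .numpy() is available):

    import tensorflow as tf
    from snowflake.ml.model._signatures import tensorflow_handler

    t = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    assert tensorflow_handler.TensorflowTensorHandler.can_handle(t)
    features = tensorflow_handler.TensorflowTensorHandler.infer_signature(t, role="input")
    df = tensorflow_handler.TensorflowTensorHandler.convert_to_df(t)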
@@ -28,35 +78,12 @@ class SeqOfTensorflowTensorHandler(
              return False
          if len(data) == 0:
              return False
-         if type_utils.LazyType("tensorflow.Tensor").isinstance(data[0]) or type_utils.LazyType(
-             "tensorflow.Variable"
-         ).isinstance(data[0]):
-             return all(
-                 type_utils.LazyType("tensorflow.Tensor").isinstance(data_col)
-                 or type_utils.LazyType("tensorflow.Variable").isinstance(data_col)
-                 for data_col in data
-             )
-         return False
+
+         return all(TensorflowTensorHandler.can_handle(data_col) for data_col in data)

      @staticmethod
      def count(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> int:
-         import tensorflow as tf
-
-         rows = []
-         for data_col in data:
-             shapes = data_col.shape.as_list()
-             if data_col.shape == tf.TensorShape(None) or (not shapes) or (shapes[0] is None):
-                 # Unknown shape array
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Unknown shape data is found."),
-                 )
-             # Make mypy happy
-             assert isinstance(shapes[0], int)
-
-             rows.append(shapes[0])
-
-         return min(rows)
+         return min(TensorflowTensorHandler.count(data_col) for data_col in data)

      @staticmethod
      def truncate(
@@ -66,49 +93,14 @@ class SeqOfTensorflowTensorHandler(

      @staticmethod
      def validate(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> None:
-         import tensorflow as tf
-
          for data_col in data:
-             if data_col.shape == tf.TensorShape(None) or any(dim is None for dim in data_col.shape.as_list()):
-                 # Unknown shape array
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Unknown shape data is found."),
-                 )
-
-             if data_col.shape == tf.TensorShape([0]):
-                 # Empty array
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Empty data is found."),
-                 )
-
-             if data_col.shape == tf.TensorShape([1]) or data_col.shape == tf.TensorShape([]):
-                 # scalar
-                 raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.INVALID_DATA,
-                     original_exception=ValueError("Data Validation Error: Scalar data is found."),
-                 )
+             TensorflowTensorHandler.validate(data_col)

      @staticmethod
      def infer_signature(
          data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]], role: Literal["input", "output"]
      ) -> Sequence[core.BaseFeatureSpec]:
-         feature_prefix = f"{SeqOfTensorflowTensorHandler.FEATURE_PREFIX}_"
-         features: List[core.BaseFeatureSpec] = []
-         role_prefix = (
-             SeqOfTensorflowTensorHandler.INPUT_PREFIX if role == "input" else SeqOfTensorflowTensorHandler.OUTPUT_PREFIX
-         ) + "_"
-
-         for i, data_col in enumerate(data):
-             dtype = core.DataType.from_numpy_type(data_col.dtype.as_numpy_dtype)
-             ft_name = f"{role_prefix}{feature_prefix}{i}"
-             if len(data_col.shape) == 1:
-                 features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
-             else:
-                 ft_shape = tuple(data_col.shape[1:])
-                 features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
-         return features
+         return numpy_handler.SeqOfNumpyArrayHandler.infer_signature([data_col.numpy() for data_col in data], role=role)

      @staticmethod
      def convert_to_df(
@@ -129,8 +121,10 @@ class SeqOfTensorflowTensorHandler(
          for feature in features:
              if isinstance(feature, core.FeatureGroupSpec):
                  raise snowml_exceptions.SnowflakeMLException(
-                     error_code=error_codes.NOT_IMPLEMENTED,
-                     original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
+                     error_code=error_codes.INVALID_DATA_TYPE,
+                     original_exception=NotImplementedError(
+                         "FeatureGroupSpec is not supported when converting to Tensorflow tensor."
+                     ),
                  )
              assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
              res.append(
snowflake/ml/model/_signatures/utils.py
@@ -135,7 +135,16 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
                  core.FeatureSpec(name="inputs", dtype=core.DataType.STRING),
              ],
              outputs=[
-                 core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+                 core.FeatureGroupSpec(
+                     name="outputs",
+                     specs=[
+                         core.FeatureSpec(name="sequence", dtype=core.DataType.STRING),
+                         core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                         core.FeatureSpec(name="token", dtype=core.DataType.INT64),
+                         core.FeatureSpec(name="token_str", dtype=core.DataType.STRING),
+                     ],
+                     shape=(-1,),
+                 ),
              ],
          )
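The fill-mask signature now models the structured output a transformers fill-mask pipeline actually returns, as a variable-length group (shape=(-1,)) instead of a flat string. One output row under the new spec would look roughly like this (values made up):

    [
        {"sequence": "Paris is the capital of France.", "score": 0.97, "token": 2123, "token_str": "Paris"},
        {"sequence": "Lyon is the capital of France.", "score": 0.01, "token": 10241, "token_str": "Lyon"},
    ]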

@@ -144,7 +153,18 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
          return core.ModelSignature(
              inputs=[core.FeatureSpec(name="inputs", dtype=core.DataType.STRING)],
              outputs=[
-                 core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+                 core.FeatureGroupSpec(
+                     name="outputs",
+                     specs=[
+                         core.FeatureSpec(name="word", dtype=core.DataType.STRING),
+                         core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                         core.FeatureSpec(name="entity", dtype=core.DataType.STRING),
+                         core.FeatureSpec(name="index", dtype=core.DataType.INT64),
+                         core.FeatureSpec(name="start", dtype=core.DataType.INT64),
+                         core.FeatureSpec(name="end", dtype=core.DataType.INT64),
+                     ],
+                     shape=(-1,),
+                 ),
              ],
          )

@@ -171,7 +191,16 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
                  core.FeatureSpec(name="context", dtype=core.DataType.STRING),
              ],
              outputs=[
-                 core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+                 core.FeatureGroupSpec(
+                     name="answers",
+                     specs=[
+                         core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                         core.FeatureSpec(name="start", dtype=core.DataType.INT64),
+                         core.FeatureSpec(name="end", dtype=core.DataType.INT64),
+                         core.FeatureSpec(name="answer", dtype=core.DataType.STRING),
+                     ],
+                     shape=(-1,),
+                 ),
              ],
          )
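Question-answering outputs gain the same structure; the token-classification hunk above and the text-classification hunk below follow the identical FeatureGroupSpec pattern. Illustrative rows matching the new spec (values made up):

    inputs = {"question": "What is the capital of France?", "context": "Paris is the capital of France."}
    # One output row: a variable-length list of candidate answers
    outputs = [{"score": 0.98, "start": 0, "end": 5, "answer": "Paris"}]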

@@ -216,17 +245,22 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
          return core.ModelSignature(
              inputs=[
                  core.FeatureSpec(name="text", dtype=core.DataType.STRING),
-                 core.FeatureSpec(name="text_pair", dtype=core.DataType.STRING),
              ],
              outputs=[
-                 core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+                 core.FeatureGroupSpec(
+                     name="labels",
+                     specs=[
+                         core.FeatureSpec(name="label", dtype=core.DataType.STRING),
+                         core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                     ],
+                     shape=(-1,),
+                 ),
              ],
          )
      # Else, return a dict per input
      return core.ModelSignature(
          inputs=[
              core.FeatureSpec(name="text", dtype=core.DataType.STRING),
-             core.FeatureSpec(name="text_pair", dtype=core.DataType.STRING),
          ],
          outputs=[
              core.FeatureSpec(name="label", dtype=core.DataType.STRING),
@@ -243,9 +277,24 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
          )
      # Always generate a list of dict per input
      return core.ModelSignature(
-         inputs=[core.FeatureSpec(name="inputs", dtype=core.DataType.STRING)],
+         inputs=[
+             core.FeatureGroupSpec(
+                 name="inputs",
+                 specs=[
+                     core.FeatureSpec(name="role", dtype=core.DataType.STRING),
+                     core.FeatureSpec(name="content", dtype=core.DataType.STRING),
+                 ],
+                 shape=(-1,),
+             ),
+         ],
          outputs=[
-             core.FeatureSpec(name="outputs", dtype=core.DataType.STRING),
+             core.FeatureGroupSpec(
+                 name="outputs",
+                 specs=[
+                     core.FeatureSpec(name="generated_text", dtype=core.DataType.STRING),
+                 ],
+                 shape=(-1,),
+             )
          ],
      )
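Text-generation inputs are now modeled as chat-style role/content messages rather than a flat string, and outputs as a list of generated texts. Illustrative data conforming to the new signature (values made up):

    inputs = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me a joke."},
    ]
    outputs = [{"generated_text": "Why did the chicken cross the road? To get to the other side."}]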

@@ -300,3 +349,66 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])

  def series_dropna(series: pd.Series) -> pd.Series:
      return series.dropna(inplace=False).reset_index(drop=True).convert_dtypes()
+
+
+ def infer_list(name: str, data: List[Any]) -> core.BaseFeatureSpec:
+     """Infer the feature specification from a list.
+
+     Args:
+         name: Feature name.
+         data: A list.
+
+     Raises:
+         SnowflakeMLException: ValueError: Raised when empty list is provided.
+
+     Returns:
+         A feature specification.
+     """
+     if not data:
+         raise snowml_exceptions.SnowflakeMLException(
+             error_code=error_codes.INVALID_DATA,
+             original_exception=ValueError("Data Validation Error: Empty list is found."),
+         )
+
+     if all(isinstance(value, dict) for value in data):
+         ft = infer_dict(name, data[0])
+         ft._name = name
+         ft._shape = (-1,)
+         return ft
+
+     arr = convert_list_to_ndarray(data)
+     arr_dtype = core.DataType.from_numpy_type(arr.dtype)
+
+     return core.FeatureSpec(name=name, dtype=arr_dtype, shape=arr.shape)
+
+
+ def infer_dict(name: str, data: Dict[str, Any]) -> core.FeatureGroupSpec:
+     """Infer the feature specification from a dictionary.
+
+     Args:
+         name: Feature name.
+         data: A dictionary.
+
+     Raises:
+         SnowflakeMLException: ValueError: Raised when empty dictionary is provided.
+         SnowflakeMLException: ValueError: Raised when empty list is found in the dictionary.
+
+     Returns:
+         A feature group specification.
+     """
+     if not data:
+         raise snowml_exceptions.SnowflakeMLException(
+             error_code=error_codes.INVALID_DATA,
+             original_exception=ValueError("Data Validation Error: Empty dictionary is found."),
+         )
+
+     specs = []
+     for key, value in data.items():
+         if isinstance(value, list):
+             specs.append(infer_list(key, value))
+         elif isinstance(value, dict):
+             specs.append(infer_dict(key, value))
+         else:
+             specs.append(core.FeatureSpec(name=key, dtype=core.DataType.from_numpy_type(np.array(value).dtype)))
+
+     return core.FeatureGroupSpec(name=name, specs=specs)
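infer_list and infer_dict make nested signature inference recursive: a homogeneous list of dicts becomes a FeatureGroupSpec with shape=(-1,), and scalar dict values are typed via np.array(value).dtype. A hedged behavior sketch (internal helpers; the exact member dtypes shown are assumptions):

    from snowflake.ml.model._signatures import utils

    spec = utils.infer_list("outputs", [{"label": "POSITIVE", "score": 0.98}])
    # Roughly: FeatureGroupSpec(name="outputs", shape=(-1,)) containing
    # FeatureSpec("label", STRING) and FeatureSpec("score", DOUBLE).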
snowflake/ml/model/custom_model.py
@@ -76,7 +76,7 @@ class ModelRef:
      def __getattr__(self, method_name: str) -> Any:
          if hasattr(self._model, method_name):
              return MethodRef(self, method_name)
-         raise TypeError(f"Model is does not have {method_name}.")
+         raise AttributeError(f"Method {method_name} not found in model {self._name}.")

      def __getstate__(self) -> Dict[str, Any]:
          state = self.__dict__.copy()
@@ -94,7 +94,16 @@ class ModelRef:

  class ModelContext:
      """
-     Context for a custom model showing paths to artifacts and mapping between model name and object reference.
+     Context for a custom model storing paths to file artifacts and model object references.
+
+     Keyword argument values can be string file paths or supported in-memory models. Paths and model references
+     can be accessed with dictionary access methods in the custom model.
+
+     For example, in a custom model with `context=ModelContext(my_file='my_file.pkl', my_model=my_model)`,
+     the filepath and model reference can be accessed with `self.context['my_file']` and `self.context['my_model']`
+     in the inference and init methods.
+
+     The use of `artifacts` and `model_refs` arguments is deprecated. Set keyword arguments directly instead.

      Attributes:
          artifacts: A dictionary mapping the name of the artifact to its path.
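A short sketch of the keyword-argument pattern the updated docstring describes; my_file.pkl is a placeholder path and my_model an in-memory model assumed to expose predict:

    import pandas as pd
    from snowflake.ml.model import custom_model

    class MyModel(custom_model.CustomModel):
        def __init__(self, context: custom_model.ModelContext) -> None:
            super().__init__(context)
            self._path = self.context["my_file"]    # artifact path
            self._model = self.context["my_model"]  # model reference

        @custom_model.inference_api
        def predict(self, X: pd.DataFrame) -> pd.DataFrame:
            return pd.DataFrame(self._model.predict(X))

    m = MyModel(custom_model.ModelContext(my_file="my_file.pkl", my_model=my_model))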
@@ -267,14 +276,14 @@ def _validate_predict_function(func: Callable[[model_types.CustomModelType, pd.D


  def inference_api(
-     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]
+     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
  ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
      func.__dict__["_is_inference_api"] = True
      return func


  def partitioned_inference_api(
-     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]
+     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
  ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
      func.__dict__["_is_inference_api"] = True
      func.__dict__["_is_partitioned_inference_api"] = True