snowflake-ml-python 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- snowflake/ml/_internal/platform_capabilities.py +36 -0
- snowflake/ml/_internal/utils/url.py +42 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
- snowflake/ml/data/data_connector.py +103 -1
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
- snowflake/ml/experiment/callback/__init__.py +0 -0
- snowflake/ml/experiment/callback/keras.py +25 -2
- snowflake/ml/experiment/callback/lightgbm.py +27 -2
- snowflake/ml/experiment/callback/xgboost.py +25 -2
- snowflake/ml/experiment/experiment_tracking.py +93 -3
- snowflake/ml/experiment/utils.py +6 -0
- snowflake/ml/feature_store/feature_view.py +34 -24
- snowflake/ml/jobs/_interop/protocols.py +3 -0
- snowflake/ml/jobs/_utils/constants.py +1 -0
- snowflake/ml/jobs/_utils/payload_utils.py +354 -356
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
- snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
- snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
- snowflake/ml/jobs/_utils/spec_utils.py +1 -445
- snowflake/ml/jobs/_utils/stage_utils.py +22 -1
- snowflake/ml/jobs/_utils/types.py +14 -7
- snowflake/ml/jobs/job.py +2 -8
- snowflake/ml/jobs/manager.py +57 -135
- snowflake/ml/lineage/lineage_node.py +1 -1
- snowflake/ml/model/__init__.py +6 -0
- snowflake/ml/model/_client/model/batch_inference_specs.py +16 -1
- snowflake/ml/model/_client/model/model_version_impl.py +130 -14
- snowflake/ml/model/_client/ops/deployment_step.py +36 -0
- snowflake/ml/model/_client/ops/model_ops.py +93 -8
- snowflake/ml/model/_client/ops/service_ops.py +32 -52
- snowflake/ml/model/_client/service/import_model_spec_schema.py +23 -0
- snowflake/ml/model/_client/service/model_deployment_spec.py +12 -4
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -0
- snowflake/ml/model/_client/sql/model_version.py +30 -6
- snowflake/ml/model/_client/sql/service.py +94 -5
- snowflake/ml/model/_model_composer/model_composer.py +1 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -0
- snowflake/ml/model/_model_composer/model_method/model_method.py +61 -2
- snowflake/ml/model/_packager/model_handler.py +8 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +52 -0
- snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
- snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
- snowflake/ml/model/_packager/model_handlers/xgboost.py +26 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +40 -7
- snowflake/ml/model/_packager/model_packager.py +1 -1
- snowflake/ml/model/_signatures/core.py +390 -8
- snowflake/ml/model/_signatures/utils.py +13 -4
- snowflake/ml/model/code_path.py +104 -0
- snowflake/ml/model/compute_pool.py +2 -0
- snowflake/ml/model/custom_model.py +55 -13
- snowflake/ml/model/model_signature.py +13 -1
- snowflake/ml/model/models/huggingface.py +285 -0
- snowflake/ml/model/models/huggingface_pipeline.py +19 -208
- snowflake/ml/model/type_hints.py +7 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
- snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
- snowflake/ml/registry/_manager/model_manager.py +230 -15
- snowflake/ml/registry/registry.py +4 -4
- snowflake/ml/utils/html_utils.py +67 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/METADATA +81 -7
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/RECORD +67 -59
- snowflake/ml/jobs/_utils/runtime_env_utils.py +0 -63
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_packager/model_handlers/xgboost.py

@@ -194,7 +194,18 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
 
         if kwargs.get("use_gpu", False):
             assert type(kwargs.get("use_gpu", False)) == bool
-            gpu_params = {"tree_method": "gpu_hist", "predictor": "gpu_predictor"}
+            from packaging import version
+
+            xgb_version = version.parse(xgboost.__version__)
+            if xgb_version >= version.parse("3.1.0"):
+                # XGBoost 3.1.0+: Use device="cuda" for GPU acceleration
+                # gpu_hist and gpu_predictor were removed in XGBoost 3.1.0
+                # See: https://xgboost.readthedocs.io/en/latest/changes/v3.1.0.html
+                gpu_params = {"tree_method": "hist", "device": "cuda"}
+            else:
+                # XGBoost < 3.1.0: Use legacy gpu_hist tree_method
+                gpu_params = {"tree_method": "gpu_hist", "predictor": "gpu_predictor"}
+
             if isinstance(m, xgboost.Booster):
                 m.set_param(gpu_params)
             elif isinstance(m, xgboost.XGBModel):

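The version gate above can be exercised on its own. A minimal sketch (the helper name select_gpu_params is illustrative, not part of the package), assuming xgboost and packaging are installed:

```python
import xgboost
from packaging import version


def select_gpu_params() -> dict:
    """Return GPU params matching the installed XGBoost version.

    XGBoost 3.1.0 removed gpu_hist/gpu_predictor in favor of device="cuda";
    older releases still expect the legacy names.
    """
    if version.parse(xgboost.__version__) >= version.parse("3.1.0"):
        return {"tree_method": "hist", "device": "cuda"}
    return {"tree_method": "gpu_hist", "predictor": "gpu_predictor"}


params = {"objective": "reg:squarederror", **select_gpu_params()}
```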
@@ -256,6 +267,20 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
         @custom_model.inference_api
         def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
             import shap
+            from packaging import version
+
+            xgb_version = version.parse(xgboost.__version__)
+            shap_version = version.parse(shap.__version__)
+
+            # SHAP < 0.50.0 is incompatible with XGBoost >= 3.1.0 due to base_score format change
+            # (base_score is now stored as a vector for multi-output models)
+            # See: https://xgboost.readthedocs.io/en/latest/changes/v3.1.0.html
+            if xgb_version >= version.parse("3.1.0") and shap_version < version.parse("0.50.0"):
+                raise RuntimeError(
+                    f"SHAP version {shap.__version__} is incompatible with XGBoost version "
+                    f"{xgboost.__version__}. XGBoost 3.1+ changed the model format which requires "
+                    f"SHAP >= 0.50.0. Please upgrade SHAP or use XGBoost < 3.1."
+                )
 
             explainer = shap.TreeExplainer(raw_model)
             df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer.shap_values(X))

snowflake/ml/model/_packager/model_meta/model_meta.py

@@ -20,6 +20,7 @@ from snowflake.ml.model._packager.model_env import model_env
 from snowflake.ml.model._packager.model_meta import model_blob_meta, model_meta_schema
 from snowflake.ml.model._packager.model_meta_migrator import migrator_plans
 from snowflake.ml.model._packager.model_runtime import model_runtime
+from snowflake.ml.model.code_path import CodePath
 
 MODEL_METADATA_FILE = "model.yaml"
 MODEL_CODE_DIR = "code"

@@ -39,7 +40,7 @@ def create_model_metadata(
     signatures: Optional[dict[str, model_signature.ModelSignature]] = None,
     function_properties: Optional[dict[str, dict[str, Any]]] = None,
     metadata: Optional[dict[str, str]] = None,
-    code_paths: Optional[list[str]] = None,
+    code_paths: Optional[list[model_types.CodePathLike]] = None,
     ext_modules: Optional[list[ModuleType]] = None,
     conda_dependencies: Optional[list[str]] = None,
     pip_requirements: Optional[list[str]] = None,

@@ -77,7 +78,8 @@ def create_model_metadata(
         **kwargs: Dict of attributes and values of the metadata. Used when loading from file.
 
     Raises:
-        ValueError: Raised when the code path contains reserved file or directory.
+        ValueError: Raised when the code path contains reserved file or directory, or destination conflicts.
+        FileNotFoundError: Raised when a code path does not exist.
 
     Yields:
         A model metadata object.

@@ -134,13 +136,44 @@ def create_model_metadata(
     os.makedirs(code_dir_path, exist_ok=True)
 
     if code_paths:
+        # Resolve all code paths and check for conflicts
+        resolved_paths: list[tuple[str, str]] = []  # (source, destination_relative)
         for code_path in code_paths:
-
-
-
-
+            if isinstance(code_path, CodePath):
+                source, dest_relative = code_path._resolve()
+            else:
+                # String path: keep existing behavior
+                source = os.path.normpath(os.path.abspath(code_path))
+                if not os.path.exists(source):
+                    raise FileNotFoundError(f"Code path '{code_path}' does not exist (resolved to {source}).")
+                dest_relative = os.path.basename(source)
+            resolved_paths.append((source, dest_relative))
+
+        # Check for destination conflicts
+        seen: dict[str, str] = {}
+        for source, dest in resolved_paths:
+            if dest in seen:
+                raise ValueError(
+                    f"Destination path conflict: '{dest}' is targeted by both '{seen[dest]}' and '{source}'."
+                )
+            seen[dest] = source
+
+        # Copy files
+        for source, dest_relative in resolved_paths:
+            # Prevent reserved name conflicts
+            dest_name = dest_relative.split(os.sep)[0] if os.sep in dest_relative else dest_relative
+            if (os.path.isfile(source) and os.path.splitext(dest_name)[0] == _SNOWFLAKE_PKG_NAME) or (
+                os.path.isdir(source) and dest_name == _SNOWFLAKE_PKG_NAME
+            ):
                 raise ValueError("`snowflake` is a reserved name and you cannot contain that into code path.")
-
+
+            parent_dir = (
+                os.path.join(code_dir_path, os.path.dirname(dest_relative))
+                if os.path.dirname(dest_relative)
+                else code_dir_path
+            )
+            os.makedirs(parent_dir, exist_ok=True)
+            file_utils.copy_file_or_tree(source, parent_dir)
 
     try:
         imported_modules = []

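For plain string entries, the new code_paths handling above boils down to resolving each source to an absolute path, deriving its destination from the basename, and rejecting missing sources or duplicate destinations; CodePath objects instead supply their destination via a private _resolve(). A simplified, hypothetical sketch of that resolution step (resolve_code_paths is not part of the package):

```python
import os


def resolve_code_paths(code_paths: list[str]) -> list[tuple[str, str]]:
    """Map each source path to a destination name under the model's code dir,
    rejecting missing sources and duplicate destinations (e.g. two different
    directories both named 'utils')."""
    resolved: list[tuple[str, str]] = []
    seen: dict[str, str] = {}
    for code_path in code_paths:
        source = os.path.normpath(os.path.abspath(code_path))
        if not os.path.exists(source):
            raise FileNotFoundError(f"Code path '{code_path}' does not exist (resolved to {source}).")
        dest = os.path.basename(source)
        if dest in seen:
            raise ValueError(
                f"Destination path conflict: '{dest}' is targeted by both '{seen[dest]}' and '{source}'."
            )
        seen[dest] = source
        resolved.append((source, dest))
    return resolved
```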
snowflake/ml/model/_packager/model_packager.py

@@ -49,7 +49,7 @@ class ModelPackager:
         target_platforms: Optional[list[model_types.TargetPlatform]] = None,
         python_version: Optional[str] = None,
         ext_modules: Optional[list[ModuleType]] = None,
-        code_paths: Optional[list[str]] = None,
+        code_paths: Optional[list[model_types.CodePathLike]] = None,
         options: model_types.ModelSaveOption,
         task: model_types.Task = model_types.Task.UNKNOWN,
     ) -> model_meta.ModelMetadata:

snowflake/ml/model/_signatures/core.py

@@ -191,6 +191,35 @@ class DataType(Enum):
                 original_exception=NotImplementedError(f"Type {snowpark_type} is not supported as a DataType."),
             )
 
+    @classmethod
+    def from_python_type(cls, python_type: type) -> "DataType":
+        """Translate Python built-in type to DataType for signature definition.
+
+        Args:
+            python_type: A Python built-in type (int, float, str, bool).
+
+        Raises:
+            SnowflakeMLException: NotImplementedError: Raised when the given Python type is not supported.
+
+        Returns:
+            Corresponding DataType.
+        """
+        python_to_snowml_type_mapping: dict[type, "DataType"] = {
+            int: DataType.INT64,
+            float: DataType.DOUBLE,
+            str: DataType.STRING,
+            bool: DataType.BOOL,
+        }
+        if python_type in python_to_snowml_type_mapping:
+            return python_to_snowml_type_mapping[python_type]
+        raise snowml_exceptions.SnowflakeMLException(
+            error_code=error_codes.NOT_IMPLEMENTED,
+            original_exception=NotImplementedError(
+                f"Python type {python_type} is not supported as a DataType. "
+                f"Supported types are: {list(python_to_snowml_type_mapping.keys())}."
+            ),
+        )
+
 
 class BaseFeatureSpec(ABC):
     """Abstract Class for specification of a feature."""

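Usage of the new classmethod is straightforward; a sketch using the internal import path shown in the diff (the same names are typically re-exported via snowflake.ml.model.model_signature):

```python
from snowflake.ml.model._signatures.core import DataType

assert DataType.from_python_type(int) is DataType.INT64
assert DataType.from_python_type(float) is DataType.DOUBLE
assert DataType.from_python_type(str) is DataType.STRING
assert DataType.from_python_type(bool) is DataType.BOOL
# Unsupported built-ins (e.g. bytes) raise a SnowflakeMLException wrapping NotImplementedError.
```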
@@ -481,10 +510,280 @@ class FeatureGroupSpec(BaseFeatureSpec):
         return FeatureGroupSpec(name=input_dict["name"], specs=specs, shape=shape)
 
 
+class BaseParamSpec(ABC):
+    """Abstract Class for specification of a parameter."""
+
+    def __init__(self, name: str, shape: Optional[tuple[int, ...]] = None) -> None:
+        self._name = name
+
+        if shape is not None and not isinstance(shape, tuple):
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_TYPE,
+                original_exception=TypeError("Shape should be a tuple if presented."),
+            )
+        self._shape = shape
+
+    @final
+    @property
+    def name(self) -> str:
+        """Name of the parameter."""
+        return self._name
+
+    @final
+    @property
+    def shape(self) -> Optional[tuple[int, ...]]:
+        """Shape of the parameter. None means scalar."""
+        return self._shape
+
+    @abstractmethod
+    def to_dict(self) -> dict[str, Any]:
+        """Serialization"""
+
+    @classmethod
+    @abstractmethod
+    def from_dict(cls, input_dict: dict[str, Any]) -> "BaseParamSpec":
+        """Deserialization"""
+
+
+class ParamSpec(BaseParamSpec):
+    """Specification of a parameter in Snowflake native model packaging."""
+
+    def __init__(
+        self,
+        name: str,
+        dtype: DataType,
+        default_value: Any,
+        shape: Optional[tuple[int, ...]] = None,
+    ) -> None:
+        """Initialize a parameter.
+
+        Args:
+            name: Name of the parameter.
+            dtype: Type of the parameter.
+            default_value: Default value of the parameter.
+            shape: Shape of the parameter. None means scalar, otherwise a tuple
+                representing dimensions. Use -1 for variable length dimensions.
+        """
+        super().__init__(name=name, shape=shape)
+
+        self._validate_default_value(dtype, default_value, shape)
+        self._dtype = dtype
+        self._default_value = default_value
+
+    @staticmethod
+    def _validate_default_value(dtype: DataType, default_value: Any, shape: Optional[tuple[int, ...]]) -> None:
+        """Validate that default_value is compatible with dtype and shape.
+
+        Args:
+            dtype: The expected data type.
+            default_value: The default value to validate. None is allowed and means no default.
+            shape: The expected shape. None means scalar.
+
+        Raises:
+            SnowflakeMLException: ValueError: When the default_value is not compatible with dtype/shape.
+        """
+        if default_value is None:
+            return
+
+        try:
+            arr = np.array(default_value, dtype=dtype._numpy_type)
+
+            # Validate shape compatibility
+            if shape is None:
+                # Scalar expected
+                if arr.ndim != 0:
+                    raise snowml_exceptions.SnowflakeMLException(
+                        error_code=error_codes.INVALID_ARGUMENT,
+                        original_exception=ValueError(f"Expected scalar value, got array with shape {arr.shape}"),
+                    )
+            else:
+                # Non-scalar expected
+                if arr.ndim != len(shape):
+                    raise snowml_exceptions.SnowflakeMLException(
+                        error_code=error_codes.INVALID_ARGUMENT,
+                        original_exception=ValueError(
+                            f"Expected {len(shape)}-dimensional value, got {arr.ndim}-dimensional"
+                        ),
+                    )
+                # Check each dimension (-1 means variable length)
+                for i, (expected, actual) in enumerate(zip(shape, arr.shape)):
+                    if expected != -1 and expected != actual:
+                        raise snowml_exceptions.SnowflakeMLException(
+                            error_code=error_codes.INVALID_ARGUMENT,
+                            original_exception=ValueError(f"Dimension {i}: expected {expected}, got {actual}"),
+                        )
+
+        except (ValueError, TypeError, OverflowError) as e:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_ARGUMENT,
+                original_exception=ValueError(
+                    f"Default value {repr(default_value)} (type: {type(default_value).__name__}) "
+                    f"is not compatible with dtype {dtype} and shape {shape}. {str(e)}"
+                ),
+            )
+
+    @property
+    def dtype(self) -> DataType:
+        """Type of the parameter."""
+        return self._dtype
+
+    @property
+    def default_value(self) -> Any:
+        """Default value of the parameter."""
+        return self._default_value
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the parameter specification into a dict.
+
+        Returns:
+            A dict that serializes the parameter specification.
+        """
+        result: dict[str, Any] = {
+            "name": self._name,
+            "dtype": self._dtype.name,
+            "default_value": self._default_value,
+        }
+        if self._shape is not None:
+            result["shape"] = self._shape
+        return result
+
+    @classmethod
+    def from_dict(cls, input_dict: dict[str, Any]) -> "ParamSpec":
+        """Deserialize the parameter specification from a dict.
+
+        Args:
+            input_dict: The dict containing information of the parameter specification.
+
+        Returns:
+            ParamSpec: The deserialized parameter specification.
+        """
+        shape = input_dict.get("shape", None)
+        if shape is not None:
+            shape = tuple(shape)
+        return ParamSpec(
+            name=input_dict["name"],
+            dtype=DataType[input_dict["dtype"]],
+            default_value=input_dict["default_value"],
+            shape=shape,
+        )
+
+    def __eq__(self, other: object) -> bool:
+        if isinstance(other, ParamSpec):
+            return (
+                self._name == other._name
+                and self._dtype == other._dtype
+                and np.array_equal(self._default_value, other._default_value)
+                and self._shape == other._shape
+            )
+        else:
+            return False
+
+    def __repr__(self) -> str:
+        shape_str = f", shape={repr(self._shape)}" if self._shape else ""
+        return (
+            f"ParamSpec(name={repr(self._name)}, dtype={repr(self._dtype)}, "
+            f"default_value={repr(self._default_value)}{shape_str})"
+        )
+
+    @classmethod
+    def from_mlflow_spec(cls, param_spec: "mlflow.types.ParamSpec") -> "ParamSpec":
+        return ParamSpec(
+            name=param_spec.name,
+            dtype=DataType.from_numpy_type(param_spec.dtype.to_numpy()),
+            default_value=param_spec.default,
+        )
+
+
+class ParamGroupSpec(BaseParamSpec):
+    """Specification of a group of parameters in Snowflake native model packaging."""
+
+    def __init__(
+        self,
+        name: str,
+        specs: list[BaseParamSpec],
+        shape: Optional[tuple[int, ...]] = None,
+    ) -> None:
+        """Initialize a parameter group.
+
+        Args:
+            name: Name of the parameter group.
+            specs: A list of parameter specifications that composes the group.
+            shape: Shape of the parameter group. None means scalar, otherwise a tuple
+                representing dimensions. Use -1 for variable length dimensions.
+        """
+        super().__init__(name=name, shape=shape)
+        self._specs = specs
+        self._validate()
+
+    def _validate(self) -> None:
+        if len(self._specs) == 0:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_ARGUMENT, original_exception=ValueError("No children param specs.")
+            )
+
+    @property
+    def specs(self) -> list[BaseParamSpec]:
+        """List of parameter specifications in the group."""
+        return self._specs
+
+    def __eq__(self, other: object) -> bool:
+        if isinstance(other, ParamGroupSpec):
+            return self._name == other._name and self._specs == other._specs and self._shape == other._shape
+        return False
+
+    def __repr__(self) -> str:
+        spec_strs = ",\n\t\t".join(repr(spec) for spec in self._specs)
+        shape_str = f",\nshape={repr(self._shape)}" if self._shape else ""
+        return textwrap.dedent(
+            f"""ParamGroupSpec(
+                    name={repr(self._name)},
+                    specs=[
+                        {spec_strs}
+                    ]{shape_str}
+                )
+            """
+        )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the parameter group into a dict.
+
+        Returns:
+            A dict that serializes the parameter group.
+        """
+        result: dict[str, Any] = {"name": self._name, "specs": [s.to_dict() for s in self._specs]}
+        if self._shape is not None:
+            result["shape"] = self._shape
+        return result
+
+    @classmethod
+    def from_dict(cls, input_dict: dict[str, Any]) -> "ParamGroupSpec":
+        """Deserialize the parameter group from a dict.
+
+        Args:
+            input_dict: The dict containing information of the parameter group.
+
+        Returns:
+            A parameter group instance deserialized and created from the dict.
+        """
+        specs: list[BaseParamSpec] = []
+        for e in input_dict["specs"]:
+            spec: BaseParamSpec = ParamGroupSpec.from_dict(e) if "specs" in e else ParamSpec.from_dict(e)
+            specs.append(spec)
+        shape = input_dict.get("shape", None)
+        if shape is not None:
+            shape = tuple(shape)
+        return ParamGroupSpec(name=input_dict["name"], specs=specs, shape=shape)
+
+
 class ModelSignature:
     """Signature of a model that specifies the input and output of a model."""
 
-    def __init__(self, inputs: Sequence[BaseFeatureSpec], outputs: Sequence[BaseFeatureSpec]) -> None:
+    def __init__(
+        self,
+        inputs: Sequence[BaseFeatureSpec],
+        outputs: Sequence[BaseFeatureSpec],
+        params: Optional[Sequence[BaseParamSpec]] = None,
+    ) -> None:
         """Initialize a model signature.
 
         Args:

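Based on the added classes, a ParamSpec validates its default value against the declared dtype and shape at construction time and round-trips through to_dict/from_dict. A hedged sketch, again using the internal module path from the diff:

```python
from snowflake.ml.model._signatures.core import DataType, ParamSpec

# Scalar parameter with a default.
temperature = ParamSpec(name="temperature", dtype=DataType.DOUBLE, default_value=0.7)

# Vector parameter: the default must match the declared shape ((2,) here).
weights = ParamSpec(name="weights", dtype=DataType.DOUBLE, default_value=[0.5, 0.5], shape=(2,))

serialized = temperature.to_dict()  # {'name': 'temperature', 'dtype': 'DOUBLE', 'default_value': 0.7}
assert ParamSpec.from_dict(serialized) == temperature

# A mismatched default fails validation, e.g.:
# ParamSpec(name="weights", dtype=DataType.DOUBLE, default_value=[0.5], shape=(2,))
#   -> SnowflakeMLException wrapping ValueError("Dimension 0: expected 2, got 1")
```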
@@ -492,9 +791,19 @@ class ModelSignature:
                 the input of the model.
             outputs: A sequence of feature specifications and feature group specifications that will compose
                 the output of the model.
+            params: A sequence of parameter specifications and parameter group specifications that will compose
+                the parameters of the model. Defaults to None.
+
+        Raises:
+            SnowflakeMLException: ValueError: When the parameters have duplicate names or the same
+                names as input features.
+
+        # noqa: DAR402
         """
         self._inputs = inputs
         self._outputs = outputs
+        self._params = params or []
+        self._name_validation()
 
     @property
     def inputs(self) -> Sequence[BaseFeatureSpec]:

@@ -506,9 +815,18 @@ class ModelSignature:
         """Outputs of the model, containing a sequence of feature specifications and feature group specifications."""
         return self._outputs
 
+    @property
+    def params(self) -> Sequence[BaseParamSpec]:
+        """Parameters of the model, containing a sequence of parameter specifications."""
+        return self._params
+
     def __eq__(self, other: object) -> bool:
         if isinstance(other, ModelSignature):
-            return self._inputs == other._inputs and self._outputs == other._outputs
+            return (
+                self._inputs == other._inputs
+                and self._outputs == other._outputs
+                and getattr(other, "_params", []) == self._params  # handles backward compatibility
+            )
         else:
             return False
 

@@ -522,6 +840,7 @@ class ModelSignature:
         return {
             "inputs": [spec.to_dict() for spec in self._inputs],
             "outputs": [spec.to_dict() for spec in self._outputs],
+            "params": [spec.to_dict() for spec in self._params],
         }
 
     @classmethod

@@ -536,18 +855,26 @@ class ModelSignature:
         """
         sig_outs = loaded["outputs"]
         sig_inputs = loaded["inputs"]
+        # If parameters is not provided, default to empty list for backward compatibility.
+        sig_params = loaded.get("params", [])
 
         deserialize_spec: Callable[[dict[str, Any]], BaseFeatureSpec] = lambda sig_spec: (
            FeatureGroupSpec.from_dict(sig_spec) if "specs" in sig_spec else FeatureSpec.from_dict(sig_spec)
         )
+        deserialize_param: Callable[[dict[str, Any]], BaseParamSpec] = lambda sig_param: (
+            ParamGroupSpec.from_dict(sig_param) if "specs" in sig_param else ParamSpec.from_dict(sig_param)
+        )
 
         return ModelSignature(
-            inputs=[deserialize_spec(s) for s in sig_inputs], outputs=[deserialize_spec(s) for s in sig_outs]
+            inputs=[deserialize_spec(s) for s in sig_inputs],
+            outputs=[deserialize_spec(s) for s in sig_outs],
+            params=[deserialize_param(s) for s in sig_params],
         )
 
     def __repr__(self) -> str:
         inputs_spec_strs = ",\n\t\t".join(repr(spec) for spec in self._inputs)
         outputs_spec_strs = ",\n\t\t".join(repr(spec) for spec in self._outputs)
+        params_spec_strs = ",\n\t\t".join(repr(spec) for spec in self._params)
         return textwrap.dedent(
             f"""ModelSignature(
                     inputs=[

@@ -555,6 +882,9 @@ class ModelSignature:
                     ],
                     outputs=[
                         {outputs_spec_strs}
+                    ],
+                    params=[
+                        {params_spec_strs}
                     ]
                 )"""
         )

@@ -570,19 +900,70 @@ class ModelSignature:
         # Create collapsible sections for inputs and outputs
         inputs_content = html_utils.create_features_html(self.inputs, "Input")
         outputs_content = html_utils.create_features_html(self.outputs, "Output")
-
+        params_content = html_utils.create_parameters_html(self.params, "Parameter")
         inputs_section = html_utils.create_collapsible_section("Inputs", inputs_content, open_by_default=True)
         outputs_section = html_utils.create_collapsible_section("Outputs", outputs_content, open_by_default=True)
+        params_section = html_utils.create_collapsible_section("Parameters", params_content, open_by_default=True)
 
         content = f"""
-        <div style="margin-top: 10px;">
-            {inputs_section}
-            {outputs_section}
-        </div>
+        <div style="margin-top: 10px;">
+            {inputs_section}
+            {outputs_section}
+            {params_section}
+        </div>
         """
 
         return html_utils.create_base_container("Model Signature", content)
 
+    def _name_validation(self) -> None:
+        """Validate the names of the inputs and parameters.
+
+        Names are compared case-insensitively (matches Snowflake identifier behavior).
+
+        Raises:
+            SnowflakeMLException: ValueError: When the parameters have duplicate names or the same
+                names as input features.
+        """
+        input_names: set[str] = set()
+        for input_spec in self._inputs:
+            names = (
+                [input_spec.name.upper() for spec in input_spec._specs]
+                if isinstance(input_spec, FeatureGroupSpec)
+                else [input_spec.name.upper()]
+            )
+            input_names.update(names)
+
+        param_names: set[str] = set()
+        dup_params: set[str] = set()
+        collision_names: set[str] = set()
+
+        for param in self._params:
+            names = [spec.name for spec in param.specs] if isinstance(param, ParamGroupSpec) else [param.name]
+            for name in names:
+                if name.upper() in param_names:
+                    dup_params.add(name)
+                if name.upper() in input_names:
+                    collision_names.add(name)
+                param_names.add(name.upper())
+
+        if dup_params:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_ARGUMENT,
+                original_exception=ValueError(
+                    f"Found duplicate parameter named resolved as {', '.join(sorted(dup_params))}."
+                    " Parameters must have distinct names (case-insensitive)."
+                ),
+            )
+
+        if collision_names:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_ARGUMENT,
+                original_exception=ValueError(
+                    f"Found parameter(s) with the same name as input feature(s): {', '.join(sorted(collision_names))}."
+                    " Parameters and inputs must have distinct names (case-insensitive)."
+                ),
+            )
+
     @classmethod
     def from_mlflow_sig(cls, mlflow_sig: "mlflow.models.ModelSignature") -> "ModelSignature":
         return ModelSignature(

@@ -593,4 +974,5 @@ class ModelSignature:
                 FeatureSpec.from_mlflow_spec(spec, f"output_feature_{idx}")
                 for idx, spec in enumerate(mlflow_sig.outputs)
             ],
+            params=[ParamSpec.from_mlflow_spec(spec) for spec in mlflow_sig.params or []],
         )

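Taken together, the signature changes let a ModelSignature declare inference-time parameters next to its inputs and outputs, while older serialized signatures without a "params" entry still load. A sketch assuming the existing FeatureSpec(name=..., dtype=...) constructor and the internal import path from the diff:

```python
from snowflake.ml.model._signatures.core import DataType, FeatureSpec, ModelSignature, ParamSpec

sig = ModelSignature(
    inputs=[FeatureSpec(name="prompt", dtype=DataType.STRING)],
    outputs=[FeatureSpec(name="completion", dtype=DataType.STRING)],
    params=[
        ParamSpec(name="temperature", dtype=DataType.DOUBLE, default_value=0.7),
        ParamSpec(name="max_tokens", dtype=DataType.INT64, default_value=256),
    ],
)

# Serialization now carries a "params" list; from_dict defaults it to [] for old payloads.
assert ModelSignature.from_dict(sig.to_dict()) == sig

# Name validation is case-insensitive: a parameter named "PROMPT" here would
# collide with the "prompt" input feature and raise at construction time.
```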
snowflake/ml/model/_signatures/utils.py

@@ -20,15 +20,21 @@ def convert_list_to_ndarray(data: list[Any]) -> npt.NDArray[Any]:
 
     Raises:
         SnowflakeMLException: ValueError: Raised when ragged nested list or list containing non-basic type confronted.
-        SnowflakeMLException: ValueError: Raised when ragged nested list or list containing non-basic type confronted.
 
     Returns:
         The converted numpy array.
     """
-    warnings.filterwarnings("error", category=np.VisibleDeprecationWarning)
+    # VisibleDeprecationWarning was removed in numpy>2
+    visible_deprecation_warning = getattr(np, "VisibleDeprecationWarning", None)
+    exception_types = (ValueError,)
+
+    if visible_deprecation_warning is not None:
+        warnings.filterwarnings("error", category=visible_deprecation_warning)
+        exception_types = (visible_deprecation_warning, ValueError)  # type: ignore[assignment]
+
     try:
         arr = np.array(data)
-    except np.VisibleDeprecationWarning:
+    except exception_types:
         # In recent version of numpy, this warning should be raised when bad list provided.
         raise snowml_exceptions.SnowflakeMLException(
             error_code=error_codes.INVALID_DATA,

@@ -36,7 +42,10 @@ def convert_list_to_ndarray(data: list[Any]) -> npt.NDArray[Any]:
                 f"Unable to construct signature: Ragged nested or Unsupported list-like data {data} confronted."
             ),
         )
-    warnings.filterwarnings("default", category=np.VisibleDeprecationWarning)
+    finally:
+        if visible_deprecation_warning is not None:
+            warnings.filterwarnings("default", category=visible_deprecation_warning)
+
     if arr.dtype == object:
         # If not raised, then a array of object would be created.
         raise snowml_exceptions.SnowflakeMLException(

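The convert_list_to_ndarray change accounts for numpy 2.x removing VisibleDeprecationWarning, while newer numpy raises ValueError for ragged input directly. A self-contained sketch of the same compatibility pattern (the function name strict_ndarray is illustrative):

```python
import warnings

import numpy as np


def strict_ndarray(data):
    """Convert a nested list to an ndarray, rejecting ragged input on both
    numpy 1.x (which may warn with VisibleDeprecationWarning) and numpy 2.x
    (which raises ValueError directly)."""
    vdw = getattr(np, "VisibleDeprecationWarning", None)  # gone in numpy >= 2
    caught = (ValueError,) if vdw is None else (ValueError, vdw)
    try:
        if vdw is not None:
            # Escalate the ragged-array warning to an error so it is catchable.
            warnings.filterwarnings("error", category=vdw)
        return np.array(data)
    except caught:
        raise ValueError(f"Ragged or unsupported list-like data: {data!r}")
    finally:
        if vdw is not None:
            warnings.filterwarnings("default", category=vdw)


strict_ndarray([[1, 2], [3, 4]])   # ok
# strict_ndarray([[1, 2], [3]])    # -> ValueError on both numpy 1.x and 2.x
```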