snowflake-ml-python 1.9.1__py3-none-any.whl → 1.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/utils/mixins.py +6 -4
- snowflake/ml/_internal/utils/service_logger.py +101 -1
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -1
- snowflake/ml/data/data_connector.py +4 -34
- snowflake/ml/dataset/dataset.py +1 -1
- snowflake/ml/dataset/dataset_reader.py +2 -8
- snowflake/ml/experiment/__init__.py +3 -0
- snowflake/ml/experiment/callback.py +121 -0
- snowflake/ml/jobs/_utils/constants.py +15 -4
- snowflake/ml/jobs/_utils/payload_utils.py +150 -49
- snowflake/ml/jobs/_utils/scripts/constants.py +0 -22
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +125 -22
- snowflake/ml/jobs/_utils/spec_utils.py +1 -1
- snowflake/ml/jobs/_utils/stage_utils.py +30 -14
- snowflake/ml/jobs/_utils/types.py +64 -4
- snowflake/ml/jobs/job.py +22 -6
- snowflake/ml/jobs/manager.py +5 -3
- snowflake/ml/model/_client/ops/service_ops.py +17 -2
- snowflake/ml/model/_client/sql/service.py +1 -38
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -5
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -0
- snowflake/ml/model/_signatures/pandas_handler.py +3 -0
- snowflake/ml/model/_signatures/utils.py +4 -0
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/METADATA +42 -4
- {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/RECORD +30 -28
- {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/_utils/stage_utils.py
CHANGED

@@ -14,7 +14,10 @@ _SNOWURL_PATH_RE = re.compile(
     r"(?P<path>versions(?:/(?P<version>[^/]+)(?:/(?P<relpath>.*))?)?)$"
 )
 
-
+# Break long regex into two main parts
+_STAGE_PATTERN = rf"~|%?(?:(?:{identifier._SF_IDENTIFIER}\.?){{,2}}{identifier._SF_IDENTIFIER})"
+_RELPATH_PATTERN = r"[\w\-./]*"
+_STAGEF_PATH_RE = re.compile(rf"^@(?P<stage>{_STAGE_PATTERN})(?:/(?P<relpath>{_RELPATH_PATTERN}))?$")
 
 
 class StagePath:
@@ -29,6 +32,14 @@ class StagePath:
         self._root = self._raw_path[0:start].rstrip("/") if relpath else self._raw_path.rstrip("/")
         self._path = Path(relpath or "")
 
+    @property
+    def parts(self) -> tuple[str, ...]:
+        return self._path.parts
+
+    @property
+    def name(self) -> str:
+        return self._path.name
+
     @property
     def parent(self) -> "StagePath":
         if self._path.parent == Path(""):
@@ -51,18 +62,28 @@ class StagePath:
         else:
             return f"{self.root}/{path}"
 
-    def is_relative_to(self,
+    def is_relative_to(self, *other: Union[str, os.PathLike[str]]) -> bool:
+        if not other:
+            raise TypeError("is_relative_to() requires at least one argument")
+        # For now, we only support a single argument, like pathlib.Path in Python < 3.12
+        path = other[0]
         stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
         if stage_path.root == self.root:
             return self._path.is_relative_to(stage_path._path)
         else:
             return False
 
-    def relative_to(self,
+    def relative_to(self, *other: Union[str, os.PathLike[str]]) -> PurePath:
+        if not other:
+            raise TypeError("relative_to() requires at least one argument")
+        if not self.is_relative_to(*other):
+            raise ValueError(f"{other} does not start with {self._raw_path}")
+        path = other[0]
         stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
         if self.root == stage_path.root:
             return self._path.relative_to(stage_path._path)
-
+        else:
+            raise ValueError(f"{self._raw_path} does not start with {stage_path._raw_path}")
 
     def absolute(self) -> "StagePath":
         return self
@@ -88,6 +109,9 @@ class StagePath:
     def __str__(self) -> str:
         return self.as_posix()
 
+    def __repr__(self) -> str:
+        return f"StagePath('{self.as_posix()}')"
+
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, StagePath):
             raise NotImplementedError
@@ -96,24 +120,16 @@ class StagePath:
     def __fspath__(self) -> str:
         return self._compose_path(self._path)
 
-    def joinpath(self, *args: Union[str, PathLike[str]
+    def joinpath(self, *args: Union[str, PathLike[str]]) -> "StagePath":
        path = self
         for arg in args:
             path = path._make_child(arg)
         return path
 
-    def _make_child(self, path: Union[str, PathLike[str]
+    def _make_child(self, path: Union[str, PathLike[str]]) -> "StagePath":
         stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
         if self.root == stage_path.root:
             child_path = self._path.joinpath(stage_path._path)
             return StagePath(self._compose_path(child_path))
         else:
             return stage_path
-
-
-def identify_stage_path(path: str) -> Union[StagePath, Path]:
-    try:
-        stage_path = StagePath(path)
-    except ValueError:
-        return Path(path)
-    return stage_path
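
For orientation, a minimal sketch of how the reworked `StagePath` API behaves, derived only from the methods shown in the diff above; the stage name `@my_stage` and file layout are hypothetical. Note that the module-level `identify_stage_path` helper is removed in this version.

```python
from snowflake.ml.jobs._utils.stage_utils import StagePath

p = StagePath("@my_stage/src/train.py")  # hypothetical stage path
p.name                             # "train.py" (new property in 1.9.2)
p.parts                            # ("src", "train.py") -- parts below the stage root
repr(p)                            # "StagePath('@my_stage/src/train.py')" (new __repr__)

# is_relative_to()/relative_to() now take *other varargs (only the first
# argument is honored, like pathlib.Path before Python 3.12); relative_to()
# raises ValueError on a root mismatch instead of falling through to None.
p.is_relative_to("@my_stage/src")  # True
p.relative_to("@my_stage/src")     # PurePath("train.py")
p.is_relative_to("@other_stage")   # False
```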

snowflake/ml/jobs/_utils/types.py
CHANGED

@@ -1,8 +1,7 @@
+import os
 from dataclasses import dataclass
 from pathlib import PurePath
-from typing import Literal, Optional, Union
-
-from snowflake.ml.jobs._utils import stage_utils
+from typing import Iterator, Literal, Optional, Protocol, Union, runtime_checkable
 
 JOB_STATUS = Literal[
     "PENDING",
@@ -15,9 +14,70 @@ JOB_STATUS = Literal[
 ]
 
 
+@runtime_checkable
+class PayloadPath(Protocol):
+    """A protocol for path-like objects used in this module, covering methods from pathlib.Path and StagePath."""
+
+    @property
+    def name(self) -> str:
+        ...
+
+    @property
+    def suffix(self) -> str:
+        ...
+
+    @property
+    def parent(self) -> "PayloadPath":
+        ...
+
+    def exists(self) -> bool:
+        ...
+
+    def is_file(self) -> bool:
+        ...
+
+    def is_absolute(self) -> bool:
+        ...
+
+    def absolute(self) -> "PayloadPath":
+        ...
+
+    def joinpath(self, *other: Union[str, os.PathLike[str]]) -> "PayloadPath":
+        ...
+
+    def as_posix(self) -> str:
+        ...
+
+    def is_relative_to(self, *other: Union[str, os.PathLike[str]]) -> bool:
+        ...
+
+    def relative_to(self, *other: Union[str, os.PathLike[str]]) -> PurePath:
+        ...
+
+    def __fspath__(self) -> str:
+        ...
+
+    def __str__(self) -> str:
+        ...
+
+    def __repr__(self) -> str:
+        ...
+
+
+@dataclass
+class PayloadSpec:
+    """Represents a payload item to be uploaded."""
+
+    source_path: PayloadPath
+    remote_relative_path: Optional[PurePath] = None
+
+    def __iter__(self) -> Iterator[Union[PayloadPath, Optional[PurePath]]]:
+        return iter((self.source_path, self.remote_relative_path))
+
+
 @dataclass(frozen=True)
 class PayloadEntrypoint:
-    file_path:
+    file_path: PayloadPath
     main_func: Optional[str]
 
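
The `PayloadPath` protocol above lets the payload machinery treat local `pathlib.Path` objects and stage-backed `StagePath` objects uniformly, and `PayloadSpec.__iter__` keeps tuple-style unpacking working at call sites. A short illustrative sketch, not part of the public API:

```python
from pathlib import Path, PurePath

from snowflake.ml.jobs._utils.types import PayloadPath, PayloadSpec

local = Path("src/train.py")
# @runtime_checkable means isinstance() checks members structurally, so a
# plain pathlib.Path passes without inheriting from the protocol.
assert isinstance(local, PayloadPath)

spec = PayloadSpec(source_path=local, remote_relative_path=PurePath("app"))
source, remote = spec  # __iter__ allows unpacking PayloadSpec like a 2-tuple
```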
snowflake/ml/jobs/job.py
CHANGED

@@ -3,6 +3,7 @@ import logging
 import os
 import time
 from functools import cached_property
+from pathlib import Path
 from typing import Any, Generic, Literal, Optional, TypeVar, Union, cast, overload
 
 import yaml
@@ -95,10 +96,24 @@ class MLJob(Generic[T], SerializableSessionMixin):
     @property
     def _result_path(self) -> str:
         """Get the job's result file location."""
-
-        if
+        result_path_str = self._container_spec["env"].get(constants.RESULT_PATH_ENV_VAR)
+        if result_path_str is None:
             raise RuntimeError(f"Job {self.name} doesn't have a result path configured")
-
+        volume_mounts = self._container_spec["volumeMounts"]
+        stage_mount_str = next(v for v in volume_mounts if v.get("name") == constants.STAGE_VOLUME_NAME)["mountPath"]
+
+        result_path = Path(result_path_str)
+        stage_mount = Path(stage_mount_str)
+        try:
+            relative_path = result_path.relative_to(stage_mount)
+        except ValueError:
+            if result_path.is_absolute():
+                raise ValueError(
+                    f"Result path {result_path} is absolute, but should be relative to stage mount {stage_mount}"
+                )
+            relative_path = result_path
+
+        return f"{self._stage_path}/{relative_path.as_posix()}"
 
     @overload
     def get_logs(
@@ -181,7 +196,10 @@ class MLJob(Generic[T], SerializableSessionMixin):
         start_time = time.monotonic()
         warning_shown = False
         while (status := self.status) not in TERMINAL_JOB_STATUSES:
-            if status == "PENDING" and not warning_shown:
+            elapsed = time.monotonic() - start_time
+            if elapsed >= timeout >= 0:
+                raise TimeoutError(f"Job {self.name} did not complete within {timeout} seconds")
+            elif status == "PENDING" and not warning_shown and elapsed >= 2:  # Only show warning after 2s
                 pool_info = _get_compute_pool_info(self._session, self._compute_pool)
                 if (pool_info.max_nodes - pool_info.active_nodes) < self.min_instances:
                     logger.warning(
@@ -189,8 +207,6 @@ class MLJob(Generic[T], SerializableSessionMixin):
                         f"{self.min_instances} nodes required). Job execution may be delayed."
                     )
                     warning_shown = True
-            if timeout >= 0 and (elapsed := time.monotonic() - start_time) >= timeout:
-                raise TimeoutError(f"Job {self.name} did not complete within {elapsed} seconds")
             time.sleep(delay)
             delay = min(delay * 1.2, constants.JOB_POLL_MAX_DELAY_SECONDS)  # Exponential backoff
         return self.status
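
The polling change above moves the timeout check to the top of the loop, so a `timeout` of 0 now fails fast before any sleep and the error message reports the configured timeout rather than the measured elapsed time. The same pattern in isolation; names here are illustrative, not the library's API:

```python
import time

def wait_until(is_done, timeout: float = -1, delay: float = 0.1, max_delay: float = 30.0) -> None:
    """Poll is_done() with capped 1.2x exponential backoff; a negative timeout waits forever."""
    start = time.monotonic()
    while not is_done():
        elapsed = time.monotonic() - start
        if elapsed >= timeout >= 0:  # checked before sleeping, so timeout=0 raises immediately
            raise TimeoutError(f"did not complete within {timeout} seconds")
        time.sleep(delay)
        delay = min(delay * 1.2, max_delay)
```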
snowflake/ml/jobs/manager.py
CHANGED

@@ -447,6 +447,10 @@ def _submit_job(
     spec_overrides = kwargs.pop("spec_overrides", None)
     enable_metrics = kwargs.pop("enable_metrics", True)
     query_warehouse = kwargs.pop("query_warehouse", None)
+    additional_payloads = kwargs.pop("additional_payloads", None)
+
+    if additional_payloads:
+        logger.warning("'additional_payloads' is in private preview since 1.9.1. Do not use it in production.")
 
     # Warn if there are unknown kwargs
     if kwargs:
@@ -477,9 +481,7 @@ def _submit_job(
 
     # Upload payload
     uploaded_payload = payload_utils.JobPayload(
-        source,
-        entrypoint=entrypoint,
-        pip_requirements=pip_requirements,
+        source, entrypoint=entrypoint, pip_requirements=pip_requirements, additional_payloads=additional_payloads
     ).upload(session, stage_path)
 
     # Generate service spec
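
A hedged sketch of how the new keyword might be passed through the public submission API; `additional_payloads` is private preview per the warning above, and the value shape below (extra source files or directories uploaded alongside the entrypoint) is an assumption, as the diff does not show it:

```python
from snowflake.ml import jobs

job = jobs.submit_file(
    "train.py",
    "MY_COMPUTE_POOL",
    stage_name="payload_stage",
    additional_payloads=["extra_utils/"],  # assumed shape: extra files/dirs to upload
)
```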
snowflake/ml/model/_client/ops/service_ops.py
CHANGED

@@ -96,11 +96,13 @@ class ServiceLogMetadata:
         msg: str,
         is_model_build_service_done: bool,
         is_model_logger_service_done: bool,
+        operation_id: str,
         propagate: bool = False,
     ) -> None:
         to_service_logger = service_logger.get_logger(
             f"{to_service.display_service_name}-{to_service.instance_id}",
             to_service.log_color,
+            operation_id=operation_id,
         )
         to_service_logger.propagate = propagate
         self.service_logger = to_service_logger
@@ -196,6 +198,9 @@ class ServiceOperator:
         hf_model_args: Optional[HFModelArgs] = None,
     ) -> Union[str, async_job.AsyncJob]:
 
+        # Generate operation ID for this deployment
+        operation_id = service_logger.get_operation_id()
+
         # Fall back to the registry's database and schema if not provided
         database_name = database_name or self._database_name
         schema_name = schema_name or self._schema_name
@@ -327,6 +332,7 @@ class ServiceOperator:
             model_inference_service=model_inference_service,
             model_inference_service_exists=model_inference_service_exists,
             force_rebuild=force_rebuild,
+            operation_id=operation_id,
             statement_params=statement_params,
         )
 
@@ -347,6 +353,7 @@ class ServiceOperator:
         model_inference_service: ServiceLogInfo,
         model_inference_service_exists: bool,
         force_rebuild: bool,
+        operation_id: str,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> threading.Thread:
         """Start the service log streaming in a separate thread."""
@@ -360,6 +367,7 @@ class ServiceOperator:
                 model_inference_service,
                 model_inference_service_exists,
                 force_rebuild,
+                operation_id,
                 statement_params,
             ),
         )
@@ -372,6 +380,7 @@ class ServiceOperator:
         service_log_meta: ServiceLogMetadata,
         model_build_service: ServiceLogInfo,
         model_inference_service: ServiceLogInfo,
+        operation_id: str,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         """Helper function to fetch logs and update the service log metadata if needed.
@@ -386,6 +395,7 @@ class ServiceOperator:
             service_log_meta: The ServiceLogMetadata holds the state of the service log metadata.
             model_build_service: The ServiceLogInfo for the model build service.
             model_inference_service: The ServiceLogInfo for the model inference service.
+            operation_id: The operation ID for the service, e.g. "model_deploy_a1b2c3d4_1703875200"
             statement_params: The statement parameters to use for the service client.
         """
@@ -415,6 +425,7 @@ class ServiceOperator:
                 "Model build is not rebuilding the inference image, but using a previously built image.",
                 is_model_build_service_done=True,
                 is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                operation_id=operation_id,
             )
 
         try:
@@ -488,6 +499,7 @@ class ServiceOperator:
                     f"Model Logger service {service.display_service_name} complete.",
                     is_model_build_service_done=False,
                     is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                    operation_id=operation_id,
                 )
             # check if model build service is done
             # and transition the service log metadata to the model inference service
@@ -497,6 +509,7 @@ class ServiceOperator:
                     f"Image build service {service.display_service_name} complete.",
                     is_model_build_service_done=True,
                     is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                    operation_id=operation_id,
                 )
             else:
                 module_logger.warning(f"Service {service.display_service_name} is done, but not transitioning.")
@@ -509,6 +522,7 @@ class ServiceOperator:
         model_inference_service: ServiceLogInfo,
         model_inference_service_exists: bool,
         force_rebuild: bool,
+        operation_id: str,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         """Stream service logs while the async job is running."""
@@ -516,14 +530,14 @@ class ServiceOperator:
         model_build_service_logger = service_logger.get_logger(  # BuildJobName
             model_build_service.display_service_name,
             model_build_service.log_color,
+            operation_id=operation_id,
         )
-        model_build_service_logger.propagate = False
         if model_logger_service:
             model_logger_service_logger = service_logger.get_logger(  # ModelLoggerName
                 model_logger_service.display_service_name,
                 model_logger_service.log_color,
+                operation_id=operation_id,
             )
-            model_logger_service_logger.propagate = False
 
         service_log_meta = ServiceLogMetadata(
             service_logger=model_logger_service_logger,
@@ -557,6 +571,7 @@ class ServiceOperator:
                 force_rebuild=force_rebuild,
                 model_build_service=model_build_service,
                 model_inference_service=model_inference_service,
+                operation_id=operation_id,
                 statement_params=statement_params,
             )
         except Exception as ex:
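
Every `service_logger.get_logger(...)` call now receives the same `operation_id`, so build, logger, and inference log records from one deployment can be correlated. Based on the example format in the docstring above ("model_deploy_a1b2c3d4_1703875200"), a plausible sketch of what `get_operation_id()` returns; this is an assumption, not the library's actual implementation:

```python
import time
import uuid

def get_operation_id(prefix: str = "model_deploy") -> str:
    # Assumed shape: prefix, short random hex, Unix timestamp -- matching the
    # documented example "model_deploy_a1b2c3d4_1703875200".
    return f"{prefix}_{uuid.uuid4().hex[:8]}_{int(time.time())}"
```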
snowflake/ml/model/_client/sql/service.py
CHANGED

@@ -2,7 +2,7 @@ import dataclasses
 import enum
 import logging
 import textwrap
-from typing import Any, Optional, Union
+from typing import Any, Optional
 
 from snowflake import snowpark
 from snowflake.ml._internal.utils import (
@@ -69,43 +69,6 @@ class ServiceSQLClient(_base._BaseSQLClient):
     CONTAINER_STATUS = "status"
     MESSAGE = "message"
 
-    def build_model_container(
-        self,
-        *,
-        database_name: Optional[sql_identifier.SqlIdentifier],
-        schema_name: Optional[sql_identifier.SqlIdentifier],
-        model_name: sql_identifier.SqlIdentifier,
-        version_name: sql_identifier.SqlIdentifier,
-        compute_pool_name: sql_identifier.SqlIdentifier,
-        image_repo_database_name: Optional[sql_identifier.SqlIdentifier],
-        image_repo_schema_name: Optional[sql_identifier.SqlIdentifier],
-        image_repo_name: sql_identifier.SqlIdentifier,
-        gpu: Optional[Union[str, int]],
-        force_rebuild: bool,
-        external_access_integration: sql_identifier.SqlIdentifier,
-        statement_params: Optional[dict[str, Any]] = None,
-    ) -> None:
-        actual_image_repo_database = image_repo_database_name or self._database_name
-        actual_image_repo_schema = image_repo_schema_name or self._schema_name
-        actual_model_database = database_name or self._database_name
-        actual_model_schema = schema_name or self._schema_name
-        fq_model_name = self.fully_qualified_object_name(actual_model_database, actual_model_schema, model_name)
-        fq_image_repo_name = identifier.get_schema_level_object_identifier(
-            actual_image_repo_database.identifier(),
-            actual_image_repo_schema.identifier(),
-            image_repo_name.identifier(),
-        )
-        is_gpu_str = "TRUE" if gpu else "FALSE"
-        force_rebuild_str = "TRUE" if force_rebuild else "FALSE"
-        query_result_checker.SqlResultValidator(
-            self._session,
-            (
-                f"CALL SYSTEM$BUILD_MODEL_CONTAINER('{fq_model_name}', '{version_name}', '{compute_pool_name}',"
-                f" '{fq_image_repo_name}', '{is_gpu_str}', '{force_rebuild_str}', '', '{external_access_integration}')"
-            ),
-            statement_params=statement_params,
-        ).has_dimensions(expected_rows=1, expected_cols=1).validate()
-
     def deploy_model(
         self,
         *,
snowflake/ml/model/_packager/model_handlers/sklearn.py
CHANGED

@@ -1,3 +1,4 @@
+import logging
 import os
 import warnings
 from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union, cast, final
@@ -24,6 +25,8 @@ if TYPE_CHECKING:
     import sklearn.base
     import sklearn.pipeline
 
+logger = logging.getLogger(__name__)
+
 
 def _unpack_container_runtime_pipeline(model: "sklearn.pipeline.Pipeline") -> "sklearn.pipeline.Pipeline":
     new_steps = []
@@ -201,13 +204,13 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
             explain_target_method = str(explain_target_method)  # mypy complains if we don't cast to str here
 
             input_signature = handlers_utils.get_input_signature(model_meta, explain_target_method)
-            transformed_background_data = _apply_transforms_up_to_last_step(
-                model=model,
-                data=background_data,
-                input_feature_names=[spec.name for spec in input_signature],
-            )
 
             try:
+                transformed_background_data = _apply_transforms_up_to_last_step(
+                    model=model,
+                    data=background_data,
+                    input_feature_names=[spec.name for spec in input_signature],
+                )
                 model_meta = handlers_utils.add_inferred_explain_method_signature(
                     model_meta=model_meta,
                     explain_method="explain",
@@ -217,6 +220,7 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                     output_feature_names=transformed_background_data.columns,
                 )
             except Exception:
+                logger.debug("Explainability is disabled due to an exception.", exc_info=True)
                 if kwargs.get("enable_explainability", None):
                     # user explicitly enabled explainability, so we should raise the error
                     raise ValueError(
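
The sklearn handler change above widens the `try` block so that a failing background-data transform no longer aborts packaging outright: the failure is logged at DEBUG with its traceback (`exc_info=True`) and re-raised only when the user explicitly requested explainability. The same degrade-gracefully pattern in isolation, with illustrative names:

```python
import logging

logger = logging.getLogger(__name__)

def maybe_enable_explainability(compute_fn, explicitly_enabled: bool):
    """Attempt an optional enrichment step; fall back quietly unless opted in."""
    try:
        return compute_fn()
    except Exception:
        logger.debug("Explainability is disabled due to an exception.", exc_info=True)
        if explicitly_enabled:
            raise  # user explicitly enabled explainability, so surface the error
        return None
```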
snowflake/ml/model/_signatures/pandas_handler.py
CHANGED

@@ -86,6 +86,9 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
                 df_col_data = utils.series_dropna(df_col_data)
             df_col_dtype = df_col_data.dtype
 
+            if utils.check_if_series_is_empty(df_col_data):
+                continue
+
             if df_col_dtype == np.dtype("O"):
                 # Check if all objects have the same type
                 if not all(isinstance(data_row, type(df_col_data.iloc[0])) for data_row in df_col_data):
snowflake/ml/model/_signatures/utils.py
CHANGED

@@ -412,3 +412,7 @@ def infer_dict(name: str, data: dict[str, Any]) -> core.FeatureGroupSpec:
         specs.append(core.FeatureSpec(name=key, dtype=core.DataType.from_numpy_type(np.array(value).dtype)))
 
     return core.FeatureGroupSpec(name=name, specs=specs)
+
+
+def check_if_series_is_empty(series: Optional[pd.Series]) -> bool:
+    return series is None or series.empty
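
Why the new helper matters: an object-dtype column holding only `None` becomes an empty Series after `dropna()`, so any inference touching `.iloc[0]` would raise `IndexError`; with `check_if_series_is_empty`, the handlers above and in `model_signature.py` below skip such columns instead. A quick demonstration:

```python
import pandas as pd

col = pd.Series([None, None], dtype="object").dropna()
print(col.empty)  # True -> signature inference now skips this column
print(len(col))   # 0; col.iloc[0] would raise IndexError here
```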
snowflake/ml/model/model_signature.py
CHANGED

@@ -272,6 +272,8 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
                 ),
             )
         else:
+            if utils.check_if_series_is_empty(data_col):
+                continue
             if isinstance(data_col.iloc[0], list):
                 if not ft_shape:
                     raise snowml_exceptions.SnowflakeMLException(
snowflake/ml/version.py
CHANGED

@@ -1,2 +1,2 @@
 # This is parsed by regex in conda recipe meta file. Make sure not to break it.
-VERSION = "1.9.1"
+VERSION = "1.9.2"
{snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowflake-ml-python
-Version: 1.9.1
+Version: 1.9.2
 Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
 Author-email: "Snowflake, Inc" <support@snowflake.com>
 License:
@@ -243,6 +243,7 @@ Requires-Dist: importlib_resources<7,>=6.1.1
 Requires-Dist: numpy<3,>=1.23
 Requires-Dist: packaging<25,>=20.9
 Requires-Dist: pandas<3,>=2.1.4
+Requires-Dist: platformdirs<5
 Requires-Dist: pyarrow
 Requires-Dist: pydantic<3,>=2.8.2
 Requires-Dist: pyjwt<3,>=2.0.0
@@ -273,7 +274,7 @@ Requires-Dist: tensorflow<3,>=2.17.0; extra == "all"
 Requires-Dist: tokenizers<1,>=0.15.1; extra == "all"
 Requires-Dist: torch<3,>=2.0.1; extra == "all"
 Requires-Dist: torchdata<1,>=0.4; extra == "all"
-Requires-Dist: transformers
+Requires-Dist: transformers!=4.51.3,<5,>=4.39.3; extra == "all"
 Provides-Extra: altair
 Requires-Dist: altair<6,>=5; extra == "altair"
 Provides-Extra: catboost
@@ -298,7 +299,7 @@ Requires-Dist: sentence-transformers<4,>=2.7.0; extra == "transformers"
 Requires-Dist: sentencepiece<0.2.0,>=0.1.95; extra == "transformers"
 Requires-Dist: tokenizers<1,>=0.15.1; extra == "transformers"
 Requires-Dist: torch<3,>=2.0.1; extra == "transformers"
-Requires-Dist: transformers
+Requires-Dist: transformers!=4.51.3,<5,>=4.39.3; extra == "transformers"
 Dynamic: license-file
 
 # Snowpark ML
@@ -409,7 +410,44 @@ NOTE: Version 1.7.0 is used as example here. Please choose the the latest versio
 
 # Release History
 
-## 1.9.1
+## 1.9.2
+
+### Bug Fixes
+
+- DataConnector: Fix `self._session` related errors inside Container Runtime.
+- Registry: Fix a bug when trying to pass `None` to array (`pd.dtype('O')`) in signature and pandas data handler.
+
+### New Features
+
+- Experiment Tracking (PrPr): Automatically log the model, metrics, and parameters while training
+  XGBoost and LightGBM models.
+
+  ```python
+  from snowflake.ml.experiment import ExperimentTracking
+  from snowflake.ml.experiment.callback import SnowflakeXgboostCallback, SnowflakeLightgbmCallback
+
+  exp = ExperimentTracking(session=sp_session, database_name="ML", schema_name="PUBLIC")
+
+  exp.set_experiment("MY_EXPERIMENT")
+
+  # XGBoost
+  callback = SnowflakeXgboostCallback(
+      exp, log_model=True, log_metrics=True, log_params=True, model_name="model_name", model_signature=sig
+  )
+  model = XGBClassifier(callbacks=[callback])
+  with exp.start_run():
+      model.fit(X, y, eval_set=[(X_test, y_test)])
+
+  # LightGBM
+  callback = SnowflakeLightgbmCallback(
+      exp, log_model=True, log_metrics=True, log_params=True, model_name="model_name", model_signature=sig
+  )
+  model = LGBMClassifier()
+  with exp.start_run():
+      model.fit(X, y, eval_set=[(X_test, y_test)], callbacks=[callback])
+  ```
+
+## 1.9.1 (07-18-2025)
 
 ### Bug Fixes