snowflake-ml-python 1.9.1__py3-none-any.whl → 1.9.2__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (30)
  1. snowflake/ml/_internal/utils/mixins.py +6 -4
  2. snowflake/ml/_internal/utils/service_logger.py +101 -1
  3. snowflake/ml/data/_internal/arrow_ingestor.py +4 -1
  4. snowflake/ml/data/data_connector.py +4 -34
  5. snowflake/ml/dataset/dataset.py +1 -1
  6. snowflake/ml/dataset/dataset_reader.py +2 -8
  7. snowflake/ml/experiment/__init__.py +3 -0
  8. snowflake/ml/experiment/callback.py +121 -0
  9. snowflake/ml/jobs/_utils/constants.py +15 -4
  10. snowflake/ml/jobs/_utils/payload_utils.py +150 -49
  11. snowflake/ml/jobs/_utils/scripts/constants.py +0 -22
  12. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +125 -22
  13. snowflake/ml/jobs/_utils/spec_utils.py +1 -1
  14. snowflake/ml/jobs/_utils/stage_utils.py +30 -14
  15. snowflake/ml/jobs/_utils/types.py +64 -4
  16. snowflake/ml/jobs/job.py +22 -6
  17. snowflake/ml/jobs/manager.py +5 -3
  18. snowflake/ml/model/_client/ops/service_ops.py +17 -2
  19. snowflake/ml/model/_client/sql/service.py +1 -38
  20. snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -5
  21. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -0
  22. snowflake/ml/model/_signatures/pandas_handler.py +3 -0
  23. snowflake/ml/model/_signatures/utils.py +4 -0
  24. snowflake/ml/model/model_signature.py +2 -0
  25. snowflake/ml/version.py +1 -1
  26. {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/METADATA +42 -4
  27. {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/RECORD +30 -28
  28. {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/WHEEL +0 -0
  29. {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/licenses/LICENSE.txt +0 -0
  30. {snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/_utils/stage_utils.py CHANGED
@@ -14,7 +14,10 @@ _SNOWURL_PATH_RE = re.compile(
      r"(?P<path>versions(?:/(?P<version>[^/]+)(?:/(?P<relpath>.*))?)?)$"
  )

- _STAGEF_PATH_RE = re.compile(r"^@(?P<stage>~|%?\w+)(?:/(?P<relpath>[\w\-./]*))?$")
+ # Break long regex into two main parts
+ _STAGE_PATTERN = rf"~|%?(?:(?:{identifier._SF_IDENTIFIER}\.?){{,2}}{identifier._SF_IDENTIFIER})"
+ _RELPATH_PATTERN = r"[\w\-./]*"
+ _STAGEF_PATH_RE = re.compile(rf"^@(?P<stage>{_STAGE_PATTERN})(?:/(?P<relpath>{_RELPATH_PATTERN}))?$")


  class StagePath:
@@ -29,6 +32,14 @@ class StagePath:
          self._root = self._raw_path[0:start].rstrip("/") if relpath else self._raw_path.rstrip("/")
          self._path = Path(relpath or "")

+     @property
+     def parts(self) -> tuple[str, ...]:
+         return self._path.parts
+
+     @property
+     def name(self) -> str:
+         return self._path.name
+
      @property
      def parent(self) -> "StagePath":
          if self._path.parent == Path(""):
@@ -51,18 +62,28 @@ class StagePath:
          else:
              return f"{self.root}/{path}"

-     def is_relative_to(self, path: Union[str, PathLike[str], "StagePath"]) -> bool:
+     def is_relative_to(self, *other: Union[str, os.PathLike[str]]) -> bool:
+         if not other:
+             raise TypeError("is_relative_to() requires at least one argument")
+         # For now, we only support a single argument, like pathlib.Path in Python < 3.12
+         path = other[0]
          stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
          if stage_path.root == self.root:
              return self._path.is_relative_to(stage_path._path)
          else:
              return False

-     def relative_to(self, path: Union[str, PathLike[str], "StagePath"]) -> PurePath:
+     def relative_to(self, *other: Union[str, os.PathLike[str]]) -> PurePath:
+         if not other:
+             raise TypeError("relative_to() requires at least one argument")
+         if not self.is_relative_to(*other):
+             raise ValueError(f"{other} does not start with {self._raw_path}")
+         path = other[0]
          stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
          if self.root == stage_path.root:
              return self._path.relative_to(stage_path._path)
-         raise ValueError(f"{self._raw_path} does not start with {stage_path._raw_path}")
+         else:
+             raise ValueError(f"{self._raw_path} does not start with {stage_path._raw_path}")

      def absolute(self) -> "StagePath":
          return self
@@ -88,6 +109,9 @@ class StagePath:
      def __str__(self) -> str:
          return self.as_posix()

+     def __repr__(self) -> str:
+         return f"StagePath('{self.as_posix()}')"
+
      def __eq__(self, other: object) -> bool:
          if not isinstance(other, StagePath):
              raise NotImplementedError
@@ -96,24 +120,16 @@ class StagePath:
      def __fspath__(self) -> str:
          return self._compose_path(self._path)

-     def joinpath(self, *args: Union[str, PathLike[str], "StagePath"]) -> "StagePath":
+     def joinpath(self, *args: Union[str, PathLike[str]]) -> "StagePath":
          path = self
          for arg in args:
              path = path._make_child(arg)
          return path

-     def _make_child(self, path: Union[str, PathLike[str], "StagePath"]) -> "StagePath":
+     def _make_child(self, path: Union[str, PathLike[str]]) -> "StagePath":
          stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
          if self.root == stage_path.root:
              child_path = self._path.joinpath(stage_path._path)
              return StagePath(self._compose_path(child_path))
          else:
              return stage_path
-
-
- def identify_stage_path(path: str) -> Union[StagePath, Path]:
-     try:
-         stage_path = StagePath(path)
-     except ValueError:
-         return Path(path)
-     return stage_path
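The reworked `StagePath` surface mirrors `pathlib.Path` more closely. A minimal sketch of how the 1.9.2 additions (`parts`, `name`, `__repr__`, and the `*other` signatures) behave, assuming the internal module path shown above; the stage name is hypothetical and the outputs in comments are inferred from the diff, not verified:

```python
# Sketch only: StagePath is an internal class; "@my_stage" is a hypothetical stage.
from snowflake.ml.jobs._utils.stage_utils import StagePath

p = StagePath("@my_stage/jobs/output/result.json")

p.parts   # ('jobs', 'output', 'result.json') -- delegates to the relative Path, root excluded
p.name    # 'result.json'
repr(p)   # "StagePath('@my_stage/jobs/output/result.json')"

# is_relative_to()/relative_to() now accept *other, matching pathlib.Path on
# Python < 3.12 (a single argument is supported):
base = StagePath("@my_stage/jobs")
p.is_relative_to(base)         # True -- same root, relative part is a prefix
p.relative_to(base)            # PurePath('output/result.json')
p.relative_to("@other_stage")  # raises ValueError: roots differ
```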
snowflake/ml/jobs/_utils/types.py CHANGED
@@ -1,8 +1,7 @@
+ import os
  from dataclasses import dataclass
  from pathlib import PurePath
- from typing import Literal, Optional, Union
-
- from snowflake.ml.jobs._utils import stage_utils
+ from typing import Iterator, Literal, Optional, Protocol, Union, runtime_checkable

  JOB_STATUS = Literal[
      "PENDING",
@@ -15,9 +14,70 @@ JOB_STATUS = Literal[
  ]


+ @runtime_checkable
+ class PayloadPath(Protocol):
+     """A protocol for path-like objects used in this module, covering methods from pathlib.Path and StagePath."""
+
+     @property
+     def name(self) -> str:
+         ...
+
+     @property
+     def suffix(self) -> str:
+         ...
+
+     @property
+     def parent(self) -> "PayloadPath":
+         ...
+
+     def exists(self) -> bool:
+         ...
+
+     def is_file(self) -> bool:
+         ...
+
+     def is_absolute(self) -> bool:
+         ...
+
+     def absolute(self) -> "PayloadPath":
+         ...
+
+     def joinpath(self, *other: Union[str, os.PathLike[str]]) -> "PayloadPath":
+         ...
+
+     def as_posix(self) -> str:
+         ...
+
+     def is_relative_to(self, *other: Union[str, os.PathLike[str]]) -> bool:
+         ...
+
+     def relative_to(self, *other: Union[str, os.PathLike[str]]) -> PurePath:
+         ...
+
+     def __fspath__(self) -> str:
+         ...
+
+     def __str__(self) -> str:
+         ...
+
+     def __repr__(self) -> str:
+         ...
+
+
+ @dataclass
+ class PayloadSpec:
+     """Represents a payload item to be uploaded."""
+
+     source_path: PayloadPath
+     remote_relative_path: Optional[PurePath] = None
+
+     def __iter__(self) -> Iterator[Union[PayloadPath, Optional[PurePath]]]:
+         return iter((self.source_path, self.remote_relative_path))
+
+
  @dataclass(frozen=True)
  class PayloadEntrypoint:
-     file_path: Union[PurePath, stage_utils.StagePath]
+     file_path: PayloadPath
      main_func: Optional[str]

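`PayloadSpec` supports tuple-style unpacking via `__iter__`, and `PayloadPath` is a `@runtime_checkable` protocol, so a plain `pathlib.Path` satisfies it structurally. A small sketch under those assumptions (internal import path as shown in the diff):

```python
from pathlib import Path, PurePath

from snowflake.ml.jobs._utils.types import PayloadPath, PayloadSpec

spec = PayloadSpec(source_path=Path("train.py"), remote_relative_path=PurePath("src/train.py"))

# __iter__ makes the dataclass unpack like a 2-tuple:
source, remote = spec

# @runtime_checkable isinstance() only checks attribute presence, so
# pathlib.Path (name, suffix, joinpath, __fspath__, ...) passes the check:
assert isinstance(Path("train.py"), PayloadPath)
```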
snowflake/ml/jobs/job.py CHANGED
@@ -3,6 +3,7 @@ import logging
  import os
  import time
  from functools import cached_property
+ from pathlib import Path
  from typing import Any, Generic, Literal, Optional, TypeVar, Union, cast, overload

  import yaml
@@ -95,10 +96,24 @@ class MLJob(Generic[T], SerializableSessionMixin):
      @property
      def _result_path(self) -> str:
          """Get the job's result file location."""
-         result_path = self._container_spec["env"].get(constants.RESULT_PATH_ENV_VAR)
-         if result_path is None:
+         result_path_str = self._container_spec["env"].get(constants.RESULT_PATH_ENV_VAR)
+         if result_path_str is None:
              raise RuntimeError(f"Job {self.name} doesn't have a result path configured")
-         return f"{self._stage_path}/{result_path}"
+         volume_mounts = self._container_spec["volumeMounts"]
+         stage_mount_str = next(v for v in volume_mounts if v.get("name") == constants.STAGE_VOLUME_NAME)["mountPath"]
+
+         result_path = Path(result_path_str)
+         stage_mount = Path(stage_mount_str)
+         try:
+             relative_path = result_path.relative_to(stage_mount)
+         except ValueError:
+             if result_path.is_absolute():
+                 raise ValueError(
+                     f"Result path {result_path} is absolute, but should be relative to stage mount {stage_mount}"
+                 )
+             relative_path = result_path
+
+         return f"{self._stage_path}/{relative_path.as_posix()}"

      @overload
      def get_logs(
@@ -181,7 +196,10 @@ class MLJob(Generic[T], SerializableSessionMixin):
          start_time = time.monotonic()
          warning_shown = False
          while (status := self.status) not in TERMINAL_JOB_STATUSES:
-             if status == "PENDING" and not warning_shown:
+             elapsed = time.monotonic() - start_time
+             if elapsed >= timeout >= 0:
+                 raise TimeoutError(f"Job {self.name} did not complete within {timeout} seconds")
+             elif status == "PENDING" and not warning_shown and elapsed >= 2:  # Only show warning after 2s
                  pool_info = _get_compute_pool_info(self._session, self._compute_pool)
                  if (pool_info.max_nodes - pool_info.active_nodes) < self.min_instances:
                      logger.warning(
@@ -189,8 +207,6 @@ class MLJob(Generic[T], SerializableSessionMixin):
                          f"{self.min_instances} nodes required). Job execution may be delayed."
                      )
                      warning_shown = True
-             if timeout >= 0 and (elapsed := time.monotonic() - start_time) >= timeout:
-                 raise TimeoutError(f"Job {self.name} did not complete within {elapsed} seconds")
              time.sleep(delay)
              delay = min(delay * 1.2, constants.JOB_POLL_MAX_DELAY_SECONDS)  # Exponential backoff
          return self.status
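The reworked `MLJob._result_path` strips the stage volume's `mountPath` prefix from the container-local result path before joining the remainder onto the job's stage path. A standalone sketch of that derivation with hypothetical values (in `MLJob` these come from the service container spec's `env` and `volumeMounts`):

```python
from pathlib import Path

# Hypothetical values standing in for RESULT_PATH_ENV_VAR, the stage volume
# mountPath, and the job's stage location.
result_path = Path("/mnt/job_stage/output/mljob_result.pkl")
stage_mount = Path("/mnt/job_stage")
stage_path = "@my_stage/jobs/MY_JOB"

try:
    relative_path = result_path.relative_to(stage_mount)
except ValueError:
    if result_path.is_absolute():
        # An absolute path outside the stage mount cannot map back to the stage.
        raise
    relative_path = result_path  # already relative to the mount

print(f"{stage_path}/{relative_path.as_posix()}")
# -> @my_stage/jobs/MY_JOB/output/mljob_result.pkl
```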
snowflake/ml/jobs/manager.py CHANGED
@@ -447,6 +447,10 @@ def _submit_job(
      spec_overrides = kwargs.pop("spec_overrides", None)
      enable_metrics = kwargs.pop("enable_metrics", True)
      query_warehouse = kwargs.pop("query_warehouse", None)
+     additional_payloads = kwargs.pop("additional_payloads", None)
+
+     if additional_payloads:
+         logger.warning("'additional_payloads' is in private preview since 1.9.1. Do not use it in production.")

      # Warn if there are unknown kwargs
      if kwargs:
@@ -477,9 +481,7 @@ def _submit_job(

      # Upload payload
      uploaded_payload = payload_utils.JobPayload(
-         source,
-         entrypoint=entrypoint,
-         pip_requirements=pip_requirements,
+         source, entrypoint=entrypoint, pip_requirements=pip_requirements, additional_payloads=additional_payloads
      ).upload(session, stage_path)

      # Generate service spec
snowflake/ml/model/_client/ops/service_ops.py CHANGED
@@ -96,11 +96,13 @@ class ServiceLogMetadata:
          msg: str,
          is_model_build_service_done: bool,
          is_model_logger_service_done: bool,
+         operation_id: str,
          propagate: bool = False,
      ) -> None:
          to_service_logger = service_logger.get_logger(
              f"{to_service.display_service_name}-{to_service.instance_id}",
              to_service.log_color,
+             operation_id=operation_id,
          )
          to_service_logger.propagate = propagate
          self.service_logger = to_service_logger
@@ -196,6 +198,9 @@ class ServiceOperator:
          hf_model_args: Optional[HFModelArgs] = None,
      ) -> Union[str, async_job.AsyncJob]:

+         # Generate operation ID for this deployment
+         operation_id = service_logger.get_operation_id()
+
          # Fall back to the registry's database and schema if not provided
          database_name = database_name or self._database_name
          schema_name = schema_name or self._schema_name
@@ -327,6 +332,7 @@ class ServiceOperator:
              model_inference_service=model_inference_service,
              model_inference_service_exists=model_inference_service_exists,
              force_rebuild=force_rebuild,
+             operation_id=operation_id,
              statement_params=statement_params,
          )

@@ -347,6 +353,7 @@ class ServiceOperator:
          model_inference_service: ServiceLogInfo,
          model_inference_service_exists: bool,
          force_rebuild: bool,
+         operation_id: str,
          statement_params: Optional[dict[str, Any]] = None,
      ) -> threading.Thread:
          """Start the service log streaming in a separate thread."""
@@ -360,6 +367,7 @@ class ServiceOperator:
              model_inference_service,
              model_inference_service_exists,
              force_rebuild,
+             operation_id,
              statement_params,
          ),
      )
@@ -372,6 +380,7 @@ class ServiceOperator:
          service_log_meta: ServiceLogMetadata,
          model_build_service: ServiceLogInfo,
          model_inference_service: ServiceLogInfo,
+         operation_id: str,
          statement_params: Optional[dict[str, Any]] = None,
      ) -> None:
          """Helper function to fetch logs and update the service log metadata if needed.
@@ -386,6 +395,7 @@ class ServiceOperator:
              service_log_meta: The ServiceLogMetadata holds the state of the service log metadata.
              model_build_service: The ServiceLogInfo for the model build service.
              model_inference_service: The ServiceLogInfo for the model inference service.
+             operation_id: The operation ID for the service, e.g. "model_deploy_a1b2c3d4_1703875200"
              statement_params: The statement parameters to use for the service client.
          """

@@ -415,6 +425,7 @@ class ServiceOperator:
                  "Model build is not rebuilding the inference image, but using a previously built image.",
                  is_model_build_service_done=True,
                  is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                 operation_id=operation_id,
              )

          try:
@@ -488,6 +499,7 @@ class ServiceOperator:
                      f"Model Logger service {service.display_service_name} complete.",
                      is_model_build_service_done=False,
                      is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                     operation_id=operation_id,
                  )
              # check if model build service is done
              # and transition the service log metadata to the model inference service
@@ -497,6 +509,7 @@ class ServiceOperator:
                      f"Image build service {service.display_service_name} complete.",
                      is_model_build_service_done=True,
                      is_model_logger_service_done=service_log_meta.is_model_logger_service_done,
+                     operation_id=operation_id,
                  )
              else:
                  module_logger.warning(f"Service {service.display_service_name} is done, but not transitioning.")
@@ -509,6 +522,7 @@ class ServiceOperator:
          model_inference_service: ServiceLogInfo,
          model_inference_service_exists: bool,
          force_rebuild: bool,
+         operation_id: str,
          statement_params: Optional[dict[str, Any]] = None,
      ) -> None:
          """Stream service logs while the async job is running."""
@@ -516,14 +530,14 @@ class ServiceOperator:
          model_build_service_logger = service_logger.get_logger(  # BuildJobName
              model_build_service.display_service_name,
              model_build_service.log_color,
+             operation_id=operation_id,
          )
-         model_build_service_logger.propagate = False
          if model_logger_service:
              model_logger_service_logger = service_logger.get_logger(  # ModelLoggerName
                  model_logger_service.display_service_name,
                  model_logger_service.log_color,
+                 operation_id=operation_id,
              )
-             model_logger_service_logger.propagate = False

          service_log_meta = ServiceLogMetadata(
              service_logger=model_logger_service_logger,
@@ -557,6 +571,7 @@ class ServiceOperator:
                  force_rebuild=force_rebuild,
                  model_build_service=model_build_service,
                  model_inference_service=model_inference_service,
+                 operation_id=operation_id,
                  statement_params=statement_params,
              )
          except Exception as ex:
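Every logger created during a deployment now shares one `operation_id`, so log lines from the build, logger, and inference services can be correlated. The diff only documents the ID's shape ("model_deploy_a1b2c3d4_1703875200"); below is a plausible sketch of generating such an ID, not the actual `service_logger.get_operation_id()` implementation, which lives in service_logger.py and is not shown here:

```python
import time
import uuid

def get_operation_id() -> str:
    # Hypothetical reconstruction: 8 hex chars of randomness plus a Unix
    # timestamp, matching the documented example "model_deploy_a1b2c3d4_1703875200".
    return f"model_deploy_{uuid.uuid4().hex[:8]}_{int(time.time())}"

print(get_operation_id())
```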
snowflake/ml/model/_client/sql/service.py CHANGED
@@ -2,7 +2,7 @@ import dataclasses
  import enum
  import logging
  import textwrap
- from typing import Any, Optional, Union
+ from typing import Any, Optional

  from snowflake import snowpark
  from snowflake.ml._internal.utils import (
@@ -69,43 +69,6 @@ class ServiceSQLClient(_base._BaseSQLClient):
      CONTAINER_STATUS = "status"
      MESSAGE = "message"

-     def build_model_container(
-         self,
-         *,
-         database_name: Optional[sql_identifier.SqlIdentifier],
-         schema_name: Optional[sql_identifier.SqlIdentifier],
-         model_name: sql_identifier.SqlIdentifier,
-         version_name: sql_identifier.SqlIdentifier,
-         compute_pool_name: sql_identifier.SqlIdentifier,
-         image_repo_database_name: Optional[sql_identifier.SqlIdentifier],
-         image_repo_schema_name: Optional[sql_identifier.SqlIdentifier],
-         image_repo_name: sql_identifier.SqlIdentifier,
-         gpu: Optional[Union[str, int]],
-         force_rebuild: bool,
-         external_access_integration: sql_identifier.SqlIdentifier,
-         statement_params: Optional[dict[str, Any]] = None,
-     ) -> None:
-         actual_image_repo_database = image_repo_database_name or self._database_name
-         actual_image_repo_schema = image_repo_schema_name or self._schema_name
-         actual_model_database = database_name or self._database_name
-         actual_model_schema = schema_name or self._schema_name
-         fq_model_name = self.fully_qualified_object_name(actual_model_database, actual_model_schema, model_name)
-         fq_image_repo_name = identifier.get_schema_level_object_identifier(
-             actual_image_repo_database.identifier(),
-             actual_image_repo_schema.identifier(),
-             image_repo_name.identifier(),
-         )
-         is_gpu_str = "TRUE" if gpu else "FALSE"
-         force_rebuild_str = "TRUE" if force_rebuild else "FALSE"
-         query_result_checker.SqlResultValidator(
-             self._session,
-             (
-                 f"CALL SYSTEM$BUILD_MODEL_CONTAINER('{fq_model_name}', '{version_name}', '{compute_pool_name}',"
-                 f" '{fq_image_repo_name}', '{is_gpu_str}', '{force_rebuild_str}', '', '{external_access_integration}')"
-             ),
-             statement_params=statement_params,
-         ).has_dimensions(expected_rows=1, expected_cols=1).validate()
-
      def deploy_model(
          self,
          *,
snowflake/ml/model/_packager/model_handlers/sklearn.py CHANGED
@@ -1,3 +1,4 @@
+ import logging
  import os
  import warnings
  from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union, cast, final
@@ -24,6 +25,8 @@ if TYPE_CHECKING:
      import sklearn.base
      import sklearn.pipeline

+ logger = logging.getLogger(__name__)
+

  def _unpack_container_runtime_pipeline(model: "sklearn.pipeline.Pipeline") -> "sklearn.pipeline.Pipeline":
      new_steps = []
@@ -201,13 +204,13 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
              explain_target_method = str(explain_target_method)  # mypy complains if we don't cast to str here

              input_signature = handlers_utils.get_input_signature(model_meta, explain_target_method)
-             transformed_background_data = _apply_transforms_up_to_last_step(
-                 model=model,
-                 data=background_data,
-                 input_feature_names=[spec.name for spec in input_signature],
-             )

              try:
+                 transformed_background_data = _apply_transforms_up_to_last_step(
+                     model=model,
+                     data=background_data,
+                     input_feature_names=[spec.name for spec in input_signature],
+                 )
                  model_meta = handlers_utils.add_inferred_explain_method_signature(
                      model_meta=model_meta,
                      explain_method="explain",
@@ -217,6 +220,7 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                      output_feature_names=transformed_background_data.columns,
                  )
              except Exception:
+                 logger.debug("Explainability is disabled due to an exception.", exc_info=True)
                  if kwargs.get("enable_explainability", None):
                      # user explicitly enabled explainability, so we should raise the error
                      raise ValueError(
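Moving `_apply_transforms_up_to_last_step` inside the `try` means a failing background-data transform now downgrades explainability instead of failing the model save, while `exc_info=True` keeps the full traceback available at DEBUG level. The pattern in isolation (a self-contained sketch; `risky_setup` is a hypothetical stand-in for the explainability setup):

```python
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def risky_setup() -> None:
    raise RuntimeError("background data transform failed")  # stand-in failure

try:
    risky_setup()
except Exception:
    # exc_info=True attaches the traceback to the DEBUG record, so the failure
    # stays diagnosable without aborting the caller.
    logger.debug("Explainability is disabled due to an exception.", exc_info=True)
```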
snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py CHANGED
@@ -13,6 +13,7 @@ REQUIREMENTS = [
      "numpy>=1.23,<3",
      "packaging>=20.9,<25",
      "pandas>=2.1.4,<3",
+     "platformdirs<5",
      "pyarrow",
      "pydantic>=2.8.2, <3",
      "pyjwt>=2.0.0, <3",
snowflake/ml/model/_signatures/pandas_handler.py CHANGED
@@ -86,6 +86,9 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
              df_col_data = utils.series_dropna(df_col_data)
              df_col_dtype = df_col_data.dtype

+             if utils.check_if_series_is_empty(df_col_data):
+                 continue
+
              if df_col_dtype == np.dtype("O"):
                  # Check if all objects have the same type
                  if not all(isinstance(data_row, type(df_col_data.iloc[0])) for data_row in df_col_data):
snowflake/ml/model/_signatures/utils.py CHANGED
@@ -412,3 +412,7 @@ def infer_dict(name: str, data: dict[str, Any]) -> core.FeatureGroupSpec:
          specs.append(core.FeatureSpec(name=key, dtype=core.DataType.from_numpy_type(np.array(value).dtype)))

      return core.FeatureGroupSpec(name=name, specs=specs)
+
+
+ def check_if_series_is_empty(series: Optional[pd.Series]) -> bool:
+     return series is None or series.empty
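The new helper treats both `None` and a zero-length Series as empty; combined with the upstream `series_dropna` call, an all-`None` object column is now skipped instead of dereferencing `iloc[0]` on an empty Series. Its behavior, restated from the diff with a few usage assertions:

```python
from typing import Optional

import pandas as pd

def check_if_series_is_empty(series: Optional[pd.Series]) -> bool:
    return series is None or series.empty

assert check_if_series_is_empty(None)
assert check_if_series_is_empty(pd.Series([None, None]).dropna())  # all-None column after dropna
assert not check_if_series_is_empty(pd.Series([1, 2]))
```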
snowflake/ml/model/model_signature.py CHANGED
@@ -272,6 +272,8 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
                  ),
              )
          else:
+             if utils.check_if_series_is_empty(data_col):
+                 continue
              if isinstance(data_col.iloc[0], list):
                  if not ft_shape:
                      raise snowml_exceptions.SnowflakeMLException(
snowflake/ml/version.py CHANGED
@@ -1,2 +1,2 @@
  # This is parsed by regex in conda recipe meta file. Make sure not to break it.
- VERSION = "1.9.1"
+ VERSION = "1.9.2"
{snowflake_ml_python-1.9.1.dist-info → snowflake_ml_python-1.9.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: snowflake-ml-python
- Version: 1.9.1
+ Version: 1.9.2
  Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
  Author-email: "Snowflake, Inc" <support@snowflake.com>
  License:
@@ -243,6 +243,7 @@ Requires-Dist: importlib_resources<7,>=6.1.1
  Requires-Dist: numpy<3,>=1.23
  Requires-Dist: packaging<25,>=20.9
  Requires-Dist: pandas<3,>=2.1.4
+ Requires-Dist: platformdirs<5
  Requires-Dist: pyarrow
  Requires-Dist: pydantic<3,>=2.8.2
  Requires-Dist: pyjwt<3,>=2.0.0
@@ -273,7 +274,7 @@ Requires-Dist: tensorflow<3,>=2.17.0; extra == "all"
  Requires-Dist: tokenizers<1,>=0.15.1; extra == "all"
  Requires-Dist: torch<3,>=2.0.1; extra == "all"
  Requires-Dist: torchdata<1,>=0.4; extra == "all"
- Requires-Dist: transformers<5,>=4.39.3; extra == "all"
+ Requires-Dist: transformers!=4.51.3,<5,>=4.39.3; extra == "all"
  Provides-Extra: altair
  Requires-Dist: altair<6,>=5; extra == "altair"
  Provides-Extra: catboost
@@ -298,7 +299,7 @@ Requires-Dist: sentence-transformers<4,>=2.7.0; extra == "transformers"
  Requires-Dist: sentencepiece<0.2.0,>=0.1.95; extra == "transformers"
  Requires-Dist: tokenizers<1,>=0.15.1; extra == "transformers"
  Requires-Dist: torch<3,>=2.0.1; extra == "transformers"
- Requires-Dist: transformers<5,>=4.39.3; extra == "transformers"
+ Requires-Dist: transformers!=4.51.3,<5,>=4.39.3; extra == "transformers"
  Dynamic: license-file

  # Snowpark ML
@@ -409,7 +410,44 @@ NOTE: Version 1.7.0 is used as example here. Please choose the the latest versio

  # Release History

- ## 1.9.1
+ ## 1.9.2
+
+ ### Bug Fixes
+
+ - DataConnector: Fix `self._session` related errors inside Container Runtime.
+ - Registry: Fix a bug when trying to pass `None` to array (`pd.dtype('O')`) in signature and pandas data handler.
+
+ ### New Features
+
+ - Experiment Tracking (PrPr): Automatically log the model, metrics, and parameters while training
+   XGBoost and LightGBM models.
+
+ ```python
+ from snowflake.ml.experiment import ExperimentTracking
+ from snowflake.ml.experiment.callback import SnowflakeXgboostCallback, SnowflakeLightgbmCallback
+
+ exp = ExperimentTracking(session=sp_session, database_name="ML", schema_name="PUBLIC")
+
+ exp.set_experiment("MY_EXPERIMENT")
+
+ # XGBoost
+ callback = SnowflakeXgboostCallback(
+     exp, log_model=True, log_metrics=True, log_params=True, model_name="model_name", model_signature=sig
+ )
+ model = XGBClassifier(callbacks=[callback])
+ with exp.start_run():
+     model.fit(X, y, eval_set=[(X_test, y_test)])
+
+ # LightGBM
+ callback = SnowflakeLightgbmCallback(
+     exp, log_model=True, log_metrics=True, log_params=True, model_name="model_name", model_signature=sig
+ )
+ model = LGBMClassifier()
+ with exp.start_run():
+     model.fit(X, y, eval_set=[(X_test, y_test)], callbacks=[callback])
+ ```
+
+ ## 1.9.1 (07-18-2025)

  ### Bug Fixes