wandb 0.22.1__py3-none-win32.whl → 0.22.3__py3-none-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. wandb/__init__.py +1 -1
  2. wandb/__init__.pyi +7 -4
  3. wandb/_pydantic/__init__.py +8 -1
  4. wandb/_pydantic/base.py +54 -18
  5. wandb/_pydantic/field_types.py +8 -3
  6. wandb/_pydantic/pagination.py +46 -0
  7. wandb/_pydantic/utils.py +2 -2
  8. wandb/apis/public/api.py +24 -19
  9. wandb/apis/public/artifacts.py +259 -270
  10. wandb/apis/public/registries/_utils.py +40 -54
  11. wandb/apis/public/registries/registries_search.py +70 -85
  12. wandb/apis/public/registries/registry.py +173 -156
  13. wandb/apis/public/runs.py +27 -6
  14. wandb/apis/public/utils.py +43 -20
  15. wandb/automations/_generated/create_automation.py +2 -2
  16. wandb/automations/_generated/create_generic_webhook_integration.py +4 -4
  17. wandb/automations/_generated/delete_automation.py +2 -2
  18. wandb/automations/_generated/fragments.py +31 -52
  19. wandb/automations/_generated/generic_webhook_integrations_by_entity.py +3 -3
  20. wandb/automations/_generated/get_automations.py +3 -3
  21. wandb/automations/_generated/get_automations_by_entity.py +3 -3
  22. wandb/automations/_generated/input_types.py +9 -9
  23. wandb/automations/_generated/integrations_by_entity.py +3 -3
  24. wandb/automations/_generated/operations.py +6 -6
  25. wandb/automations/_generated/slack_integrations_by_entity.py +3 -3
  26. wandb/automations/_generated/update_automation.py +2 -2
  27. wandb/automations/_utils.py +3 -3
  28. wandb/automations/actions.py +3 -3
  29. wandb/automations/automations.py +6 -5
  30. wandb/bin/gpu_stats.exe +0 -0
  31. wandb/bin/wandb-core +0 -0
  32. wandb/cli/beta.py +23 -3
  33. wandb/cli/beta_leet.py +75 -0
  34. wandb/cli/beta_sync.py +1 -1
  35. wandb/cli/cli.py +34 -7
  36. wandb/errors/term.py +8 -8
  37. wandb/jupyter.py +0 -51
  38. wandb/old/settings.py +6 -6
  39. wandb/proto/v3/wandb_api_pb2.py +86 -0
  40. wandb/proto/v3/wandb_server_pb2.py +38 -37
  41. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  42. wandb/proto/v3/wandb_sync_pb2.py +19 -6
  43. wandb/proto/v4/wandb_api_pb2.py +37 -0
  44. wandb/proto/v4/wandb_server_pb2.py +38 -37
  45. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  46. wandb/proto/v4/wandb_sync_pb2.py +10 -6
  47. wandb/proto/v5/wandb_api_pb2.py +38 -0
  48. wandb/proto/v5/wandb_server_pb2.py +38 -37
  49. wandb/proto/v5/wandb_settings_pb2.py +2 -2
  50. wandb/proto/v5/wandb_sync_pb2.py +10 -6
  51. wandb/proto/v6/wandb_api_pb2.py +48 -0
  52. wandb/proto/v6/wandb_server_pb2.py +38 -37
  53. wandb/proto/v6/wandb_settings_pb2.py +2 -2
  54. wandb/proto/v6/wandb_sync_pb2.py +10 -6
  55. wandb/proto/wandb_api_pb2.py +18 -0
  56. wandb/proto/wandb_generate_proto.py +1 -0
  57. wandb/sdk/artifacts/_generated/__init__.py +96 -40
  58. wandb/sdk/artifacts/_generated/add_aliases.py +3 -3
  59. wandb/sdk/artifacts/_generated/add_artifact_collection_tags.py +26 -0
  60. wandb/sdk/artifacts/_generated/artifact_by_id.py +2 -2
  61. wandb/sdk/artifacts/_generated/artifact_by_name.py +3 -3
  62. wandb/sdk/artifacts/_generated/artifact_collection_membership_file_urls.py +27 -8
  63. wandb/sdk/artifacts/_generated/artifact_collection_membership_files.py +27 -8
  64. wandb/sdk/artifacts/_generated/artifact_created_by.py +7 -20
  65. wandb/sdk/artifacts/_generated/artifact_file_urls.py +19 -6
  66. wandb/sdk/artifacts/_generated/artifact_membership_by_name.py +26 -0
  67. wandb/sdk/artifacts/_generated/artifact_type.py +5 -5
  68. wandb/sdk/artifacts/_generated/artifact_used_by.py +8 -17
  69. wandb/sdk/artifacts/_generated/artifact_version_files.py +19 -8
  70. wandb/sdk/artifacts/_generated/delete_aliases.py +3 -3
  71. wandb/sdk/artifacts/_generated/delete_artifact.py +4 -4
  72. wandb/sdk/artifacts/_generated/delete_artifact_collection_tags.py +23 -0
  73. wandb/sdk/artifacts/_generated/delete_artifact_portfolio.py +4 -4
  74. wandb/sdk/artifacts/_generated/delete_artifact_sequence.py +4 -4
  75. wandb/sdk/artifacts/_generated/delete_registry.py +21 -0
  76. wandb/sdk/artifacts/_generated/fetch_artifact_manifest.py +8 -20
  77. wandb/sdk/artifacts/_generated/fetch_linked_artifacts.py +13 -35
  78. wandb/sdk/artifacts/_generated/fetch_org_info_from_entity.py +28 -0
  79. wandb/sdk/artifacts/_generated/fetch_registries.py +18 -8
  80. wandb/sdk/{projects → artifacts}/_generated/fetch_registry.py +4 -4
  81. wandb/sdk/artifacts/_generated/fragments.py +183 -333
  82. wandb/sdk/artifacts/_generated/input_types.py +133 -7
  83. wandb/sdk/artifacts/_generated/link_artifact.py +5 -5
  84. wandb/sdk/artifacts/_generated/operations.py +1053 -548
  85. wandb/sdk/artifacts/_generated/project_artifact_collection.py +9 -77
  86. wandb/sdk/artifacts/_generated/project_artifact_collections.py +21 -9
  87. wandb/sdk/artifacts/_generated/project_artifact_type.py +3 -3
  88. wandb/sdk/artifacts/_generated/project_artifact_types.py +19 -6
  89. wandb/sdk/artifacts/_generated/project_artifacts.py +7 -8
  90. wandb/sdk/artifacts/_generated/registry_collections.py +21 -9
  91. wandb/sdk/artifacts/_generated/registry_versions.py +20 -9
  92. wandb/sdk/artifacts/_generated/rename_registry.py +25 -0
  93. wandb/sdk/artifacts/_generated/run_input_artifacts.py +5 -9
  94. wandb/sdk/artifacts/_generated/run_output_artifacts.py +5 -9
  95. wandb/sdk/artifacts/_generated/type_info.py +2 -2
  96. wandb/sdk/artifacts/_generated/unlink_artifact.py +3 -5
  97. wandb/sdk/artifacts/_generated/update_artifact.py +3 -3
  98. wandb/sdk/artifacts/_generated/update_artifact_collection_type.py +28 -0
  99. wandb/sdk/artifacts/_generated/update_artifact_portfolio.py +7 -16
  100. wandb/sdk/artifacts/_generated/update_artifact_sequence.py +7 -16
  101. wandb/sdk/artifacts/_generated/upsert_registry.py +25 -0
  102. wandb/sdk/artifacts/_gqlutils.py +170 -6
  103. wandb/sdk/artifacts/_models/__init__.py +9 -0
  104. wandb/sdk/artifacts/_models/artifact_collection.py +109 -0
  105. wandb/sdk/artifacts/_models/manifest.py +26 -0
  106. wandb/sdk/artifacts/_models/pagination.py +26 -0
  107. wandb/sdk/artifacts/_models/registry.py +100 -0
  108. wandb/sdk/artifacts/_validators.py +45 -27
  109. wandb/sdk/artifacts/artifact.py +249 -244
  110. wandb/sdk/artifacts/artifact_file_cache.py +1 -1
  111. wandb/sdk/artifacts/artifact_manifest.py +37 -32
  112. wandb/sdk/artifacts/artifact_manifest_entry.py +82 -133
  113. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +43 -61
  114. wandb/sdk/artifacts/storage_handler.py +18 -12
  115. wandb/sdk/artifacts/storage_handlers/azure_handler.py +11 -6
  116. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +17 -12
  117. wandb/sdk/artifacts/storage_handlers/http_handler.py +9 -4
  118. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +10 -6
  119. wandb/sdk/artifacts/storage_handlers/multi_handler.py +5 -4
  120. wandb/sdk/artifacts/storage_handlers/s3_handler.py +10 -8
  121. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +6 -4
  122. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +24 -21
  123. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +4 -2
  124. wandb/sdk/artifacts/storage_policies/_multipart.py +187 -0
  125. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +61 -242
  126. wandb/sdk/artifacts/storage_policy.py +25 -12
  127. wandb/sdk/data_types/image.py +2 -2
  128. wandb/sdk/data_types/object_3d.py +67 -2
  129. wandb/sdk/interface/interface.py +72 -64
  130. wandb/sdk/interface/interface_queue.py +27 -18
  131. wandb/sdk/interface/interface_shared.py +61 -23
  132. wandb/sdk/interface/interface_sock.py +9 -5
  133. wandb/sdk/internal/_generated/server_features_query.py +4 -4
  134. wandb/sdk/internal/job_builder.py +27 -10
  135. wandb/sdk/internal/sender.py +4 -1
  136. wandb/sdk/launch/create_job.py +2 -1
  137. wandb/sdk/launch/inputs/schema.py +13 -10
  138. wandb/sdk/lib/apikey.py +8 -12
  139. wandb/sdk/lib/asyncio_compat.py +1 -1
  140. wandb/sdk/lib/asyncio_manager.py +5 -5
  141. wandb/sdk/lib/console_capture.py +38 -30
  142. wandb/sdk/lib/progress.py +151 -125
  143. wandb/sdk/lib/retry.py +3 -2
  144. wandb/sdk/lib/service/service_connection.py +2 -2
  145. wandb/sdk/lib/wb_logging.py +2 -1
  146. wandb/sdk/mailbox/mailbox.py +1 -1
  147. wandb/sdk/wandb_init.py +11 -14
  148. wandb/sdk/wandb_run.py +14 -48
  149. wandb/sdk/wandb_settings.py +114 -30
  150. {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/METADATA +2 -1
  151. {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/RECORD +154 -146
  152. wandb/sdk/artifacts/_generated/artifact_via_membership_by_name.py +0 -26
  153. wandb/sdk/artifacts/_generated/create_artifact_collection_tag_assignments.py +0 -36
  154. wandb/sdk/artifacts/_generated/delete_artifact_collection_tag_assignments.py +0 -25
  155. wandb/sdk/artifacts/_generated/move_artifact_collection.py +0 -35
  156. wandb/sdk/projects/_generated/__init__.py +0 -26
  157. wandb/sdk/projects/_generated/delete_project.py +0 -22
  158. wandb/sdk/projects/_generated/enums.py +0 -4
  159. wandb/sdk/projects/_generated/fragments.py +0 -41
  160. wandb/sdk/projects/_generated/input_types.py +0 -13
  161. wandb/sdk/projects/_generated/operations.py +0 -88
  162. wandb/sdk/projects/_generated/rename_project.py +0 -27
  163. wandb/sdk/projects/_generated/upsert_registry_project.py +0 -27
  164. {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/WHEEL +0 -0
  165. {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/entry_points.txt +0 -0
  166. {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/licenses/LICENSE +0 -0
@@ -147,7 +147,7 @@ class ArtifactFileCache:
147
147
  if temp_size:
148
148
  wandb.termwarn(
149
149
  f"Cache contains {util.to_human_size(temp_size)} of temporary files. "
150
- "Run `wandb artifact cleanup --remove-temp` to remove them."
150
+ "Run `wandb artifact cache cleanup --remove-temp` to remove them."
151
151
  )
152
152
 
153
153
  entries = []
@@ -2,75 +2,80 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import TYPE_CHECKING, Mapping
5
+ from abc import ABC, abstractmethod
6
+ from typing import TYPE_CHECKING, Any, Dict
7
+
8
+ from pydantic import Field
9
+ from typing_extensions import Annotated
6
10
 
7
11
  from wandb.sdk.internal.internal_api import Api as InternalApi
8
12
  from wandb.sdk.lib.hashutil import HexMD5
9
13
 
14
+ from ._models.base_model import ArtifactsBase
15
+
10
16
  if TYPE_CHECKING:
11
- from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
12
- from wandb.sdk.artifacts.storage_policy import StoragePolicy
17
+ from .artifact_manifest_entry import ArtifactManifestEntry
18
+ from .storage_policy import StoragePolicy
13
19
 
14
20
 
15
- class ArtifactManifest:
16
- entries: dict[str, ArtifactManifestEntry]
21
+ class ArtifactManifest(ArtifactsBase, ABC):
22
+ # Note: this can't be named "version" since it conflicts with the prior `version()` classmethod.
23
+ manifest_version: Annotated[Any, Field(repr=False)]
24
+ entries: Dict[str, ArtifactManifestEntry] = Field(default_factory=dict) # noqa: UP006
25
+
26
+ storage_policy: Annotated[StoragePolicy, Field(exclude=True, repr=False)]
17
27
 
18
28
  @classmethod
29
+ def version(cls) -> int:
30
+ return cls.model_fields["manifest_version"].default
31
+
32
+ @classmethod
33
+ @abstractmethod
19
34
  def from_manifest_json(
20
- cls, manifest_json: dict, api: InternalApi | None = None
35
+ cls, manifest_json: dict[str, Any], api: InternalApi | None = None
21
36
  ) -> ArtifactManifest:
22
- if "version" not in manifest_json:
37
+ if (version := manifest_json.get("version")) is None:
23
38
  raise ValueError("Invalid manifest format. Must contain version field.")
24
- version = manifest_json["version"]
39
+
25
40
  for sub in cls.__subclasses__():
26
41
  if sub.version() == version:
27
42
  return sub.from_manifest_json(manifest_json, api=api)
28
43
  raise ValueError("Invalid manifest version.")
29
44
 
30
- @classmethod
31
- def version(cls) -> int:
32
- raise NotImplementedError
33
-
34
- def __init__(
35
- self,
36
- storage_policy: StoragePolicy,
37
- entries: Mapping[str, ArtifactManifestEntry] | None = None,
38
- ) -> None:
39
- self.storage_policy = storage_policy
40
- self.entries = dict(entries) if entries else {}
41
-
42
45
  def __len__(self) -> int:
43
46
  return len(self.entries)
44
47
 
45
- def to_manifest_json(self) -> dict:
48
+ @abstractmethod
49
+ def to_manifest_json(self) -> dict[str, Any]:
46
50
  raise NotImplementedError
47
51
 
52
+ @abstractmethod
48
53
  def digest(self) -> HexMD5:
49
54
  raise NotImplementedError
50
55
 
56
+ @abstractmethod
57
+ def size(self) -> int:
58
+ raise NotImplementedError
59
+
51
60
  def add_entry(self, entry: ArtifactManifestEntry, overwrite: bool = False) -> None:
52
- path = entry.path
53
61
  if (
54
62
  (not overwrite)
55
- and (old_entry := self.entries.get(path))
63
+ and (old_entry := self.entries.get(entry.path))
56
64
  and (entry.digest != old_entry.digest)
57
65
  ):
58
- raise ValueError(f"Cannot add the same path twice: {path!r}")
59
- self.entries[path] = entry
66
+ raise ValueError(f"Cannot add the same path twice: {entry.path!r}")
67
+ self.entries[entry.path] = entry
60
68
 
61
69
  def remove_entry(self, entry: ArtifactManifestEntry) -> None:
62
70
  try:
63
71
  del self.entries[entry.path]
64
72
  except LookupError:
65
- raise FileNotFoundError(f"Cannot remove missing entry: '{entry.path}'")
73
+ raise FileNotFoundError(f"Cannot remove missing entry: {entry.path!r}")
66
74
 
67
75
  def get_entry_by_path(self, path: str) -> ArtifactManifestEntry | None:
68
76
  return self.entries.get(path)
69
77
 
70
78
  def get_entries_in_directory(self, directory: str) -> list[ArtifactManifestEntry]:
71
- return [
72
- entry
73
- for key, entry in self.entries.items()
74
- # entry keys (paths) use forward slash even for windows
75
- if key.startswith(f"{directory}/")
76
- ]
79
+ # entry keys (paths) use forward slash even for windows
80
+ dir_prefix = f"{directory}/"
81
+ return [obj for key, obj in self.entries.items() if key.startswith(dir_prefix)]
@@ -1,17 +1,24 @@
1
1
  """Artifact manifest entry."""
2
2
 
3
+ # Older-style type annotations required for Pydantic v1 / python 3.8 compatibility.
4
+ # ruff: noqa: UP006, UP007, UP045
5
+
3
6
  from __future__ import annotations
4
7
 
5
8
  import concurrent.futures
6
9
  import hashlib
7
- import json
8
10
  import logging
9
11
  import os
10
12
  from contextlib import suppress
11
- from pathlib import Path
12
- from typing import TYPE_CHECKING
13
+ from os.path import getsize
14
+ from typing import TYPE_CHECKING, Any, Dict, Final, Optional, Union
13
15
  from urllib.parse import urlparse
14
16
 
17
+ from pydantic import Field, NonNegativeInt
18
+ from typing_extensions import Annotated, Self
19
+
20
+ from wandb._pydantic import field_validator, model_validator
21
+ from wandb._strutils import nameof
15
22
  from wandb.proto.wandb_deprecated import Deprecated
16
23
  from wandb.sdk.lib.deprecate import deprecate
17
24
  from wandb.sdk.lib.filesystem import copy_or_overwrite_changed
@@ -22,27 +29,18 @@ from wandb.sdk.lib.hashutil import (
22
29
  hex_to_b64_id,
23
30
  md5_file_b64,
24
31
  )
25
- from wandb.sdk.lib.paths import FilePathStr, LogicalPath, StrPath, URIStr
32
+ from wandb.sdk.lib.paths import FilePathStr, LogicalPath, URIStr
26
33
 
27
- logger = logging.getLogger(__name__)
34
+ from ._models.base_model import ArtifactsBase
28
35
 
29
36
  if TYPE_CHECKING:
30
- from typing_extensions import TypedDict
37
+ from .artifact import Artifact
31
38
 
32
- from wandb.sdk.artifacts.artifact import Artifact
33
39
 
34
- class ArtifactManifestEntryDict(TypedDict, total=False):
35
- path: str
36
- digest: str
37
- skip_cache: bool
38
- ref: str
39
- birthArtifactID: str
40
- size: int
41
- extra: dict
42
- local_path: str
40
+ logger = logging.getLogger(__name__)
43
41
 
44
42
 
45
- _WB_ARTIFACT_SCHEME = "wandb-artifact"
43
+ _WB_ARTIFACT_SCHEME: Final[str] = "wandb-artifact"
46
44
 
47
45
 
48
46
  def _checksum_cache_path(file_path: str) -> str:
@@ -87,76 +85,54 @@ def _write_cached_checksum(file_path: str, checksum: str) -> None:
87
85
  logger.debug(f"Failed to write checksum cache for {file_path!r}")
88
86
 
89
87
 
90
- class ArtifactManifestEntry:
91
- """A single entry in an artifact manifest."""
88
+ class ArtifactManifestEntry(ArtifactsBase):
89
+ """A single entry in an artifact manifest.
90
+
91
+ External code should avoid instantiating this class directly.
92
+ """
92
93
 
93
94
  path: LogicalPath
94
- digest: B64MD5 | URIStr | FilePathStr | ETag
95
- skip_cache: bool
96
- ref: FilePathStr | URIStr | None
97
- birth_artifact_id: str | None
98
- size: int | None
99
- extra: dict
100
- local_path: str | None
101
-
102
- _parent_artifact: Artifact | None = None
103
- _download_url: str | None = None
104
-
105
- def __init__(
106
- self,
107
- path: StrPath,
108
- digest: B64MD5 | URIStr | FilePathStr | ETag,
109
- skip_cache: bool | None = False,
110
- ref: FilePathStr | URIStr | None = None,
111
- birth_artifact_id: str | None = None,
112
- size: int | None = None,
113
- extra: dict | None = None,
114
- local_path: StrPath | None = None,
115
- ) -> None:
116
- self.path = LogicalPath(path)
117
- self.digest = digest
118
- self.ref = ref
119
- self.birth_artifact_id = birth_artifact_id
120
- self.size = size
121
- self.extra = extra or {}
122
- self.local_path = str(local_path) if local_path else None
123
- if self.local_path and self.size is None:
124
- self.size = Path(self.local_path).stat().st_size
125
- self.skip_cache = skip_cache or False
126
95
 
127
- def __repr__(self) -> str:
128
- cls = self.__class__.__name__
129
- ref = f", ref={self.ref!r}" if self.ref is not None else ""
130
- birth_artifact_id = (
131
- f", birth_artifact_id={self.birth_artifact_id!r}"
132
- if self.birth_artifact_id is not None
133
- else ""
134
- )
135
- size = f", size={self.size}" if self.size is not None else ""
136
- extra = f", extra={json.dumps(self.extra)}" if self.extra else ""
137
- local_path = f", local_path={self.local_path!r}" if self.local_path else ""
138
- skip_cache = f", skip_cache={self.skip_cache}"
139
- others = ref + birth_artifact_id + size + extra + local_path + skip_cache
140
- return f"{cls}(path={self.path!r}, digest={self.digest!r}{others})"
141
-
142
- def __eq__(self, other: object) -> bool:
143
- """Strict equality, comparing all public fields.
144
-
145
- ArtifactManifestEntries for the same file may not compare equal if they were
146
- added in different ways or created for different parent artifacts.
96
+ digest: Union[B64MD5, ETag, URIStr, FilePathStr]
97
+ ref: Union[URIStr, FilePathStr, None] = None
98
+ birth_artifact_id: Annotated[Optional[str], Field(alias="birthArtifactID")] = None
99
+ size: Optional[NonNegativeInt] = None
100
+ extra: Dict[str, Any] = Field(default_factory=dict)
101
+ local_path: Optional[str] = None
102
+
103
+ skip_cache: bool = False
104
+
105
+ # Note: Pydantic considers these private attributes, omitting them from validation and comparison logic.
106
+ _parent_artifact: Optional[Artifact] = None
107
+ _download_url: Optional[str] = None
108
+
109
+ @field_validator("path", mode="before")
110
+ def _validate_path(cls, v: Any) -> LogicalPath:
111
+ """Coerce `path` to a LogicalPath.
112
+
113
+ LogicalPath doesn't implement its own pydantic validator, and implementing one for
114
+ both pydantic V1 _and_ V2 would add too much boilerplate. Until we drop V1 support,
115
+ just coerce to LogicalPath in the field validator here.
147
116
  """
148
- if not isinstance(other, ArtifactManifestEntry):
149
- return False
150
- return (
151
- self.path == other.path
152
- and self.digest == other.digest
153
- and self.ref == other.ref
154
- and self.birth_artifact_id == other.birth_artifact_id
155
- and self.size == other.size
156
- and self.extra == other.extra
157
- and self.local_path == other.local_path
158
- and self.skip_cache == other.skip_cache
159
- )
117
+ return LogicalPath(v)
118
+
119
+ @field_validator("local_path", mode="before")
120
+ def _validate_local_path(cls, v: Any) -> str | None:
121
+ """Coerce `local_path` to a str. Necessary if the input is a `PosixPath`."""
122
+ return str(v) if v else None
123
+
124
+ @model_validator(mode="after")
125
+ def _infer_size_from_local_path(self) -> Self:
126
+ """If `size` isn't set, try to infer it from `local_path`."""
127
+ if (self.size is None) and self.local_path:
128
+ self.size = getsize(self.local_path)
129
+ return self
130
+
131
+ def __repr__(self) -> str:
132
+ # For compatibility with prior behavior, don't display `extra` if it's empty
133
+ exclude = None if self.extra else {"extra"}
134
+ repr_dict = self.model_dump(by_alias=False, exclude_none=True, exclude=exclude)
135
+ return f"{nameof(type(self))}({', '.join(f'{k}={v!r}' for k, v in repr_dict.items())})"
160
136
 
161
137
  @property
162
138
  def name(self) -> LogicalPath:
@@ -182,7 +158,6 @@ class ArtifactManifestEntry:
182
158
  root: str | None = None,
183
159
  skip_cache: bool | None = None,
184
160
  executor: concurrent.futures.Executor | None = None,
185
- multipart: bool | None = None,
186
161
  ) -> FilePathStr:
187
162
  """Download this artifact entry to the specified root path.
188
163
 
@@ -193,18 +168,9 @@ class ArtifactManifestEntry:
193
168
  Returns:
194
169
  (str): The path of the downloaded artifact entry.
195
170
  """
196
- if self._parent_artifact is None:
197
- raise NotImplementedError
198
-
199
- root = root or self._parent_artifact._default_root()
200
- self._parent_artifact._add_download_root(root)
201
- path = str(Path(self.path))
202
- dest_path = os.path.join(root, path)
203
-
204
- if skip_cache:
205
- override_cache_path = dest_path
206
- else:
207
- override_cache_path = None
171
+ artifact = self.parent_artifact()
172
+ rootdir = artifact._add_download_root(root)
173
+ dest_path = os.path.join(rootdir, self.path)
208
174
 
209
175
  # Skip checking the cache (and possibly downloading) if the file already exists
210
176
  # and has the digest we're expecting.
@@ -224,30 +190,28 @@ class ArtifactManifestEntry:
224
190
  if self.digest == md5_hash:
225
191
  return FilePathStr(dest_path)
226
192
 
193
+ # Override the target cache path IF we're skipping the cache.
194
+ # Note that `override_cache_path is None` <=> `skip_cache is False`.
195
+ override_cache_path = FilePathStr(dest_path) if skip_cache else None
196
+ storage_policy = artifact.manifest.storage_policy
227
197
  if self.ref is not None:
228
- cache_path = self._parent_artifact.manifest.storage_policy.load_reference(
198
+ cache_path = storage_policy.load_reference(
229
199
  self, local=True, dest_path=override_cache_path
230
200
  )
231
201
  else:
232
- cache_path = self._parent_artifact.manifest.storage_policy.load_file(
233
- self._parent_artifact,
234
- self,
235
- dest_path=override_cache_path,
236
- executor=executor,
237
- multipart=multipart,
202
+ cache_path = storage_policy.load_file(
203
+ artifact, self, dest_path=override_cache_path, executor=executor
238
204
  )
239
205
 
240
206
  # Determine the final path
241
- final_path = (
242
- dest_path
243
- if skip_cache
244
- else copy_or_overwrite_changed(cache_path, dest_path)
207
+ final_path = FilePathStr(
208
+ override_cache_path or copy_or_overwrite_changed(cache_path, dest_path)
245
209
  )
246
210
 
247
211
  # Cache the checksum for future downloads
248
- _write_cached_checksum(str(final_path), self.digest)
212
+ _write_cached_checksum(final_path, self.digest)
249
213
 
250
- return FilePathStr(final_path)
214
+ return final_path
251
215
 
252
216
  def ref_target(self) -> FilePathStr | URIStr:
253
217
  """Get the reference URL that is targeted by this artifact entry.
@@ -260,11 +224,9 @@ class ArtifactManifestEntry:
260
224
  """
261
225
  if self.ref is None:
262
226
  raise ValueError("Only reference entries support ref_target().")
263
- if self._parent_artifact is None:
227
+ if (parent_artifact := self._parent_artifact) is None:
264
228
  return self.ref
265
- return self._parent_artifact.manifest.storage_policy.load_reference(
266
- self._parent_artifact.manifest.entries[self.path], local=False
267
- )
229
+ return parent_artifact.manifest.storage_policy.load_reference(self, local=False)
268
230
 
269
231
  def ref_url(self) -> str:
270
232
  """Get a URL to this artifact entry.
@@ -285,26 +247,13 @@ class ArtifactManifestEntry:
285
247
  raise ValueError("Parent artifact is not set")
286
248
  elif (parent_id := parent_artifact.id) is None:
287
249
  raise ValueError("Parent artifact ID is not set")
288
- return f"{_WB_ARTIFACT_SCHEME}://{b64_to_hex_id(B64MD5(parent_id))}/{self.path}"
289
-
290
- def to_json(self) -> ArtifactManifestEntryDict:
291
- contents: ArtifactManifestEntryDict = {
292
- "path": self.path,
293
- "digest": self.digest,
294
- }
295
- if self.size is not None:
296
- contents["size"] = self.size
297
- if self.ref:
298
- contents["ref"] = self.ref
299
- if self.birth_artifact_id:
300
- contents["birthArtifactID"] = self.birth_artifact_id
301
- if self.local_path:
302
- contents["local_path"] = self.local_path
303
- if self.skip_cache:
304
- contents["skip_cache"] = self.skip_cache
305
- if self.extra:
306
- contents["extra"] = self.extra
307
- return contents
250
+ return f"{_WB_ARTIFACT_SCHEME}://{b64_to_hex_id(parent_id)}/{self.path}"
251
+
252
+ def to_json(self) -> dict[str, Any]:
253
+ # NOTE: The method name `to_json` is a bit misleading, as this returns a
254
+ # python dict, NOT a JSON string. The historical name is kept for continuity,
255
+ # but consider deprecating this in favor of `BaseModel.model_dump()`.
256
+ return self.model_dump(exclude_none=True) # type: ignore[return-value]
308
257
 
309
258
  def _is_artifact_reference(self) -> bool:
310
259
  return self.ref is not None and urlparse(self.ref).scheme == _WB_ARTIFACT_SCHEME
@@ -1,61 +1,48 @@
1
1
  """Artifact manifest v1."""
2
2
 
3
+ # Older-style type annotations required for Pydantic v1 / python 3.8 compatibility.
4
+ # ruff: noqa: UP006
5
+
3
6
  from __future__ import annotations
4
7
 
5
8
  from operator import itemgetter
6
- from typing import Any, Mapping
9
+ from typing import Any, ClassVar, Dict, Literal, final
10
+
11
+ from pydantic import Field
12
+ from typing_extensions import Annotated
7
13
 
8
- from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
9
- from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
10
- from wandb.sdk.artifacts.storage_policy import StoragePolicy
11
14
  from wandb.sdk.internal.internal_api import Api as InternalApi
12
15
  from wandb.sdk.lib.hashutil import HexMD5, _md5
13
16
 
17
+ from .._factories import make_storage_policy
18
+ from .._models.manifest import ArtifactManifestV1Data
19
+ from ..artifact_manifest import ArtifactManifest
20
+ from ..artifact_manifest_entry import ArtifactManifestEntry
21
+ from ..storage_policy import StoragePolicy
22
+
14
23
 
24
+ @final
15
25
  class ArtifactManifestV1(ArtifactManifest):
16
- @classmethod
17
- def version(cls) -> int:
18
- return 1
26
+ manifest_version: Annotated[Literal[1], Field(repr=False)] = 1
27
+ entries: Dict[str, ArtifactManifestEntry] = Field(default_factory=dict)
28
+
29
+ storage_policy: StoragePolicy = Field(
30
+ default_factory=make_storage_policy, exclude=True, repr=False
31
+ )
19
32
 
20
33
  @classmethod
21
34
  def from_manifest_json(
22
- cls, manifest_json: dict, api: InternalApi | None = None
35
+ cls, manifest_json: dict[str, Any], api: InternalApi | None = None
23
36
  ) -> ArtifactManifestV1:
24
- if manifest_json["version"] != cls.version():
25
- raise ValueError(
26
- "Expected manifest version 1, got {}".format(manifest_json["version"])
27
- )
28
-
29
- storage_policy_name = manifest_json["storagePolicy"]
30
- storage_policy_config = manifest_json.get("storagePolicyConfig", {})
31
- storage_policy_cls = StoragePolicy.lookup_by_name(storage_policy_name)
32
-
33
- entries: Mapping[str, ArtifactManifestEntry]
34
- entries = {
35
- name: ArtifactManifestEntry(
36
- path=name,
37
- digest=val["digest"],
38
- birth_artifact_id=val.get("birthArtifactID"),
39
- ref=val.get("ref"),
40
- size=val.get("size"),
41
- extra=val.get("extra"),
42
- local_path=val.get("local_path"),
43
- skip_cache=val.get("skip_cache"),
44
- )
45
- for name, val in manifest_json["contents"].items()
46
- }
37
+ data = ArtifactManifestV1Data(**manifest_json)
47
38
 
39
+ policy_name = data.storage_policy
40
+ policy_cfg = data.storage_policy_config
41
+ policy = StoragePolicy.lookup_by_name(policy_name).from_config(policy_cfg, api)
48
42
  return cls(
49
- storage_policy_cls.from_config(storage_policy_config, api=api), entries
43
+ manifest_version=data.version, entries=data.contents, storage_policy=policy
50
44
  )
51
45
 
52
- def __init__(
53
- self,
54
- storage_policy: StoragePolicy,
55
- entries: Mapping[str, ArtifactManifestEntry] | None = None,
56
- ) -> None:
57
- super().__init__(storage_policy, entries=entries)
58
-
59
46
  def to_manifest_json(self) -> dict:
60
47
  """This is the JSON that's stored in wandb_manifest.json.
61
48
 
@@ -64,31 +51,26 @@ class ArtifactManifestV1(ArtifactManifest):
64
51
  system. We don't need to include the local paths in the artifact manifest
65
52
  contents.
66
53
  """
67
- contents = {}
68
- for name, entry in sorted(self.entries.items(), key=itemgetter(0)):
69
- json_entry: dict[str, Any] = {
70
- "digest": entry.digest,
71
- }
72
- if entry.birth_artifact_id:
73
- json_entry["birthArtifactID"] = entry.birth_artifact_id
74
- if entry.ref:
75
- json_entry["ref"] = entry.ref
76
- if entry.extra:
77
- json_entry["extra"] = entry.extra
78
- if entry.size is not None:
79
- json_entry["size"] = entry.size
80
- contents[name] = json_entry
54
+ omit_entry_fields = {"path", "local_path", "skip_cache"}
81
55
  return {
82
- "version": self.__class__.version(),
56
+ "version": self.manifest_version,
83
57
  "storagePolicy": self.storage_policy.name(),
84
- "storagePolicyConfig": self.storage_policy.config() or {},
85
- "contents": contents,
58
+ "storagePolicyConfig": self.storage_policy.config(),
59
+ "contents": {
60
+ path: entry.model_dump(exclude=omit_entry_fields, exclude_defaults=True)
61
+ for path, entry in self.entries.items()
62
+ },
86
63
  }
87
64
 
65
+ _DIGEST_HEADER: ClassVar[bytes] = b"wandb-artifact-manifest-v1\n"
66
+ """Encoded prefix/header for the ArtifactManifest digest."""
67
+
88
68
  def digest(self) -> HexMD5:
89
- hasher = _md5()
90
- hasher.update(b"wandb-artifact-manifest-v1\n")
69
+ hasher = _md5(self._DIGEST_HEADER)
91
70
  # sort by key (path)
92
- for name, entry in sorted(self.entries.items(), key=itemgetter(0)):
93
- hasher.update(f"{name}:{entry.digest}\n".encode())
94
- return HexMD5(hasher.hexdigest())
71
+ for path, entry in sorted(self.entries.items(), key=itemgetter(0)):
72
+ hasher.update(f"{path}:{entry.digest}\n".encode())
73
+ return hasher.hexdigest()
74
+
75
+ def size(self) -> int:
76
+ return sum(entry.size for entry in self.entries.values() if entry.size)
@@ -2,7 +2,8 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import TYPE_CHECKING, Sequence
5
+ from abc import ABC, abstractmethod
6
+ from typing import TYPE_CHECKING, Final
6
7
 
7
8
  from wandb.sdk.lib.paths import FilePathStr, URIStr
8
9
 
@@ -12,18 +13,11 @@ if TYPE_CHECKING:
12
13
  from wandb.sdk.artifacts.artifact import Artifact
13
14
  from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
14
15
 
15
- DEFAULT_MAX_OBJECTS = 10**7
16
+ DEFAULT_MAX_OBJECTS: Final[int] = 10_000_000 # 10**7
16
17
 
17
18
 
18
- class StorageHandler:
19
- def can_handle(self, parsed_url: ParseResult) -> bool:
20
- """Checks whether this handler can handle the given url.
21
-
22
- Returns:
23
- Whether this handler can handle the given url.
24
- """
25
- raise NotImplementedError
26
-
19
+ class _BaseStorageHandler(ABC):
20
+ @abstractmethod
27
21
  def load_path(
28
22
  self,
29
23
  manifest_entry: ArtifactManifestEntry,
@@ -40,6 +34,7 @@ class StorageHandler:
40
34
  """
41
35
  raise NotImplementedError
42
36
 
37
+ @abstractmethod
43
38
  def store_path(
44
39
  self,
45
40
  artifact: Artifact,
@@ -47,7 +42,7 @@ class StorageHandler:
47
42
  name: str | None = None,
48
43
  checksum: bool = True,
49
44
  max_objects: int | None = None,
50
- ) -> Sequence[ArtifactManifestEntry]:
45
+ ) -> list[ArtifactManifestEntry]:
51
46
  """Store the file or directory at the given path to the specified artifact.
52
47
 
53
48
  Args:
@@ -60,3 +55,14 @@ class StorageHandler:
60
55
  A list of manifest entries to store within the artifact
61
56
  """
62
57
  raise NotImplementedError
58
+
59
+
60
+ class StorageHandler(_BaseStorageHandler, ABC): # Handles a single storage protocol
61
+ @abstractmethod
62
+ def can_handle(self, parsed_url: ParseResult) -> bool:
63
+ """Checks whether this handler can handle the given url.
64
+
65
+ Returns:
66
+ Whether this handler can handle the given url.
67
+ """
68
+ raise NotImplementedError
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from pathlib import PurePosixPath
6
6
  from types import ModuleType
7
- from typing import TYPE_CHECKING, Sequence
7
+ from typing import TYPE_CHECKING
8
8
  from urllib.parse import ParseResult, parse_qsl, urlparse
9
9
 
10
10
  import wandb
@@ -20,17 +20,22 @@ if TYPE_CHECKING:
20
20
  import azure.storage.blob # type: ignore
21
21
 
22
22
  from wandb.sdk.artifacts.artifact import Artifact
23
+ from wandb.sdk.artifacts.artifact_file_cache import ArtifactFileCache
23
24
 
24
25
 
25
26
  class AzureHandler(StorageHandler):
27
+ _scheme: str
28
+ _cache: ArtifactFileCache
29
+
30
+ def __init__(self, scheme: str = "https") -> None:
31
+ self._scheme = scheme
32
+ self._cache = get_artifact_file_cache()
33
+
26
34
  def can_handle(self, parsed_url: ParseResult) -> bool:
27
- return parsed_url.scheme == "https" and parsed_url.netloc.endswith(
35
+ return parsed_url.scheme == self._scheme and parsed_url.netloc.endswith(
28
36
  ".blob.core.windows.net"
29
37
  )
30
38
 
31
- def __init__(self, scheme: str | None = None) -> None:
32
- self._cache = get_artifact_file_cache()
33
-
34
39
  def load_path(
35
40
  self,
36
41
  manifest_entry: ArtifactManifestEntry,
@@ -101,7 +106,7 @@ class AzureHandler(StorageHandler):
101
106
  name: StrPath | None = None,
102
107
  checksum: bool = True,
103
108
  max_objects: int | None = None,
104
- ) -> Sequence[ArtifactManifestEntry]:
109
+ ) -> list[ArtifactManifestEntry]:
105
110
  account_url, container_name, blob_name, query = self._parse_uri(path)
106
111
  path = URIStr(f"{account_url}/{container_name}/{blob_name}")
107
112