wandb 0.22.2__py3-none-musllinux_1_2_aarch64.whl → 0.22.3__py3-none-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. wandb/__init__.py +1 -1
  2. wandb/__init__.pyi +2 -2
  3. wandb/_pydantic/__init__.py +8 -1
  4. wandb/_pydantic/base.py +54 -18
  5. wandb/_pydantic/field_types.py +8 -3
  6. wandb/_pydantic/pagination.py +46 -0
  7. wandb/_pydantic/utils.py +2 -2
  8. wandb/apis/public/api.py +24 -19
  9. wandb/apis/public/artifacts.py +259 -270
  10. wandb/apis/public/registries/_utils.py +40 -54
  11. wandb/apis/public/registries/registries_search.py +70 -85
  12. wandb/apis/public/registries/registry.py +173 -156
  13. wandb/apis/public/runs.py +27 -6
  14. wandb/apis/public/utils.py +43 -20
  15. wandb/automations/_generated/create_automation.py +2 -2
  16. wandb/automations/_generated/create_generic_webhook_integration.py +4 -4
  17. wandb/automations/_generated/delete_automation.py +2 -2
  18. wandb/automations/_generated/fragments.py +31 -52
  19. wandb/automations/_generated/generic_webhook_integrations_by_entity.py +3 -3
  20. wandb/automations/_generated/get_automations.py +3 -3
  21. wandb/automations/_generated/get_automations_by_entity.py +3 -3
  22. wandb/automations/_generated/input_types.py +9 -9
  23. wandb/automations/_generated/integrations_by_entity.py +3 -3
  24. wandb/automations/_generated/operations.py +6 -6
  25. wandb/automations/_generated/slack_integrations_by_entity.py +3 -3
  26. wandb/automations/_generated/update_automation.py +2 -2
  27. wandb/automations/_utils.py +3 -3
  28. wandb/automations/actions.py +3 -3
  29. wandb/automations/automations.py +6 -5
  30. wandb/bin/gpu_stats +0 -0
  31. wandb/bin/wandb-core +0 -0
  32. wandb/cli/beta.py +8 -2
  33. wandb/cli/beta_leet.py +2 -1
  34. wandb/cli/beta_sync.py +1 -1
  35. wandb/errors/term.py +8 -8
  36. wandb/jupyter.py +0 -51
  37. wandb/old/settings.py +6 -6
  38. wandb/proto/v3/wandb_internal_pb2.py +351 -352
  39. wandb/proto/v3/wandb_server_pb2.py +38 -37
  40. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  41. wandb/proto/v3/wandb_sync_pb2.py +19 -6
  42. wandb/proto/v4/wandb_internal_pb2.py +351 -352
  43. wandb/proto/v4/wandb_server_pb2.py +38 -37
  44. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  45. wandb/proto/v4/wandb_sync_pb2.py +10 -6
  46. wandb/proto/v5/wandb_internal_pb2.py +351 -352
  47. wandb/proto/v5/wandb_server_pb2.py +38 -37
  48. wandb/proto/v5/wandb_settings_pb2.py +2 -2
  49. wandb/proto/v5/wandb_sync_pb2.py +10 -6
  50. wandb/proto/v6/wandb_internal_pb2.py +351 -352
  51. wandb/proto/v6/wandb_server_pb2.py +38 -37
  52. wandb/proto/v6/wandb_settings_pb2.py +2 -2
  53. wandb/proto/v6/wandb_sync_pb2.py +10 -6
  54. wandb/sdk/artifacts/_generated/__init__.py +96 -40
  55. wandb/sdk/artifacts/_generated/add_aliases.py +3 -3
  56. wandb/sdk/artifacts/_generated/add_artifact_collection_tags.py +26 -0
  57. wandb/sdk/artifacts/_generated/artifact_by_id.py +2 -2
  58. wandb/sdk/artifacts/_generated/artifact_by_name.py +3 -3
  59. wandb/sdk/artifacts/_generated/artifact_collection_membership_file_urls.py +27 -8
  60. wandb/sdk/artifacts/_generated/artifact_collection_membership_files.py +27 -8
  61. wandb/sdk/artifacts/_generated/artifact_created_by.py +7 -20
  62. wandb/sdk/artifacts/_generated/artifact_file_urls.py +19 -6
  63. wandb/sdk/artifacts/_generated/artifact_membership_by_name.py +26 -0
  64. wandb/sdk/artifacts/_generated/artifact_type.py +5 -5
  65. wandb/sdk/artifacts/_generated/artifact_used_by.py +8 -17
  66. wandb/sdk/artifacts/_generated/artifact_version_files.py +19 -8
  67. wandb/sdk/artifacts/_generated/delete_aliases.py +3 -3
  68. wandb/sdk/artifacts/_generated/delete_artifact.py +4 -4
  69. wandb/sdk/artifacts/_generated/delete_artifact_collection_tags.py +23 -0
  70. wandb/sdk/artifacts/_generated/delete_artifact_portfolio.py +4 -4
  71. wandb/sdk/artifacts/_generated/delete_artifact_sequence.py +4 -4
  72. wandb/sdk/artifacts/_generated/delete_registry.py +21 -0
  73. wandb/sdk/artifacts/_generated/fetch_artifact_manifest.py +8 -20
  74. wandb/sdk/artifacts/_generated/fetch_linked_artifacts.py +13 -35
  75. wandb/sdk/artifacts/_generated/fetch_org_info_from_entity.py +28 -0
  76. wandb/sdk/artifacts/_generated/fetch_registries.py +18 -8
  77. wandb/sdk/{projects → artifacts}/_generated/fetch_registry.py +4 -4
  78. wandb/sdk/artifacts/_generated/fragments.py +183 -333
  79. wandb/sdk/artifacts/_generated/input_types.py +133 -7
  80. wandb/sdk/artifacts/_generated/link_artifact.py +5 -5
  81. wandb/sdk/artifacts/_generated/operations.py +1053 -548
  82. wandb/sdk/artifacts/_generated/project_artifact_collection.py +9 -77
  83. wandb/sdk/artifacts/_generated/project_artifact_collections.py +21 -9
  84. wandb/sdk/artifacts/_generated/project_artifact_type.py +3 -3
  85. wandb/sdk/artifacts/_generated/project_artifact_types.py +19 -6
  86. wandb/sdk/artifacts/_generated/project_artifacts.py +7 -8
  87. wandb/sdk/artifacts/_generated/registry_collections.py +21 -9
  88. wandb/sdk/artifacts/_generated/registry_versions.py +20 -9
  89. wandb/sdk/artifacts/_generated/rename_registry.py +25 -0
  90. wandb/sdk/artifacts/_generated/run_input_artifacts.py +5 -9
  91. wandb/sdk/artifacts/_generated/run_output_artifacts.py +5 -9
  92. wandb/sdk/artifacts/_generated/type_info.py +2 -2
  93. wandb/sdk/artifacts/_generated/unlink_artifact.py +3 -5
  94. wandb/sdk/artifacts/_generated/update_artifact.py +3 -3
  95. wandb/sdk/artifacts/_generated/update_artifact_collection_type.py +28 -0
  96. wandb/sdk/artifacts/_generated/update_artifact_portfolio.py +7 -16
  97. wandb/sdk/artifacts/_generated/update_artifact_sequence.py +7 -16
  98. wandb/sdk/artifacts/_generated/upsert_registry.py +25 -0
  99. wandb/sdk/artifacts/_gqlutils.py +170 -6
  100. wandb/sdk/artifacts/_models/__init__.py +9 -0
  101. wandb/sdk/artifacts/_models/artifact_collection.py +109 -0
  102. wandb/sdk/artifacts/_models/manifest.py +26 -0
  103. wandb/sdk/artifacts/_models/pagination.py +26 -0
  104. wandb/sdk/artifacts/_models/registry.py +100 -0
  105. wandb/sdk/artifacts/_validators.py +45 -27
  106. wandb/sdk/artifacts/artifact.py +220 -215
  107. wandb/sdk/artifacts/artifact_file_cache.py +1 -1
  108. wandb/sdk/artifacts/artifact_manifest.py +37 -32
  109. wandb/sdk/artifacts/artifact_manifest_entry.py +80 -125
  110. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +43 -61
  111. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +8 -6
  112. wandb/sdk/data_types/image.py +2 -2
  113. wandb/sdk/interface/interface.py +72 -64
  114. wandb/sdk/interface/interface_queue.py +27 -18
  115. wandb/sdk/interface/interface_shared.py +61 -23
  116. wandb/sdk/interface/interface_sock.py +9 -5
  117. wandb/sdk/internal/_generated/server_features_query.py +4 -4
  118. wandb/sdk/launch/inputs/schema.py +13 -10
  119. wandb/sdk/lib/apikey.py +8 -12
  120. wandb/sdk/lib/asyncio_compat.py +1 -1
  121. wandb/sdk/lib/asyncio_manager.py +5 -5
  122. wandb/sdk/lib/console_capture.py +38 -30
  123. wandb/sdk/lib/progress.py +159 -64
  124. wandb/sdk/lib/retry.py +3 -2
  125. wandb/sdk/lib/service/service_connection.py +2 -2
  126. wandb/sdk/lib/wb_logging.py +2 -1
  127. wandb/sdk/mailbox/mailbox.py +1 -1
  128. wandb/sdk/wandb_init.py +10 -13
  129. wandb/sdk/wandb_run.py +9 -46
  130. wandb/sdk/wandb_settings.py +102 -19
  131. {wandb-0.22.2.dist-info → wandb-0.22.3.dist-info}/METADATA +2 -1
  132. {wandb-0.22.2.dist-info → wandb-0.22.3.dist-info}/RECORD +135 -134
  133. wandb/sdk/artifacts/_generated/artifact_via_membership_by_name.py +0 -26
  134. wandb/sdk/artifacts/_generated/create_artifact_collection_tag_assignments.py +0 -36
  135. wandb/sdk/artifacts/_generated/delete_artifact_collection_tag_assignments.py +0 -25
  136. wandb/sdk/artifacts/_generated/move_artifact_collection.py +0 -35
  137. wandb/sdk/projects/_generated/__init__.py +0 -26
  138. wandb/sdk/projects/_generated/delete_project.py +0 -22
  139. wandb/sdk/projects/_generated/enums.py +0 -4
  140. wandb/sdk/projects/_generated/fragments.py +0 -41
  141. wandb/sdk/projects/_generated/input_types.py +0 -13
  142. wandb/sdk/projects/_generated/operations.py +0 -88
  143. wandb/sdk/projects/_generated/rename_project.py +0 -27
  144. wandb/sdk/projects/_generated/upsert_registry_project.py +0 -27
  145. {wandb-0.22.2.dist-info → wandb-0.22.3.dist-info}/WHEEL +0 -0
  146. {wandb-0.22.2.dist-info → wandb-0.22.3.dist-info}/entry_points.txt +0 -0
  147. {wandb-0.22.2.dist-info → wandb-0.22.3.dist-info}/licenses/LICENSE +0 -0
@@ -147,7 +147,7 @@ class ArtifactFileCache:
147
147
  if temp_size:
148
148
  wandb.termwarn(
149
149
  f"Cache contains {util.to_human_size(temp_size)} of temporary files. "
150
- "Run `wandb artifact cleanup --remove-temp` to remove them."
150
+ "Run `wandb artifact cache cleanup --remove-temp` to remove them."
151
151
  )
152
152
 
153
153
  entries = []
@@ -2,75 +2,80 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import TYPE_CHECKING, Mapping
5
+ from abc import ABC, abstractmethod
6
+ from typing import TYPE_CHECKING, Any, Dict
7
+
8
+ from pydantic import Field
9
+ from typing_extensions import Annotated
6
10
 
7
11
  from wandb.sdk.internal.internal_api import Api as InternalApi
8
12
  from wandb.sdk.lib.hashutil import HexMD5
9
13
 
14
+ from ._models.base_model import ArtifactsBase
15
+
10
16
  if TYPE_CHECKING:
11
- from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
12
- from wandb.sdk.artifacts.storage_policy import StoragePolicy
17
+ from .artifact_manifest_entry import ArtifactManifestEntry
18
+ from .storage_policy import StoragePolicy
13
19
 
14
20
 
15
- class ArtifactManifest:
16
- entries: dict[str, ArtifactManifestEntry]
21
+ class ArtifactManifest(ArtifactsBase, ABC):
22
+ # Note: this can't be named "version" since it conflicts with the prior `version()` classmethod.
23
+ manifest_version: Annotated[Any, Field(repr=False)]
24
+ entries: Dict[str, ArtifactManifestEntry] = Field(default_factory=dict) # noqa: UP006
25
+
26
+ storage_policy: Annotated[StoragePolicy, Field(exclude=True, repr=False)]
17
27
 
18
28
  @classmethod
29
+ def version(cls) -> int:
30
+ return cls.model_fields["manifest_version"].default
31
+
32
+ @classmethod
33
+ @abstractmethod
19
34
  def from_manifest_json(
20
- cls, manifest_json: dict, api: InternalApi | None = None
35
+ cls, manifest_json: dict[str, Any], api: InternalApi | None = None
21
36
  ) -> ArtifactManifest:
22
- if "version" not in manifest_json:
37
+ if (version := manifest_json.get("version")) is None:
23
38
  raise ValueError("Invalid manifest format. Must contain version field.")
24
- version = manifest_json["version"]
39
+
25
40
  for sub in cls.__subclasses__():
26
41
  if sub.version() == version:
27
42
  return sub.from_manifest_json(manifest_json, api=api)
28
43
  raise ValueError("Invalid manifest version.")
29
44
 
30
- @classmethod
31
- def version(cls) -> int:
32
- raise NotImplementedError
33
-
34
- def __init__(
35
- self,
36
- storage_policy: StoragePolicy,
37
- entries: Mapping[str, ArtifactManifestEntry] | None = None,
38
- ) -> None:
39
- self.storage_policy = storage_policy
40
- self.entries = dict(entries) if entries else {}
41
-
42
45
  def __len__(self) -> int:
43
46
  return len(self.entries)
44
47
 
45
- def to_manifest_json(self) -> dict:
48
+ @abstractmethod
49
+ def to_manifest_json(self) -> dict[str, Any]:
46
50
  raise NotImplementedError
47
51
 
52
+ @abstractmethod
48
53
  def digest(self) -> HexMD5:
49
54
  raise NotImplementedError
50
55
 
56
+ @abstractmethod
57
+ def size(self) -> int:
58
+ raise NotImplementedError
59
+
51
60
  def add_entry(self, entry: ArtifactManifestEntry, overwrite: bool = False) -> None:
52
- path = entry.path
53
61
  if (
54
62
  (not overwrite)
55
- and (old_entry := self.entries.get(path))
63
+ and (old_entry := self.entries.get(entry.path))
56
64
  and (entry.digest != old_entry.digest)
57
65
  ):
58
- raise ValueError(f"Cannot add the same path twice: {path!r}")
59
- self.entries[path] = entry
66
+ raise ValueError(f"Cannot add the same path twice: {entry.path!r}")
67
+ self.entries[entry.path] = entry
60
68
 
61
69
  def remove_entry(self, entry: ArtifactManifestEntry) -> None:
62
70
  try:
63
71
  del self.entries[entry.path]
64
72
  except LookupError:
65
- raise FileNotFoundError(f"Cannot remove missing entry: '{entry.path}'")
73
+ raise FileNotFoundError(f"Cannot remove missing entry: {entry.path!r}")
66
74
 
67
75
  def get_entry_by_path(self, path: str) -> ArtifactManifestEntry | None:
68
76
  return self.entries.get(path)
69
77
 
70
78
  def get_entries_in_directory(self, directory: str) -> list[ArtifactManifestEntry]:
71
- return [
72
- entry
73
- for key, entry in self.entries.items()
74
- # entry keys (paths) use forward slash even for windows
75
- if key.startswith(f"{directory}/")
76
- ]
79
+ # entry keys (paths) use forward slash even for windows
80
+ dir_prefix = f"{directory}/"
81
+ return [obj for key, obj in self.entries.items() if key.startswith(dir_prefix)]
@@ -1,17 +1,24 @@
1
1
  """Artifact manifest entry."""
2
2
 
3
+ # Older-style type annotations required for Pydantic v1 / python 3.8 compatibility.
4
+ # ruff: noqa: UP006, UP007, UP045
5
+
3
6
  from __future__ import annotations
4
7
 
5
8
  import concurrent.futures
6
9
  import hashlib
7
- import json
8
10
  import logging
9
11
  import os
10
12
  from contextlib import suppress
11
- from pathlib import Path
12
- from typing import TYPE_CHECKING
13
+ from os.path import getsize
14
+ from typing import TYPE_CHECKING, Any, Dict, Final, Optional, Union
13
15
  from urllib.parse import urlparse
14
16
 
17
+ from pydantic import Field, NonNegativeInt
18
+ from typing_extensions import Annotated, Self
19
+
20
+ from wandb._pydantic import field_validator, model_validator
21
+ from wandb._strutils import nameof
15
22
  from wandb.proto.wandb_deprecated import Deprecated
16
23
  from wandb.sdk.lib.deprecate import deprecate
17
24
  from wandb.sdk.lib.filesystem import copy_or_overwrite_changed
@@ -22,27 +29,18 @@ from wandb.sdk.lib.hashutil import (
22
29
  hex_to_b64_id,
23
30
  md5_file_b64,
24
31
  )
25
- from wandb.sdk.lib.paths import FilePathStr, LogicalPath, StrPath, URIStr
32
+ from wandb.sdk.lib.paths import FilePathStr, LogicalPath, URIStr
26
33
 
27
- logger = logging.getLogger(__name__)
34
+ from ._models.base_model import ArtifactsBase
28
35
 
29
36
  if TYPE_CHECKING:
30
- from typing_extensions import TypedDict
37
+ from .artifact import Artifact
31
38
 
32
- from wandb.sdk.artifacts.artifact import Artifact
33
39
 
34
- class ArtifactManifestEntryDict(TypedDict, total=False):
35
- path: str
36
- digest: str
37
- skip_cache: bool
38
- ref: str
39
- birthArtifactID: str
40
- size: int
41
- extra: dict
42
- local_path: str
40
+ logger = logging.getLogger(__name__)
43
41
 
44
42
 
45
- _WB_ARTIFACT_SCHEME = "wandb-artifact"
43
+ _WB_ARTIFACT_SCHEME: Final[str] = "wandb-artifact"
46
44
 
47
45
 
48
46
  def _checksum_cache_path(file_path: str) -> str:
@@ -87,76 +85,54 @@ def _write_cached_checksum(file_path: str, checksum: str) -> None:
87
85
  logger.debug(f"Failed to write checksum cache for {file_path!r}")
88
86
 
89
87
 
90
- class ArtifactManifestEntry:
91
- """A single entry in an artifact manifest."""
88
+ class ArtifactManifestEntry(ArtifactsBase):
89
+ """A single entry in an artifact manifest.
90
+
91
+ External code should avoid instantiating this class directly.
92
+ """
92
93
 
93
94
  path: LogicalPath
94
- digest: B64MD5 | URIStr | FilePathStr | ETag
95
- skip_cache: bool
96
- ref: FilePathStr | URIStr | None
97
- birth_artifact_id: str | None
98
- size: int | None
99
- extra: dict
100
- local_path: str | None
101
-
102
- _parent_artifact: Artifact | None = None
103
- _download_url: str | None = None
104
-
105
- def __init__(
106
- self,
107
- path: StrPath,
108
- digest: B64MD5 | URIStr | FilePathStr | ETag,
109
- skip_cache: bool | None = False,
110
- ref: FilePathStr | URIStr | None = None,
111
- birth_artifact_id: str | None = None,
112
- size: int | None = None,
113
- extra: dict | None = None,
114
- local_path: StrPath | None = None,
115
- ) -> None:
116
- self.path = LogicalPath(path)
117
- self.digest = digest
118
- self.ref = ref
119
- self.birth_artifact_id = birth_artifact_id
120
- self.size = size
121
- self.extra = extra or {}
122
- self.local_path = str(local_path) if local_path else None
123
- if self.local_path and self.size is None:
124
- self.size = Path(self.local_path).stat().st_size
125
- self.skip_cache = skip_cache or False
126
95
 
127
- def __repr__(self) -> str:
128
- cls = self.__class__.__name__
129
- ref = f", ref={self.ref!r}" if self.ref is not None else ""
130
- birth_artifact_id = (
131
- f", birth_artifact_id={self.birth_artifact_id!r}"
132
- if self.birth_artifact_id is not None
133
- else ""
134
- )
135
- size = f", size={self.size}" if self.size is not None else ""
136
- extra = f", extra={json.dumps(self.extra)}" if self.extra else ""
137
- local_path = f", local_path={self.local_path!r}" if self.local_path else ""
138
- skip_cache = f", skip_cache={self.skip_cache}"
139
- others = ref + birth_artifact_id + size + extra + local_path + skip_cache
140
- return f"{cls}(path={self.path!r}, digest={self.digest!r}{others})"
141
-
142
- def __eq__(self, other: object) -> bool:
143
- """Strict equality, comparing all public fields.
144
-
145
- ArtifactManifestEntries for the same file may not compare equal if they were
146
- added in different ways or created for different parent artifacts.
96
+ digest: Union[B64MD5, ETag, URIStr, FilePathStr]
97
+ ref: Union[URIStr, FilePathStr, None] = None
98
+ birth_artifact_id: Annotated[Optional[str], Field(alias="birthArtifactID")] = None
99
+ size: Optional[NonNegativeInt] = None
100
+ extra: Dict[str, Any] = Field(default_factory=dict)
101
+ local_path: Optional[str] = None
102
+
103
+ skip_cache: bool = False
104
+
105
+ # Note: Pydantic considers these private attributes, omitting them from validation and comparison logic.
106
+ _parent_artifact: Optional[Artifact] = None
107
+ _download_url: Optional[str] = None
108
+
109
+ @field_validator("path", mode="before")
110
+ def _validate_path(cls, v: Any) -> LogicalPath:
111
+ """Coerce `path` to a LogicalPath.
112
+
113
+ LogicalPath doesn't implement its own pydantic validator, and implementing one for
114
+ both pydantic V1 _and_ V2 would add too much boilerplate. Until we drop V1 support,
115
+ just coerce to LogicalPath in the field validator here.
147
116
  """
148
- if not isinstance(other, ArtifactManifestEntry):
149
- return False
150
- return (
151
- self.path == other.path
152
- and self.digest == other.digest
153
- and self.ref == other.ref
154
- and self.birth_artifact_id == other.birth_artifact_id
155
- and self.size == other.size
156
- and self.extra == other.extra
157
- and self.local_path == other.local_path
158
- and self.skip_cache == other.skip_cache
159
- )
117
+ return LogicalPath(v)
118
+
119
+ @field_validator("local_path", mode="before")
120
+ def _validate_local_path(cls, v: Any) -> str | None:
121
+ """Coerce `local_path` to a str. Necessary if the input is a `PosixPath`."""
122
+ return str(v) if v else None
123
+
124
+ @model_validator(mode="after")
125
+ def _infer_size_from_local_path(self) -> Self:
126
+ """If `size` isn't set, try to infer it from `local_path`."""
127
+ if (self.size is None) and self.local_path:
128
+ self.size = getsize(self.local_path)
129
+ return self
130
+
131
+ def __repr__(self) -> str:
132
+ # For compatibility with prior behavior, don't display `extra` if it's empty
133
+ exclude = None if self.extra else {"extra"}
134
+ repr_dict = self.model_dump(by_alias=False, exclude_none=True, exclude=exclude)
135
+ return f"{nameof(type(self))}({', '.join(f'{k}={v!r}' for k, v in repr_dict.items())})"
160
136
 
161
137
  @property
162
138
  def name(self) -> LogicalPath:
@@ -193,16 +169,8 @@ class ArtifactManifestEntry:
193
169
  (str): The path of the downloaded artifact entry.
194
170
  """
195
171
  artifact = self.parent_artifact()
196
-
197
- root = root or artifact._default_root()
198
- artifact._add_download_root(root)
199
- path = str(Path(self.path))
200
- dest_path = os.path.join(root, path)
201
-
202
- if skip_cache:
203
- override_cache_path = dest_path
204
- else:
205
- override_cache_path = None
172
+ rootdir = artifact._add_download_root(root)
173
+ dest_path = os.path.join(rootdir, self.path)
206
174
 
207
175
  # Skip checking the cache (and possibly downloading) if the file already exists
208
176
  # and has the digest we're expecting.
@@ -222,26 +190,28 @@ class ArtifactManifestEntry:
222
190
  if self.digest == md5_hash:
223
191
  return FilePathStr(dest_path)
224
192
 
193
+ # Override the target cache path IF we're skipping the cache.
194
+ # Note that `override_cache_path is None` <=> `skip_cache is False`.
195
+ override_cache_path = FilePathStr(dest_path) if skip_cache else None
196
+ storage_policy = artifact.manifest.storage_policy
225
197
  if self.ref is not None:
226
- cache_path = artifact.manifest.storage_policy.load_reference(
198
+ cache_path = storage_policy.load_reference(
227
199
  self, local=True, dest_path=override_cache_path
228
200
  )
229
201
  else:
230
- cache_path = artifact.manifest.storage_policy.load_file(
202
+ cache_path = storage_policy.load_file(
231
203
  artifact, self, dest_path=override_cache_path, executor=executor
232
204
  )
233
205
 
234
206
  # Determine the final path
235
- final_path = (
236
- dest_path
237
- if skip_cache
238
- else copy_or_overwrite_changed(cache_path, dest_path)
207
+ final_path = FilePathStr(
208
+ override_cache_path or copy_or_overwrite_changed(cache_path, dest_path)
239
209
  )
240
210
 
241
211
  # Cache the checksum for future downloads
242
- _write_cached_checksum(str(final_path), self.digest)
212
+ _write_cached_checksum(final_path, self.digest)
243
213
 
244
- return FilePathStr(final_path)
214
+ return final_path
245
215
 
246
216
  def ref_target(self) -> FilePathStr | URIStr:
247
217
  """Get the reference URL that is targeted by this artifact entry.
@@ -254,11 +224,9 @@ class ArtifactManifestEntry:
254
224
  """
255
225
  if self.ref is None:
256
226
  raise ValueError("Only reference entries support ref_target().")
257
- if self._parent_artifact is None:
227
+ if (parent_artifact := self._parent_artifact) is None:
258
228
  return self.ref
259
- return self._parent_artifact.manifest.storage_policy.load_reference(
260
- self._parent_artifact.manifest.entries[self.path], local=False
261
- )
229
+ return parent_artifact.manifest.storage_policy.load_reference(self, local=False)
262
230
 
263
231
  def ref_url(self) -> str:
264
232
  """Get a URL to this artifact entry.
@@ -279,26 +247,13 @@ class ArtifactManifestEntry:
279
247
  raise ValueError("Parent artifact is not set")
280
248
  elif (parent_id := parent_artifact.id) is None:
281
249
  raise ValueError("Parent artifact ID is not set")
282
- return f"{_WB_ARTIFACT_SCHEME}://{b64_to_hex_id(B64MD5(parent_id))}/{self.path}"
283
-
284
- def to_json(self) -> ArtifactManifestEntryDict:
285
- contents: ArtifactManifestEntryDict = {
286
- "path": self.path,
287
- "digest": self.digest,
288
- }
289
- if self.size is not None:
290
- contents["size"] = self.size
291
- if self.ref:
292
- contents["ref"] = self.ref
293
- if self.birth_artifact_id:
294
- contents["birthArtifactID"] = self.birth_artifact_id
295
- if self.local_path:
296
- contents["local_path"] = self.local_path
297
- if self.skip_cache:
298
- contents["skip_cache"] = self.skip_cache
299
- if self.extra:
300
- contents["extra"] = self.extra
301
- return contents
250
+ return f"{_WB_ARTIFACT_SCHEME}://{b64_to_hex_id(parent_id)}/{self.path}"
251
+
252
+ def to_json(self) -> dict[str, Any]:
253
+ # NOTE: The method name `to_json` is a bit misleading, as this returns a
254
+ # python dict, NOT a JSON string. The historical name is kept for continuity,
255
+ # but consider deprecating this in favor of `BaseModel.model_dump()`.
256
+ return self.model_dump(exclude_none=True) # type: ignore[return-value]
302
257
 
303
258
  def _is_artifact_reference(self) -> bool:
304
259
  return self.ref is not None and urlparse(self.ref).scheme == _WB_ARTIFACT_SCHEME
@@ -1,61 +1,48 @@
1
1
  """Artifact manifest v1."""
2
2
 
3
+ # Older-style type annotations required for Pydantic v1 / python 3.8 compatibility.
4
+ # ruff: noqa: UP006
5
+
3
6
  from __future__ import annotations
4
7
 
5
8
  from operator import itemgetter
6
- from typing import Any, Mapping
9
+ from typing import Any, ClassVar, Dict, Literal, final
10
+
11
+ from pydantic import Field
12
+ from typing_extensions import Annotated
7
13
 
8
- from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
9
- from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
10
- from wandb.sdk.artifacts.storage_policy import StoragePolicy
11
14
  from wandb.sdk.internal.internal_api import Api as InternalApi
12
15
  from wandb.sdk.lib.hashutil import HexMD5, _md5
13
16
 
17
+ from .._factories import make_storage_policy
18
+ from .._models.manifest import ArtifactManifestV1Data
19
+ from ..artifact_manifest import ArtifactManifest
20
+ from ..artifact_manifest_entry import ArtifactManifestEntry
21
+ from ..storage_policy import StoragePolicy
22
+
14
23
 
24
+ @final
15
25
  class ArtifactManifestV1(ArtifactManifest):
16
- @classmethod
17
- def version(cls) -> int:
18
- return 1
26
+ manifest_version: Annotated[Literal[1], Field(repr=False)] = 1
27
+ entries: Dict[str, ArtifactManifestEntry] = Field(default_factory=dict)
28
+
29
+ storage_policy: StoragePolicy = Field(
30
+ default_factory=make_storage_policy, exclude=True, repr=False
31
+ )
19
32
 
20
33
  @classmethod
21
34
  def from_manifest_json(
22
- cls, manifest_json: dict, api: InternalApi | None = None
35
+ cls, manifest_json: dict[str, Any], api: InternalApi | None = None
23
36
  ) -> ArtifactManifestV1:
24
- if manifest_json["version"] != cls.version():
25
- raise ValueError(
26
- "Expected manifest version 1, got {}".format(manifest_json["version"])
27
- )
28
-
29
- storage_policy_name = manifest_json["storagePolicy"]
30
- storage_policy_config = manifest_json.get("storagePolicyConfig", {})
31
- storage_policy_cls = StoragePolicy.lookup_by_name(storage_policy_name)
32
-
33
- entries: Mapping[str, ArtifactManifestEntry]
34
- entries = {
35
- name: ArtifactManifestEntry(
36
- path=name,
37
- digest=val["digest"],
38
- birth_artifact_id=val.get("birthArtifactID"),
39
- ref=val.get("ref"),
40
- size=val.get("size"),
41
- extra=val.get("extra"),
42
- local_path=val.get("local_path"),
43
- skip_cache=val.get("skip_cache"),
44
- )
45
- for name, val in manifest_json["contents"].items()
46
- }
37
+ data = ArtifactManifestV1Data(**manifest_json)
47
38
 
39
+ policy_name = data.storage_policy
40
+ policy_cfg = data.storage_policy_config
41
+ policy = StoragePolicy.lookup_by_name(policy_name).from_config(policy_cfg, api)
48
42
  return cls(
49
- storage_policy_cls.from_config(storage_policy_config, api=api), entries
43
+ manifest_version=data.version, entries=data.contents, storage_policy=policy
50
44
  )
51
45
 
52
- def __init__(
53
- self,
54
- storage_policy: StoragePolicy,
55
- entries: Mapping[str, ArtifactManifestEntry] | None = None,
56
- ) -> None:
57
- super().__init__(storage_policy, entries=entries)
58
-
59
46
  def to_manifest_json(self) -> dict:
60
47
  """This is the JSON that's stored in wandb_manifest.json.
61
48
 
@@ -64,31 +51,26 @@ class ArtifactManifestV1(ArtifactManifest):
64
51
  system. We don't need to include the local paths in the artifact manifest
65
52
  contents.
66
53
  """
67
- contents = {}
68
- for name, entry in sorted(self.entries.items(), key=itemgetter(0)):
69
- json_entry: dict[str, Any] = {
70
- "digest": entry.digest,
71
- }
72
- if entry.birth_artifact_id:
73
- json_entry["birthArtifactID"] = entry.birth_artifact_id
74
- if entry.ref:
75
- json_entry["ref"] = entry.ref
76
- if entry.extra:
77
- json_entry["extra"] = entry.extra
78
- if entry.size is not None:
79
- json_entry["size"] = entry.size
80
- contents[name] = json_entry
54
+ omit_entry_fields = {"path", "local_path", "skip_cache"}
81
55
  return {
82
- "version": self.__class__.version(),
56
+ "version": self.manifest_version,
83
57
  "storagePolicy": self.storage_policy.name(),
84
- "storagePolicyConfig": self.storage_policy.config() or {},
85
- "contents": contents,
58
+ "storagePolicyConfig": self.storage_policy.config(),
59
+ "contents": {
60
+ path: entry.model_dump(exclude=omit_entry_fields, exclude_defaults=True)
61
+ for path, entry in self.entries.items()
62
+ },
86
63
  }
87
64
 
65
+ _DIGEST_HEADER: ClassVar[bytes] = b"wandb-artifact-manifest-v1\n"
66
+ """Encoded prefix/header for the ArtifactManifest digest."""
67
+
88
68
  def digest(self) -> HexMD5:
89
- hasher = _md5()
90
- hasher.update(b"wandb-artifact-manifest-v1\n")
69
+ hasher = _md5(self._DIGEST_HEADER)
91
70
  # sort by key (path)
92
- for name, entry in sorted(self.entries.items(), key=itemgetter(0)):
93
- hasher.update(f"{name}:{entry.digest}\n".encode())
94
- return HexMD5(hasher.hexdigest())
71
+ for path, entry in sorted(self.entries.items(), key=itemgetter(0)):
72
+ hasher.update(f"{path}:{entry.digest}\n".encode())
73
+ return hasher.hexdigest()
74
+
75
+ def size(self) -> int:
76
+ return sum(entry.size for entry in self.entries.values() if entry.size)
@@ -79,6 +79,7 @@ class GCSHandler(StorageHandler):
79
79
  bucket, key, _ = self._parse_uri(manifest_entry.ref)
80
80
  version = manifest_entry.extra.get("versionID")
81
81
 
82
+ # Skip downloading an entry that corresponds to a folder
82
83
  if self._is_dir(manifest_entry):
83
84
  raise _GCSIsADirectoryError(
84
85
  f"Unable to download GCS folder {manifest_entry.ref!r}, skipping"
@@ -132,7 +133,8 @@ class GCSHandler(StorageHandler):
132
133
  obj = self._client.bucket(bucket).get_blob(key, generation=version)
133
134
  if obj is None and version is not None:
134
135
  raise ValueError(f"Object does not exist: {path}#{version}")
135
- multi = obj is None
136
+ # HNS buckets have blobs for directories, so we also check the blob name to see if its a directory
137
+ multi = obj is None or obj.name.endswith("/")
136
138
  if multi:
137
139
  start_time = time.monotonic()
138
140
  termlog(
@@ -215,11 +217,11 @@ class GCSHandler(StorageHandler):
215
217
  assert manifest_entry.ref is not None
216
218
  bucket, key, _ = self._parse_uri(manifest_entry.ref)
217
219
  bucket_obj = self._client.bucket(bucket)
218
- # A gcs bucket key should end with a forward slash on gcloud, but
219
- # we save these refs without the forward slash in the manifest entry
220
- # so we check the size and extension, make sure its not referring to
221
- # an actual file with this reference, and that the ref with the slash
222
- # exists on gcloud
220
+ # A gcs folder key should end with a forward slash on gcloud, but
221
+ # we previously saved these refs without the forward slash in the manifest entry
222
+ # To check whether the entry corresponds to a folder, we check the size and extension,
223
+ # make sure there is no file with this reference, and that the ref with the slash
224
+ # exists on gcloud as a folder
223
225
  return key.endswith("/") or (
224
226
  not (manifest_entry.size or PurePosixPath(key).suffix)
225
227
  and bucket_obj.get_blob(key) is None
@@ -182,8 +182,8 @@ class Image(BatchableMedia):
182
182
  unless `normalize` is set to `False`.
183
183
  - pytorch tensor should be in the format (channel, height, width)
184
184
  - NumPy array should be in the format (height, width, channel)
185
- mode: The PIL mode for an image. Most common are "L", "RGB",
186
- "RGBA". Full explanation at https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes
185
+ mode: The PIL mode for an image. Most common are "L", "RGB", "RGBA".
186
+ Full Pillow docs for more information https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes
187
187
  caption: Label for display of image.
188
188
  grouping: The grouping number for the image.
189
189
  classes: A list of class information for the image,